├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── configs ├── unidet3d_1xb8_scannet.py └── unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py ├── data ├── 3rscan │ ├── README.md │ ├── meta_data │ │ ├── 3RScan.json │ │ ├── 3RScan.v2_Semantic-Classes-Mapping.csv │ │ ├── bbox_size.pkl │ │ ├── camera_pose.pkl │ │ ├── nyu40_labels.csv │ │ ├── reference_axis_align_matrix.pkl │ │ ├── scans.txt │ │ ├── split │ │ │ ├── 3rscan_test.txt │ │ │ ├── 3rscan_train.txt │ │ │ ├── 3rscan_val.txt │ │ │ ├── test.txt │ │ │ ├── train.txt │ │ │ └── val.txt │ │ ├── test.txt │ │ ├── train.txt │ │ └── val.txt │ ├── prepare_bins_pkls.py │ ├── preprocess_raw_data.py │ └── utils.py ├── arkitscenes │ ├── README.md │ ├── arkitscenes_data_utils.py │ ├── data_prepare_offline.py │ ├── misc.py │ └── utils │ │ ├── __init__.py │ │ ├── box_utils.py │ │ ├── pc_utils.py │ │ ├── rotation.py │ │ ├── taxonomy.py │ │ └── tenFpsDataLoader.py ├── multiscan │ ├── README.md │ └── prepare_bins_pkls.py ├── s3dis │ ├── README.md │ └── remap_superpoints.py ├── scannet │ ├── README.md │ ├── batch_load_scannet_data.py │ ├── load_scannet_data.py │ ├── meta_data │ │ ├── scannet_means.npz │ │ ├── scannet_train.txt │ │ ├── scannetv2-labels.combined.tsv │ │ ├── scannetv2_test.txt │ │ ├── scannetv2_train.txt │ │ └── scannetv2_val.txt │ └── scannet_utils.py └── scannetpp │ ├── README.md │ ├── prepare_bins_pkls.py │ └── preprocess_raw_data.py ├── tools ├── create_data.py ├── indoor_converter.py ├── scannet_data_utils.py ├── test.py ├── train.py └── update_infos_to_v2.py └── unidet3d ├── __init__.py ├── arkitscenes_dataset.py ├── axis_aligned_iou_loss.py ├── concat_dataset.py ├── criterion.py ├── data_preprocessor.py ├── encoder.py ├── formatting.py ├── image_vis.py ├── indoor_eval.py ├── indoor_metric.py ├── loading.py ├── multiscan_dataset.py ├── rotated_iou_loss.py ├── rscan_dataset.py ├── s3dis_dataset.py ├── scannet_dataset.py ├── scannetpp_dataset.py ├── show_results.py ├── spconv_unet.py ├── structures.py ├── transforms_3d.py └── unidet3d.py /.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | work_dirs 3 | .vscode 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | *.ipynb 8 | *ipynb_checkpoints -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel 2 | 3 | # Install base apt packages 4 | RUN apt-get update \ 5 | && DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libopenblas-dev 6 | 7 | # Install MinkowskiEngine 8 | RUN TORCH_CUDA_ARCH_LIST="6.1 7.0 8.6 9.0" \ 9 | pip install git+https://github.com/daizhirui/MinkowskiEngine.git@ce930eeb403a8e3f99693662ec5ce329a0ab3528 -v --no-deps \ 10 | --global-option="--blas=openblas" \ 11 | --global-option="--force_cuda" 12 | 13 | # Install OpenMMLab projects 14 | RUN pip install --no-deps \ 15 | mmengine==0.9.0 \ 16 | mmdet==3.3.0 \ 17 | mmsegmentation==1.2.0 \ 18 | mmdet3d==1.4.0 \ 19 | mmpretrain==1.2.0 20 | 21 | # Install mmcv 22 | RUN git clone https://github.com/open-mmlab/mmcv.git \ 23 | && cd mmcv \ 24 | && git reset --hard 780ffed9f3736fedadf18b51266ecbf521e64cf6 \ 25 | && sed -i "s/'-std=c++14'] if cuda_args else/'-std=c++14', '-arch=sm_90'] if cuda_args else/g" setup.py \ 26 | && TORCH_CUDA_ARCH_LIST="6.1 7.0 8.6 9.0" \ 27 | && pip install -v -e . 
--no-deps \ 28 | && cd .. 29 | 30 | # Install torch-scatter 31 | RUN pip install torch-scatter==2.1.2 -f https://data.pyg.org/whl/torch-2.1.0+cu121.html --no-deps 32 | 33 | # Install ScanNet superpoint segmentator 34 | RUN git clone https://github.com/Karbo123/segmentator.git \ 35 | && cd segmentator/csrc \ 36 | && git reset --hard 76efe46d03dd27afa78df972b17d07f2c6cfb696 \ 37 | && sed -i "s/set(CMAKE_CXX_STANDARD 14)/set(CMAKE_CXX_STANDARD 17)/g" CMakeLists.txt \ 38 | && mkdir build \ 39 | && cd build \ 40 | && cmake .. \ 41 | -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` \ 42 | -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \ 43 | -DPYTHON_LIBRARY=$(python -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))") \ 44 | -DCMAKE_INSTALL_PREFIX=`python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())'` \ 45 | && make \ 46 | && make install \ 47 | && cd ../../.. 48 | 49 | # Install remaining python packages 50 | RUN pip install --no-deps \ 51 | spconv-cu120==2.3.6 \ 52 | addict==2.4.0 \ 53 | yapf==0.33.0 \ 54 | termcolor==2.3.0 \ 55 | packaging==23.1 \ 56 | numpy==1.24.1 \ 57 | rich==13.3.5 \ 58 | opencv-python==4.7.0.72 \ 59 | pycocotools==2.0.6 \ 60 | Shapely==1.8.5 \ 61 | scipy==1.10.1 \ 62 | terminaltables==3.1.10 \ 63 | numba==0.57.0 \ 64 | llvmlite==0.40.0 \ 65 | pccm==0.4.7 \ 66 | ccimport==0.4.2 \ 67 | pybind11==2.10.4 \ 68 | ninja==1.11.1 \ 69 | lark==1.1.5 \ 70 | cumm-cu120==0.5.1 \ 71 | pyquaternion==0.9.9 \ 72 | lyft-dataset-sdk==0.0.8 \ 73 | pandas==2.0.1 \ 74 | python-dateutil==2.8.2 \ 75 | matplotlib==3.5.2 \ 76 | pyparsing==3.0.9 \ 77 | cycler==0.11.0 \ 78 | kiwisolver==1.4.4 \ 79 | scikit-learn==1.2.2 \ 80 | joblib==1.2.0 \ 81 | threadpoolctl==3.1.0 \ 82 | cachetools==5.3.0 \ 83 | nuscenes-devkit==1.1.10 \ 84 | trimesh==3.21.6 \ 85 | open3d==0.17.0 \ 86 | plotly==5.18.0 \ 87 | dash==2.14.2 \ 88 | plyfile==1.0.2 \ 89 | flask==3.0.0 \ 90 | werkzeug==3.0.1 \ 91 | click==8.1.7 \ 92 | blinker==1.7.0 \ 93 | itsdangerous==2.1.2 \ 94 | importlib_metadata==2.1.2 \ 95 | zipp==3.17.0 \ 96 | natsort==8.4.0 \ 97 | timm==0.9.16 \ 98 | imageio==2.34.0 \ 99 | portalocker==2.8.2 \ 100 | ftfy==6.2.0 \ 101 | regex==2024.4.16 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## UniDet3D: Multi-dataset Indoor 3D Object Detection 2 | 3 | **News**: 4 | * :fire: December, 2024. UniDet3D is now accepted at AAAI 2025. 5 | * :fire: September, 2024. UniDet3D is state-of-the-art in 6 indoor benchmarks:
6 | ScanNet [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-scannetv2)](https://paperswithcode.com/sota/3d-object-detection-on-scannetv2?p=unidet3d-multi-dataset-indoor-3d-object)
7 | ARKitScenes [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-arkitscenes)](https://paperswithcode.com/sota/3d-object-detection-on-arkitscenes?p=unidet3d-multi-dataset-indoor-3d-object)
8 | S3DIS [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-s3dis)](https://paperswithcode.com/sota/3d-object-detection-on-s3dis?p=unidet3d-multi-dataset-indoor-3d-object)
9 | MultiScan [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-multiscan)](https://paperswithcode.com/sota/3d-object-detection-on-multiscan?p=unidet3d-multi-dataset-indoor-3d-object)
10 | 3RScan [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-3rscan)](https://paperswithcode.com/sota/3d-object-detection-on-3rscan?p=unidet3d-multi-dataset-indoor-3d-object)
11 | ScanNet++ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-scannet-1)](https://paperswithcode.com/sota/3d-object-detection-on-scannet-1?p=unidet3d-multi-dataset-indoor-3d-object). 12 | 13 | This repository contains an implementation of UniDet3D, a multi-dataset indoor 3D object detection method introduced in our paper: 14 | 15 | > **UniDet3D: Multi-dataset Indoor 3D Object Detection**
16 | > [Maksim Kolodiazhnyi](https://github.com/col14m), 17 | > [Anna Vorontsova](https://github.com/highrut), 18 | > [Matvey Skripkin](https://scholar.google.com/citations?user=hAlwb4wAAAAJ), 19 | > [Danila Rukhovich](https://github.com/filaPro), 20 | > [Anton Konushin](https://scholar.google.com/citations?user=ZT_k-wMAAAAJ) 21 | >
22 | > Artificial Intelligence Research Institute
23 | https://arxiv.org/abs/2409.04234 24 | 25 | ### Installation 26 | 27 | For convenience, we provide a [Dockerfile](Dockerfile). 28 | This implementation is based on the [mmdetection3d](https://github.com/open-mmlab/mmdetection3d) framework `v1.1.0`. If not using Docker, please follow [getting_started.md](https://github.com/open-mmlab/mmdetection3d/blob/22aaa47fdb53ce1870ff92cb7e3f96ae38d17f61/docs/en/get_started.md) for the installation instructions. 29 | 30 | 31 | ### Getting Started 32 | 33 | Please see [test_train.md](https://github.com/open-mmlab/mmdetection3d/blob/22aaa47fdb53ce1870ff92cb7e3f96ae38d17f61/docs/en/user_guides/train_test.md) for some basic usage examples. 34 | 35 | #### Data Preprocessing 36 | 37 | UniDet3D is trained and tested on 6 datasets: [ScanNet](data/scannet), [ARKitScenes](data/arkitscenes), [S3DIS](data/s3dis), [MultiScan](data/multiscan), [3RScan](data/3rscan), and [ScanNet++](data/scannetpp). 38 | Preprocessed data can be found at our [Hugging Face](https://huggingface.co/datasets/maksimko123/UniDet3D). Download each archive, unpack it, and move it into the corresponding directory in [data](data). Please comply with the license agreement before downloading the data. 39 | 40 | Alternatively, you can preprocess the data yourself. 41 | Training data for 3D object detection methods that do not require superpoints, e.g., [TR3D](https://github.com/SamsungLabs/tr3d) or [FCAF3D](https://github.com/SamsungLabs/fcaf3d), can be prepared according to the [instructions](data). 42 | 43 | Superpoints for ScanNet and MultiScan are provided as part of the original annotation. For the remaining datasets, you can either download pre-computed superpoints from our [Hugging Face](https://huggingface.co/datasets/maksimko123/UniDet3D) or compute them using [superpoint_transformer](https://github.com/drprojects/superpoint_transformer). 44 | 45 | #### Training 46 | 47 | Before training, please download the backbone [checkpoint](https://github.com/filapro/oneformer3d/releases/download/v1.0/oneformer3d_1xb4_scannet.pth) and save it under `work_dirs/tmp`. 48 | 49 | To train UniDet3D on all 6 datasets jointly, simply run the [training](tools/train.py) script: 50 | 51 | ```bash 52 | python tools/train.py configs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py 53 | ``` 54 | 55 | UniDet3D can also be trained on individual datasets, e.g., we provide a [config](configs/unidet3d_1xb8_scannet.py) for training on ScanNet only. 56 | 57 | 58 | #### Testing 59 | 60 | To test a trained model, you can run the [testing](tools/test.py) script: 61 | 62 | ```bash 63 | python tools/test.py configs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py \ 64 | work_dirs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes/epoch_1024.pth 65 | ``` 66 | 67 | UniDet3D can also be tested on individual datasets. To this end, simply remove the unwanted datasets from `val_dataloader.dataset.datasets` in the config file.
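For example, the following sketch is one possible way to do this with `mmengine` (it is not part of the repository; it assumes the concatenated validation dataset exposes the per-benchmark entries via the `datasets` list mentioned above, that the first entry is ScanNet, that the config also defines a `test_dataloader`, and the output file name is hypothetical):

```python
from mmengine.config import Config

# Load the joint 6-dataset config shipped with the repository.
cfg = Config.fromfile(
    'configs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py')

# Keep only the datasets you want to evaluate on, e.g. the first entry
# (assumed to be ScanNet), and drop the rest.
cfg.val_dataloader.dataset.datasets = cfg.val_dataloader.dataset.datasets[:1]
# Mirror the change for testing (assumes test_dataloader mirrors val_dataloader).
cfg.test_dataloader = cfg.val_dataloader

# Note: val_evaluator / test_evaluator may need the same trimming if they are
# configured per dataset; check the config before running.

# Save the trimmed config under a new (hypothetical) name.
cfg.dump('configs/unidet3d_1xb8_scannet_only_eval.py')
```

The dumped file can then be passed to `tools/test.py` in place of the joint config, together with the same checkpoint.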
68 | 69 | #### Visualization 70 | 71 | To visualize ground truth and predicted boxes, run the [testing](tools/test.py) script with additional arguments: 72 | 73 | ```bash 74 | python tools/test.py configs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py \ 75 | work_dirs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes/latest.pth --show \ 76 | --show-dir work_dirs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes 77 | ``` 78 | You can also set `score_thr` in configs to `0.3` for better visualizations. 79 | 80 | ### Trained Model 81 | 82 | Please refer to the UniDet3D [checkpoint](https://github.com/filapro/unidet3d/releases/download/v1.0/unidet3d.pth) and [log file](https://github.com/filapro/unidet3d/releases/download/v1.0/log.txt). The corresponding metrics are given below (they might slightly deviate from the values reported in the paper due to the randomized training/testing procedure). 83 | 84 | | Dataset | mAP25 | mAP50 | 85 | |:-----------:|:-----------------:|:-----------------:| 86 | | ScanNet | 77.0 | 65.9 | 87 | | ARKitScenes | 60.1 | 47.2 | 88 | | S3DIS | 76.7 | 65.3 | 89 | | MultiScan | 62.6 | 52.3 | 90 | | 3RScan | 63.6 | 44.9 | 91 | | ScanNet++ | 24.0 | 16.8 | 92 | 93 | ### Predictions Example 94 | 95 |

96 | *(figure: UniDet3D predictions)* 97 |

98 | 99 | ### Citation 100 | 101 | If you find this work useful for your research, please cite our paper: 102 | 103 | ``` 104 | @inproceedings{kolodiazhnyi2025unidet3d, 105 | title={Unidet3d: Multi-dataset indoor 3d object detection}, 106 | author={Kolodiazhnyi, Maksim and Vorontsova, Anna and Skripkin, Matvey and Rukhovich, Danila and Konushin, Anton}, 107 | booktitle={Proceedings of the AAAI Conference on Artificial Intelligence}, 108 | volume={39}, 109 | number={4}, 110 | pages={4365--4373}, 111 | year={2025} 112 | } 113 | ``` 114 | -------------------------------------------------------------------------------- /data/3rscan/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare 3RScan Data for Indoor 3D Detection 2 | 3 | 1. Download data from the official [3RScan](https://waldjohannau.github.io/RIO/). 4 | 5 | 2. Preprocess raw data by running: 6 | 7 | ```bash 8 | python preprocess_raw_data.py --dataset_root path_to_dataset --output_root path_to_save_preprocessed_raw_data 9 | ``` 10 | 11 | 3. Generate bins and pkls data by running: 12 | 13 | ```bash 14 | python prepare_bins_pkls.py --path_to_data path_to_preprocessed_raw_data --path_to_save_bins path_to_save_bins 15 | ``` 16 | 17 | Overall you achieve the following file structure in `bins` directory: 18 | ``` 19 | bins 20 | ├── bboxs 21 | │ ├── xxxxx_xx.npy 22 | ├── instance_mask 23 | │ ├── xxxxx_xx.bin 24 | ├── points 25 | │ ├── xxxxx_xx.bin 26 | ├── semantic_mask 27 | │ ├── xxxxx_xx.bin 28 | ├── superpoints 29 | │ ├── xxxxx_xx.bin 30 | ├── 3rscan_infos_train.pkl 31 | ├── 3rscan_infos_val.pkl 32 | ├── 3rscan_infos_test.pkl 33 | ``` 34 | -------------------------------------------------------------------------------- /data/3rscan/meta_data/bbox_size.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/3rscan/meta_data/bbox_size.pkl -------------------------------------------------------------------------------- /data/3rscan/meta_data/camera_pose.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/3rscan/meta_data/camera_pose.pkl -------------------------------------------------------------------------------- /data/3rscan/meta_data/nyu40_labels.csv: -------------------------------------------------------------------------------- 1 | nyu40id,nyu40class,mappedId,mappedIdConsecutive,weight 2 | 1,wall,(ignore),19,0.0 3 | 2,floor,(ignore),19,0.0 4 | 3,cabinet,3,1,3.9644974086960434 5 | 4,bed,4,2,5.459494152836571 6 | 5,chair,5,3,2.241522691584157 7 | 6,sofa,6,4,4.820655512680854 8 | 7,table,7,5,3.565918577548873 9 | 8,door,8,6,3.538498341919445 10 | 9,window,9,7,4.636521236560596 11 | 10,bookshelf,10,8,5.445050937449535 12 | 11,picture,11,9,5.079250281008131 13 | 12,counter,12,10,6.2030429647735845 14 | 13,blinds,(ignore),19,0.0 15 | 14,desk,14,11,4.622662494840168 16 | 15,shelves,(ignore),19,0.0 17 | 16,curtain,16,12,5.956294301248057 18 | 17,dresser,(ignore),19,0.0 19 | 18,pillow,(ignore),19,0.0 20 | 19,mirror,(ignore),19,0.0 21 | 20,floor_mat,(ignore),19,0.0 22 | 21,clothes,(ignore),19,0.0 23 | 22,ceiling,(ignore),19,0.0 24 | 23,books,(ignore),19,0.0 25 | 24,refridgerator,24,13,5.459141107819665 26 | 25,television,(ignore),19,0.0 27 | 26,paper,(ignore),19,0.0 28 | 27,towel,(ignore),19,0.0 29 | 
28,shower_curtain,28,14,6.724871661883906 30 | 29,box,(ignore),19,0.0 31 | 30,whiteboard,(ignore),19,0.0 32 | 31,person,(ignore),19,0.0 33 | 32,night_stand,(ignore),19,0.0 34 | 33,toilet,33,15,5.832442848923174 35 | 34,sink,34,16,5.064773947290611 36 | 35,lamp,(ignore),19,0.0 37 | 36,bathtub,36,17,6.738988357113375 38 | 37,bag,(ignore),19,0.0 39 | 38,otherstructure,(ignore),19,0.0 40 | 39,otherfurniture,39,18,3.375217918833916 41 | 40,otherprop,(ignore),19,0.0 -------------------------------------------------------------------------------- /data/3rscan/meta_data/reference_axis_align_matrix.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/3rscan/meta_data/reference_axis_align_matrix.pkl -------------------------------------------------------------------------------- /data/3rscan/meta_data/split/3rscan_test.txt: -------------------------------------------------------------------------------- 1 | 00d42bed-778d-2ac6-86a7-0e0e5f5f5660 2 | 0988ea78-eb32-2e61-80ee-e4a44170bce9 3 | 0cac7547-8d6f-2d13-8c9c-08cad6632f46 4 | 0cac758d-8d6f-2d13-8d21-9133668b4d7d 5 | 0cac75e4-8d6f-2d13-8f7c-fb5b5747d1ba 6 | 0cac7633-8d6f-2d13-8e91-8b46e2b70f34 7 | 0cac7678-8d6f-2d13-8da3-dba8636cef51 8 | 10b17942-3938-2467-8933-5d40ada6d445 9 | 13af338e-7397-2e54-84fc-fa003f91ac0c 10 | 1d233ff0-e280-2b1a-8c10-2af29c344ee3 11 | 1d23402c-e280-2b1a-8e31-9a88c9c1fb91 12 | 20c993bb-698f-29c5-8569-0db38352364f 13 | 283ccfeb-107c-24d5-8bbf-05519a3c7c47 14 | 352e9c30-69fb-27a7-8b19-c703f0e190da 15 | 38770cb0-86d7-27b8-8466-1782505891fd 16 | 422885b9-192d-25fc-84f4-6f7c1afd29af 17 | 4fbad320-465b-2a5d-85d7-dc83aa09a9fa 18 | 5341b7af-8a66-2cdd-8431-8c73b0c10878 19 | 5341b7e7-8a66-2cdd-87ce-86596253cfa4 20 | 5630cfdc-12bf-2860-87b7-c7eab95718be 21 | 634b2183-f5d0-2fb7-87ef-c536720918dc 22 | 634d11cd-6833-255d-8c5c-2b0cf661494a 23 | 6a360555-fa53-2915-9721-bff473f98125 24 | 6bde608b-9162-246f-8d16-901b429b2563 25 | 6bde60d2-9162-246f-8dbf-dbd911924def 26 | 751a5598-fe61-2c3b-8cf2-1c23632af9b4 27 | 75c259a1-9ca2-2844-973c-adc28f935d5d 28 | 7747a514-9431-24e8-8505-5979f3f20906 29 | 8eabc410-5af7-2f32-844d-34726ad2dc7e 30 | 8eabc463-5af7-2f32-8537-22977a89efdd 31 | a0905fe1-66f7-2272-9d4a-a6f6579f6751 32 | a644cb8e-0ee5-2f66-9c91-1db77b5a8f96 33 | ad408c83-84db-2095-8aa4-924f966af2dc 34 | b05fdda4-fca0-2d4f-8beb-6172b0772f91 35 | b1f2330c-d255-2761-965e-d203c6e253c3 36 | ba6fdaae-a4c1-2dca-8240-6fbe1cb25eb4 37 | bcb0fe0a-4f39-2c70-9d5f-a7263a0180ca 38 | bf9a3da4-45a5-2e80-8082-be634b241693 39 | bf9a3df5-45a5-2e80-82c1-b108dc9a3fbe 40 | c7895f2f-339c-2d13-8388-28e95af958de 41 | c7895f80-339c-2d13-8253-64b490296e49 42 | c9fb7aa1-2a5b-2cf7-9222-6f111cb28b2b 43 | d7d40d54-7a5d-2b36-94d4-7cf59473177f 44 | ddc7379d-765b-241a-9f0b-50b72d6cd829 45 | ebc42041-82a4-2113-8583-cc8c1be818b3 46 | fcf66d8e-622d-291c-86ef-f8f2b3db7f74 -------------------------------------------------------------------------------- /data/3rscan/meta_data/split/3rscan_val.txt: -------------------------------------------------------------------------------- 1 | 095821f7-e2c2-2de1-9568-b9ce59920e29 2 | 2e369567-e133-204c-909a-c5da44bb58df 3 | 095821f9-e2c2-2de1-9707-8f735cd1c148 4 | 095821fb-e2c2-2de1-94df-20f2cb423bcb 5 | 0988ea72-eb32-2e61-8344-99e2283c2728 6 | 9766cbe5-6321-2e2f-8040-4e5b7a5d8ba1 7 | 9766cbf5-6321-2e2f-8131-78c4e204635d 8 | 9766cbf7-6321-2e2f-81e1-2b46533c64dd 9 | 0cac7540-8d6f-2d13-8eee-36ba2a428e3f 10 | 
0cac7532-8d6f-2d13-8cea-1e70d5ae4856 11 | 0cac7534-8d6f-2d13-8de7-8a915ed90050 12 | 0cac7584-8d6f-2d13-8df8-c05e4307b418 13 | 0cac7582-8d6f-2d13-8d4b-e4041cb166c4 14 | 0cac75dc-8d6f-2d13-8d08-9c497bd6acdc 15 | 0cac75de-8d6f-2d13-8e1a-b574569c3885 16 | 0cac7676-8d6f-2d13-8f3a-d7bf7f03e721 17 | 7272e17c-a01b-20f6-8b2f-e659331ae41a 18 | 10b17940-3938-2467-8a7a-958300ba83d3 19 | c2d9933f-1947-2fbf-807f-c44bc1aed269 20 | c2d9933d-1947-2fbf-81fa-c8a7f9625eea 21 | 5630cfe7-12bf-2860-8710-52729dc36cc6 22 | c2d99349-1947-2fbf-837e-a0bd5e027c52 23 | c2d99347-1947-2fbf-834b-f95790c125dd 24 | c2d99345-1947-2fbf-818d-90ea82acef29 25 | c2d99343-1947-2fbf-808f-92dbb7d47aa5 26 | c2d99341-1947-2fbf-817a-5aa9b44f724f 27 | 6e67e55f-1209-2cd0-8194-8c6278434c80 28 | 137a8158-1db5-2cc0-8003-31c12610471e 29 | 5630cfd3-12bf-2860-8749-9dacb499fb14 30 | c92fb5b7-f771-2064-87a9-31c819832405 31 | f2c76ff1-2239-29d0-87f5-8a0346584384 32 | f2c76fed-2239-29d0-8598-9ed42cec9dc5 33 | f2c76feb-2239-29d0-8418-72b6051fc144 34 | f2c76fe9-2239-29d0-87ec-f2c7ced812c1 35 | f2c76fe7-2239-29d0-84f5-144c30fd7451 36 | f2c76fe5-2239-29d0-8593-1a2555125595 37 | d04eb40f-1d53-27ea-8a41-47892bde7017 38 | 6e67e550-1209-2cd0-8294-7cc2564cf82c 39 | 10b1792a-3938-2467-8b4e-a93da27a0985 40 | 1d233fe8-e280-2b1a-8fac-c3646a1cd64a 41 | 1d233fe6-e280-2b1a-8caf-eb0d13a59ad6 42 | 1d234022-e280-2b1a-8cec-e9787bb0d7b2 43 | 1d234026-e280-2b1a-8fe1-713e28269f4d 44 | 1d234024-e280-2b1a-8c28-2743fefed020 45 | 20c993b7-698f-29c5-847d-c8cb8a685f5a 46 | 20c993b3-698f-29c5-859c-dca8ddecf220 47 | 280d8ebb-6cc6-2788-9153-98959a2da801 48 | 4731976c-f9f7-2a1a-95cc-31c4d1751d0b 49 | 1d2f850c-d757-207c-8fba-60b90a7d4691 50 | ea318260-0a4c-2749-9389-4c16c782c4b1 51 | 10b17957-3938-2467-88a5-9e9254930dad 52 | 321c8680-a5a8-2a84-85c2-816a26d59516 53 | 321c867e-a5a8-2a84-851a-818df115be05 54 | 38770ca1-86d7-27b8-8619-ab66f67d9adf 55 | 38770ca3-86d7-27b8-85a7-7d840ffdec6a 56 | 38770ca5-86d7-27b8-871c-57fdbfe87905 57 | 4138582f-a238-2435-8332-6902542c2823 58 | 5341b7a5-8a66-2cdd-8751-70b98263cb8d 59 | 8eabc445-5af7-2f32-85ae-90deb8eb1b0b 60 | 422885b3-192d-25fc-84c9-9b80eea1752d 61 | 422885c5-192d-25fc-85e6-12a3d65c8e7b 62 | 4238490c-60a7-271e-9f38-3c651e3b3912 63 | 4238490a-60a7-271e-9c04-3846221dc354 64 | 42384908-60a7-271e-9c46-01e562c8974c 65 | 10b17963-3938-2467-8a48-0d4af350ce92 66 | 43b8cae1-6678-2e38-9865-c19c07c25015 67 | 43b8cadf-6678-2e38-9920-064144c99406 68 | 43b8cae3-6678-2e38-9b67-5905de29f6d7 69 | 4a9a43d2-7736-2874-874d-d0fad0570e19 70 | 4a9a43d4-7736-2874-87a6-0c3089281af8 71 | 4d3d82b0-8cf4-2e04-80a8-c955ea964c2f 72 | 4d3d82ae-8cf4-2e04-80de-20f96c814d9c 73 | 4fbad31e-465b-2a5d-84b7-c0ddea978db4 74 | 4fbad32f-465b-2a5d-8408-146ab1d72808 75 | 4fbad331-465b-2a5d-8488-852fcda9513c 76 | 9af05c68-5794-2e19-8c5a-979f448da545 77 | 5341b79f-8a66-2cdd-84d5-6dba6525dd75 78 | 7272e16c-a01b-20f6-8961-a0927b4a7629 79 | 5341b7e3-8a66-2cdd-8709-66a2159f0017 80 | 4138585b-a238-2435-810b-9728fc989b2f 81 | 5630cfcf-12bf-2860-8784-83d28a611a83 82 | 5630cfd1-12bf-2860-86b2-e7a96bc32c19 83 | bf9a3d9e-45a5-2e80-83c6-4e427c5586a2 84 | 10b1792e-3938-2467-8bb3-172148ae5a67 85 | 10b17944-3938-2467-8bac-5552375e4467 86 | 6bde6081-9162-246f-8c4e-ffaf709d17b1 87 | 6bde607b-9162-246f-8e65-76e3ef265504 88 | 6bde607d-9162-246f-8f84-98cf7ac2374c 89 | 6bde6083-9162-246f-8c9c-e170212059b2 90 | 6bde60cb-9162-246f-8cf5-d04f7426e56f 91 | 6bde60cd-9162-246f-8fad-fca80b4d6ad8 92 | 6bde60cf-9162-246f-8f98-6355d75494c2 93 | 742e8f15-be0a-294e-9ebb-6c72dbcb9662 94 | 
68bae75f-3567-2f7c-829d-7422117729f3 95 | 742e8f17-be0a-294e-9dd3-52492d308e2b 96 | 742e8f19-be0a-294e-9eb6-50dc474b110e 97 | 75c25975-9ca2-2844-9769-84677f46d4cf 98 | 8eabc455-5af7-2f32-8606-a0bdbe6c537d 99 | 7747a50c-9431-24e8-877d-e60c3a341cc2 100 | 7747a4ec-9431-24e8-848f-897279a1e9fe 101 | 7747a510-9431-24e8-8705-907ee78be2a2 102 | 8e0f1c28-9e28-2339-8584-ff06ff93c341 103 | 20c993bd-698f-29c5-8494-5556ba7d3fe9 104 | 20c993bf-698f-29c5-8549-a69fd169c1e1 105 | 8eabc45f-5af7-2f32-8528-640861d2a135 106 | 75c25989-9ca2-2844-97b4-31b81f7554b8 107 | 41385849-a238-2435-81d0-ceb0eba4541a 108 | 5341b7b3-8a66-2cdd-856d-9d70e194568b 109 | 8eabc461-5af7-2f32-8663-ce5a10fd97b3 110 | a0905fd9-66f7-2272-9dfb-0483fdcc54c7 111 | a0905fdb-66f7-2272-9fc5-7c0008d5e87b 112 | a0905fdd-66f7-2272-9cdb-89360888ea67 113 | ab835fae-54c6-29a1-995e-b06cfc555786 114 | ab835faa-54c6-29a1-9b55-1a5217fcba19 115 | d7dc987e-a34a-2794-85c8-f75389b27532 116 | b05fdd8a-fca0-2d4f-8ac5-f6ae697787f5 117 | 0cac75e8-8d6f-2d13-8fc4-acdbf00437c8 118 | 0cac75ea-8d6f-2d13-8e50-c5faf0159e32 119 | c7895f63-339c-2d13-81a3-0b07b1eb23b4 120 | b05fdd96-fca0-2d4f-88c3-d9dfda85c00e 121 | 0cac761b-8d6f-2d13-8f16-23a7d73c54fe 122 | 0cac7619-8d6f-2d13-8f36-ac562ec9a4de 123 | b1f23308-d255-2761-94da-981d962c6bf8 124 | 77361fd4-d054-2a22-88c4-b5b404f904ca 125 | ba6fdaaa-a4c1-2dca-8163-a52b18bf6b64 126 | ba6fdaac-a4c1-2dca-8380-f16765679fd7 127 | bcb0fe06-4f39-2c70-9c24-a8dd7496c2f8 128 | bcb0fe04-4f39-2c70-9f03-d0eec592de24 129 | bf9a3da2-45a5-2e80-8219-1f0a216399fe 130 | cdcaf5bd-ddd8-2ed6-97c3-489e105e4dde 131 | 10b1794e-3938-2467-89a7-ebc89e84cf88 132 | bf9a3de9-45a5-2e80-8022-277108d67404 133 | bf9a3ddf-45a5-2e80-8007-8e9e7f323e52 134 | bf9a3ddd-45a5-2e80-80bc-647365c7ca08 135 | bf9a3dd9-45a5-2e80-817c-f918e193231b 136 | c7895f27-339c-2d13-836b-c12dca280261 137 | c7895f2b-339c-2d13-8248-b0507e050314 138 | c7895f29-339c-2d13-83e9-90dbe61fa8be 139 | c7895f7c-339c-2d13-819f-3bb0b26c91f6 140 | c7895f7a-339c-2d13-82ac-09ef1c9001ba 141 | c7895f78-339c-2d13-82bb-cc990cbbc90f 142 | c92fb5b5-f771-2064-8570-dbe16cb33764 143 | 5630cfde-12bf-2860-8563-d68bdd98fab0 144 | 10b1793e-3938-2467-8b92-f56541e7ef9e 145 | d7d40d4e-7a5d-2b36-97e7-34324c52ac42 146 | d7d40d4c-7a5d-2b36-95c1-5f6c9147caf0 147 | d7d40d50-7a5d-2b36-9446-7d636174329f 148 | ddc73797-765b-241a-9e2c-097c5989baf6 149 | 2451c048-fae8-24f6-9043-f1604dbada2c 150 | ddc7379b-765b-241a-9f45-c37e41608726 151 | ddc73799-765b-241a-9c30-f75dcb7627d4 152 | 0cac75b7-8d6f-2d13-8cb2-0b4e06913140 153 | c7895f07-339c-2d13-8176-7418b6e8d7ce 154 | e61b0e04-bada-2f31-82d6-72831a602ba7 155 | e61b0e02-bada-2f31-82d0-80fc5c70bd6f 156 | fcf66d7b-622d-291c-86b8-7db96aebcee3 157 | 787ed58c-9d98-2c97-83b9-b48a609ace15 -------------------------------------------------------------------------------- /data/3rscan/meta_data/split/test.txt: -------------------------------------------------------------------------------- 1 | 00d42bed-778d-2ac6-86a7-0e0e5f5f5660 2 | 0988ea78-eb32-2e61-80ee-e4a44170bce9 3 | 0cac7547-8d6f-2d13-8c9c-08cad6632f46 4 | 0cac758d-8d6f-2d13-8d21-9133668b4d7d 5 | 0cac75e4-8d6f-2d13-8f7c-fb5b5747d1ba 6 | 0cac7633-8d6f-2d13-8e91-8b46e2b70f34 7 | 0cac7678-8d6f-2d13-8da3-dba8636cef51 8 | 10b17942-3938-2467-8933-5d40ada6d445 9 | 13af338e-7397-2e54-84fc-fa003f91ac0c 10 | 1d233ff0-e280-2b1a-8c10-2af29c344ee3 11 | 1d23402c-e280-2b1a-8e31-9a88c9c1fb91 12 | 20c993bb-698f-29c5-8569-0db38352364f 13 | 283ccfeb-107c-24d5-8bbf-05519a3c7c47 14 | 352e9c30-69fb-27a7-8b19-c703f0e190da 15 | 
38770cb0-86d7-27b8-8466-1782505891fd 16 | 422885b9-192d-25fc-84f4-6f7c1afd29af 17 | 4fbad320-465b-2a5d-85d7-dc83aa09a9fa 18 | 5341b7af-8a66-2cdd-8431-8c73b0c10878 19 | 5341b7e7-8a66-2cdd-87ce-86596253cfa4 20 | 5630cfdc-12bf-2860-87b7-c7eab95718be 21 | 634b2183-f5d0-2fb7-87ef-c536720918dc 22 | 634d11cd-6833-255d-8c5c-2b0cf661494a 23 | 6a360555-fa53-2915-9721-bff473f98125 24 | 6bde608b-9162-246f-8d16-901b429b2563 25 | 6bde60d2-9162-246f-8dbf-dbd911924def 26 | 751a5598-fe61-2c3b-8cf2-1c23632af9b4 27 | 75c259a1-9ca2-2844-973c-adc28f935d5d 28 | 7747a514-9431-24e8-8505-5979f3f20906 29 | 8eabc410-5af7-2f32-844d-34726ad2dc7e 30 | 8eabc463-5af7-2f32-8537-22977a89efdd 31 | a0905fe1-66f7-2272-9d4a-a6f6579f6751 32 | a644cb8e-0ee5-2f66-9c91-1db77b5a8f96 33 | ad408c83-84db-2095-8aa4-924f966af2dc 34 | b05fdda4-fca0-2d4f-8beb-6172b0772f91 35 | b1f2330c-d255-2761-965e-d203c6e253c3 36 | ba6fdaae-a4c1-2dca-8240-6fbe1cb25eb4 37 | bcb0fe0a-4f39-2c70-9d5f-a7263a0180ca 38 | bf9a3da4-45a5-2e80-8082-be634b241693 39 | bf9a3df5-45a5-2e80-82c1-b108dc9a3fbe 40 | c7895f2f-339c-2d13-8388-28e95af958de 41 | c7895f80-339c-2d13-8253-64b490296e49 42 | c9fb7aa1-2a5b-2cf7-9222-6f111cb28b2b 43 | d7d40d54-7a5d-2b36-94d4-7cf59473177f 44 | ddc7379d-765b-241a-9f0b-50b72d6cd829 45 | ebc42041-82a4-2113-8583-cc8c1be818b3 46 | fcf66d8e-622d-291c-86ef-f8f2b3db7f74 -------------------------------------------------------------------------------- /data/3rscan/meta_data/split/val.txt: -------------------------------------------------------------------------------- 1 | 0988ea72-eb32-2e61-8344-99e2283c2728 2 | 0cac7540-8d6f-2d13-8eee-36ba2a428e3f 3 | 0cac7584-8d6f-2d13-8df8-c05e4307b418 4 | 0cac75dc-8d6f-2d13-8d08-9c497bd6acdc 5 | 0cac7676-8d6f-2d13-8f3a-d7bf7f03e721 6 | 10b17940-3938-2467-8a7a-958300ba83d3 7 | 137a8158-1db5-2cc0-8003-31c12610471e 8 | 1d233fe8-e280-2b1a-8fac-c3646a1cd64a 9 | 1d234022-e280-2b1a-8cec-e9787bb0d7b2 10 | 20c993b7-698f-29c5-847d-c8cb8a685f5a 11 | 280d8ebb-6cc6-2788-9153-98959a2da801 12 | 321c8680-a5a8-2a84-85c2-816a26d59516 13 | 38770ca1-86d7-27b8-8619-ab66f67d9adf 14 | 422885b3-192d-25fc-84c9-9b80eea1752d 15 | 4238490c-60a7-271e-9f38-3c651e3b3912 16 | 4fbad31e-465b-2a5d-84b7-c0ddea978db4 17 | 5341b79f-8a66-2cdd-84d5-6dba6525dd75 18 | 5341b7e3-8a66-2cdd-8709-66a2159f0017 19 | 5630cfcf-12bf-2860-8784-83d28a611a83 20 | 4d3d82b0-8cf4-2e04-80a8-c955ea964c2f 21 | 6bde6081-9162-246f-8c4e-ffaf709d17b1 22 | 6bde60cb-9162-246f-8cf5-d04f7426e56f 23 | 742e8f15-be0a-294e-9ebb-6c72dbcb9662 24 | 75c25975-9ca2-2844-9769-84677f46d4cf 25 | 7747a50c-9431-24e8-877d-e60c3a341cc2 26 | 8e0f1c28-9e28-2339-8584-ff06ff93c341 27 | 8eabc45f-5af7-2f32-8528-640861d2a135 28 | a0905fd9-66f7-2272-9dfb-0483fdcc54c7 29 | ab835fae-54c6-29a1-995e-b06cfc555786 30 | b05fdd96-fca0-2d4f-88c3-d9dfda85c00e 31 | b1f23308-d255-2761-94da-981d962c6bf8 32 | ba6fdaaa-a4c1-2dca-8163-a52b18bf6b64 33 | bcb0fe06-4f39-2c70-9c24-a8dd7496c2f8 34 | bf9a3da2-45a5-2e80-8219-1f0a216399fe 35 | bf9a3de9-45a5-2e80-8022-277108d67404 36 | c7895f27-339c-2d13-836b-c12dca280261 37 | c7895f7c-339c-2d13-819f-3bb0b26c91f6 38 | c92fb5b5-f771-2064-8570-dbe16cb33764 39 | d7d40d4e-7a5d-2b36-97e7-34324c52ac42 40 | ddc73797-765b-241a-9e2c-097c5989baf6 41 | fcf66d7b-622d-291c-86b8-7db96aebcee3 42 | 4138582f-a238-2435-8332-6902542c2823 43 | 43b8cae1-6678-2e38-9865-c19c07c25015 44 | 4a9a43d2-7736-2874-874d-d0fad0570e19 45 | b05fdd8a-fca0-2d4f-8ac5-f6ae697787f5 46 | 095821f7-e2c2-2de1-9568-b9ce59920e29 47 | e61b0e04-bada-2f31-82d6-72831a602ba7 
-------------------------------------------------------------------------------- /data/3rscan/meta_data/test.txt: -------------------------------------------------------------------------------- 1 | 00d42bed-778d-2ac6-86a7-0e0e5f5f5660 2 | 0988ea78-eb32-2e61-80ee-e4a44170bce9 3 | 0cac7547-8d6f-2d13-8c9c-08cad6632f46 4 | 0cac758d-8d6f-2d13-8d21-9133668b4d7d 5 | 0cac75e4-8d6f-2d13-8f7c-fb5b5747d1ba 6 | 0cac7633-8d6f-2d13-8e91-8b46e2b70f34 7 | 0cac7678-8d6f-2d13-8da3-dba8636cef51 8 | 10b17942-3938-2467-8933-5d40ada6d445 9 | 13af338e-7397-2e54-84fc-fa003f91ac0c 10 | 1d233ff0-e280-2b1a-8c10-2af29c344ee3 11 | 1d23402c-e280-2b1a-8e31-9a88c9c1fb91 12 | 20c993bb-698f-29c5-8569-0db38352364f 13 | 283ccfeb-107c-24d5-8bbf-05519a3c7c47 14 | 352e9c30-69fb-27a7-8b19-c703f0e190da 15 | 38770cb0-86d7-27b8-8466-1782505891fd 16 | 422885b9-192d-25fc-84f4-6f7c1afd29af 17 | 4fbad320-465b-2a5d-85d7-dc83aa09a9fa 18 | 5341b7af-8a66-2cdd-8431-8c73b0c10878 19 | 5341b7e7-8a66-2cdd-87ce-86596253cfa4 20 | 5630cfdc-12bf-2860-87b7-c7eab95718be 21 | 634b2183-f5d0-2fb7-87ef-c536720918dc 22 | 634d11cd-6833-255d-8c5c-2b0cf661494a 23 | 6a360555-fa53-2915-9721-bff473f98125 24 | 6bde608b-9162-246f-8d16-901b429b2563 25 | 6bde60d2-9162-246f-8dbf-dbd911924def 26 | 751a5598-fe61-2c3b-8cf2-1c23632af9b4 27 | 75c259a1-9ca2-2844-973c-adc28f935d5d 28 | 7747a514-9431-24e8-8505-5979f3f20906 29 | 8eabc410-5af7-2f32-844d-34726ad2dc7e 30 | 8eabc463-5af7-2f32-8537-22977a89efdd 31 | a0905fe1-66f7-2272-9d4a-a6f6579f6751 32 | a644cb8e-0ee5-2f66-9c91-1db77b5a8f96 33 | ad408c83-84db-2095-8aa4-924f966af2dc 34 | b05fdda4-fca0-2d4f-8beb-6172b0772f91 35 | b1f2330c-d255-2761-965e-d203c6e253c3 36 | ba6fdaae-a4c1-2dca-8240-6fbe1cb25eb4 37 | bcb0fe0a-4f39-2c70-9d5f-a7263a0180ca 38 | bf9a3da4-45a5-2e80-8082-be634b241693 39 | bf9a3df5-45a5-2e80-82c1-b108dc9a3fbe 40 | c7895f2f-339c-2d13-8388-28e95af958de 41 | c7895f80-339c-2d13-8253-64b490296e49 42 | c9fb7aa1-2a5b-2cf7-9222-6f111cb28b2b 43 | d7d40d54-7a5d-2b36-94d4-7cf59473177f 44 | ddc7379d-765b-241a-9f0b-50b72d6cd829 45 | ebc42041-82a4-2113-8583-cc8c1be818b3 46 | fcf66d8e-622d-291c-86ef-f8f2b3db7f74 -------------------------------------------------------------------------------- /data/3rscan/meta_data/val.txt: -------------------------------------------------------------------------------- 1 | 0988ea72-eb32-2e61-8344-99e2283c2728 2 | 0cac7540-8d6f-2d13-8eee-36ba2a428e3f 3 | 0cac7584-8d6f-2d13-8df8-c05e4307b418 4 | 0cac75dc-8d6f-2d13-8d08-9c497bd6acdc 5 | 0cac7676-8d6f-2d13-8f3a-d7bf7f03e721 6 | 10b17940-3938-2467-8a7a-958300ba83d3 7 | 137a8158-1db5-2cc0-8003-31c12610471e 8 | 1d233fe8-e280-2b1a-8fac-c3646a1cd64a 9 | 1d234022-e280-2b1a-8cec-e9787bb0d7b2 10 | 20c993b7-698f-29c5-847d-c8cb8a685f5a 11 | 280d8ebb-6cc6-2788-9153-98959a2da801 12 | 321c8680-a5a8-2a84-85c2-816a26d59516 13 | 38770ca1-86d7-27b8-8619-ab66f67d9adf 14 | 422885b3-192d-25fc-84c9-9b80eea1752d 15 | 4238490c-60a7-271e-9f38-3c651e3b3912 16 | 4fbad31e-465b-2a5d-84b7-c0ddea978db4 17 | 5341b79f-8a66-2cdd-84d5-6dba6525dd75 18 | 5341b7e3-8a66-2cdd-8709-66a2159f0017 19 | 5630cfcf-12bf-2860-8784-83d28a611a83 20 | 4d3d82b0-8cf4-2e04-80a8-c955ea964c2f 21 | 6bde6081-9162-246f-8c4e-ffaf709d17b1 22 | 6bde60cb-9162-246f-8cf5-d04f7426e56f 23 | 742e8f15-be0a-294e-9ebb-6c72dbcb9662 24 | 75c25975-9ca2-2844-9769-84677f46d4cf 25 | 7747a50c-9431-24e8-877d-e60c3a341cc2 26 | 8e0f1c28-9e28-2339-8584-ff06ff93c341 27 | 8eabc45f-5af7-2f32-8528-640861d2a135 28 | a0905fd9-66f7-2272-9dfb-0483fdcc54c7 29 | ab835fae-54c6-29a1-995e-b06cfc555786 30 | 
b05fdd96-fca0-2d4f-88c3-d9dfda85c00e 31 | b1f23308-d255-2761-94da-981d962c6bf8 32 | ba6fdaaa-a4c1-2dca-8163-a52b18bf6b64 33 | bcb0fe06-4f39-2c70-9c24-a8dd7496c2f8 34 | bf9a3da2-45a5-2e80-8219-1f0a216399fe 35 | bf9a3de9-45a5-2e80-8022-277108d67404 36 | c7895f27-339c-2d13-836b-c12dca280261 37 | c7895f7c-339c-2d13-819f-3bb0b26c91f6 38 | c92fb5b5-f771-2064-8570-dbe16cb33764 39 | d7d40d4e-7a5d-2b36-97e7-34324c52ac42 40 | ddc73797-765b-241a-9e2c-097c5989baf6 41 | fcf66d7b-622d-291c-86b8-7db96aebcee3 42 | 4138582f-a238-2435-8332-6902542c2823 43 | 43b8cae1-6678-2e38-9865-c19c07c25015 44 | 4a9a43d2-7736-2874-874d-d0fad0570e19 45 | b05fdd8a-fca0-2d4f-8ac5-f6ae697787f5 46 | 095821f7-e2c2-2de1-9568-b9ce59920e29 47 | e61b0e04-bada-2f31-82d6-72831a602ba7 -------------------------------------------------------------------------------- /data/3rscan/prepare_bins_pkls.py: -------------------------------------------------------------------------------- 1 | import mmengine 2 | import os 3 | from tqdm.auto import tqdm 4 | import numpy as np 5 | import argparse 6 | 7 | COLOR_TO_LABEL = { 8 | (0, 0, 0): 'unknown', 9 | (174, 199, 232): 'wall', 10 | (152, 223, 138): 'floor', 11 | (31, 119, 180): 'cabinet', 12 | (255, 187, 120): 'bed', 13 | (188, 189, 34): 'chair', 14 | (140, 86, 75): 'sofa', 15 | (255, 152, 150): 'table', 16 | (214, 39, 40): 'door', 17 | (197, 176, 213): 'window', 18 | (148, 103, 189): 'bookshelf', 19 | (196, 156, 148): 'picture', 20 | (23, 190, 207): 'counter', 21 | (178, 76, 76): 'blinds', 22 | (247, 182, 210): 'desk', 23 | (66, 188, 102): 'shelves', 24 | (219, 219, 141): 'curtain', 25 | (140, 57, 197): 'dresser', 26 | (202, 185, 52): 'pillow', 27 | (51, 176, 203): 'mirror', 28 | (200, 54, 131): 'floor mat', 29 | (92, 193, 61): 'clothes', 30 | (78, 71, 183): 'ceiling', 31 | (172, 114, 82): 'books', 32 | (255, 127, 14): 'fridge', 33 | (91, 163, 138): 'television', 34 | (153, 98, 156): 'paper', 35 | (140, 153, 101): 'towel', 36 | (158, 218, 229): 'shower curtain', 37 | (100, 125, 154): 'box', 38 | (178, 127, 135): 'whiteboard', 39 | (120, 185, 128): 'person', 40 | (146, 111, 194): 'night stand', 41 | (44, 160, 44): 'toilet', 42 | (112, 128, 144): 'sink', 43 | (96, 207, 209): 'lamp', 44 | (227, 119, 194): 'bathtub', 45 | (213, 92, 176): 'bag', 46 | (94, 106, 211): 'structure', 47 | (82, 84, 163): 'furniture', 48 | (100, 85, 144): 'prop' 49 | } 50 | 51 | OBJ2SEM = {v: idx for idx, (k, v) in enumerate(COLOR_TO_LABEL.items())} 52 | OBJ2SEM['unknown'] = -1 53 | REMAIN_BB_LABELS = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39] 54 | 55 | def create_dir(path): 56 | if not os.path.exists(path): 57 | os.mkdir(path) 58 | 59 | 60 | def _filter_bb(bb): 61 | final = [] 62 | for i in bb: 63 | if i[-1] in REMAIN_BB_LABELS: 64 | final.append(i) 65 | 66 | if len(final) == 0: 67 | return np.zeros((0,7)) 68 | 69 | return np.stack(final) 70 | 71 | def create_dirs(path): 72 | points = os.path.join(path, 'points') 73 | create_dir(points) 74 | 75 | semantic_mask = os.path.join(path, 'semantic_mask') 76 | create_dir(semantic_mask) 77 | 78 | instance_mask = os.path.join(path, 'instance_mask') 79 | create_dir(instance_mask) 80 | 81 | bboxs = os.path.join(path, 'bboxs') 82 | create_dir(bboxs) 83 | 84 | superpoints = os.path.join(path, 'superpoints') 85 | create_dir(superpoints) 86 | return { 87 | 'points': points, 88 | 'semantic_mask': semantic_mask, 89 | 'instance_mask': instance_mask, 90 | 'bboxs': bboxs, 91 | 'superpoints': superpoints 92 | } 93 | 94 | 95 | 96 | def rearrange_sup(sup): 97 | sup = 
sup.copy() 98 | unique_super = np.unique(sup) 99 | 100 | for idx, un in enumerate(unique_super): 101 | ind = np.where(sup == un)[0] 102 | sup[ind] = idx 103 | 104 | return sup 105 | 106 | 107 | def create_metainfo(): 108 | 109 | return { 110 | 'categories': OBJ2SEM, 111 | 'dataset': '3RScan', 112 | 'info_version': '1.0' 113 | } 114 | 115 | def create_data_list(split, splits, bins_path): 116 | scenes = splits[split] 117 | final_list = [] 118 | for scene in tqdm(scenes): 119 | 120 | lidar_points = { 121 | 'num_pts_feats': 6, 122 | 'lidar_path': f'{scene}.bin' 123 | } 124 | raw_bboxs = np.load(os.path.join(bins_path['bboxs'], f'{scene}.npy')) 125 | instances = [] 126 | for rb in raw_bboxs: 127 | if len(rb) == 0: 128 | instances = [] 129 | 130 | else: 131 | instances.append({ 132 | 'bbox_3d': rb[:6].tolist(), 133 | 'bbox_label_3d': int(rb[-1]) 134 | }) 135 | 136 | final_list.append({ 137 | 'lidar_points': lidar_points, 138 | 'instances': instances, 139 | 'pts_semantic_mask_path': f'{scene}.bin', 140 | 'pts_instance_mask_path': f'{scene}.bin', 141 | 'axis_align_matrix': np.eye(4) 142 | }) 143 | 144 | return final_list 145 | 146 | def create_pkl_file(path_to_save, split, splits, bins_path, pkl_prefix = '3rscan'): 147 | metainfo = create_metainfo() 148 | data_list = create_data_list(split, splits, bins_path) 149 | anno = { 150 | 'metainfo': metainfo, 151 | 'data_list': data_list 152 | } 153 | filename = os.path.join(path_to_save, f'{pkl_prefix}_infos_{split}.pkl') 154 | mmengine.dump(anno, filename, 'pkl') 155 | 156 | 157 | if __name__ == '__main__': 158 | parser = argparse.ArgumentParser() 159 | parser.add_argument( 160 | '--path_to_data', 161 | required=True, 162 | help='Path to preprocessed raw data', 163 | type=str, 164 | ) 165 | 166 | parser.add_argument( 167 | '--path_to_save_bins', 168 | required=True, 169 | help='Enter here the path where to save bins and pkls', 170 | type=str, 171 | ) 172 | 173 | parser.add_argument( 174 | '--path_to_splits', 175 | default='meta_data/split/', 176 | help='Path to train/val/test splits', 177 | type=str, 178 | ) 179 | 180 | args = parser.parse_args() 181 | print(args) 182 | 183 | path_to_splits = args.path_to_splits 184 | path_to_raw_data = args.path_to_data 185 | 186 | path_to_save_data = args.path_to_save_bins 187 | create_dir(path_to_save_data) 188 | bins_path = create_dirs(path_to_save_data) 189 | 190 | with open(path_to_splits + '/train.txt') as train_file: 191 | train_scenes = train_file.read().splitlines() 192 | with open(path_to_splits + '/val.txt') as val_file: 193 | val_scenes = val_file.read().splitlines() 194 | with open(path_to_splits + '/test.txt') as test_file: 195 | test_scenes = test_file.read().splitlines() 196 | 197 | splits = { 198 | 'train': train_scenes, 199 | 'val': val_scenes, 200 | 'test': test_scenes 201 | } 202 | 203 | scene_ids = os.listdir(path_to_raw_data) 204 | 205 | for si in tqdm(scene_ids): 206 | temp_path = os.path.join(path_to_raw_data, si) 207 | point_cloud = np.load(temp_path + f'/{si}_aligned_vert.npy') 208 | sem_label = np.load(temp_path + f'/{si}_sem_label.npy')[:, 0] 209 | ins_label = np.load(temp_path + f'/{si}_ins_label.npy')[:, 0] 210 | bboxs = np.load(temp_path + f'/{si}_aligned_bbox.npy') 211 | superpoints = np.load(temp_path + f'/{si}_superpoints.npy') 212 | superpoints = rearrange_sup(superpoints) 213 | bboxs = _filter_bb(bboxs) 214 | 215 | superpoints = np.load(temp_path + f'/{si}_superpoints.npy') 216 | superpoints = rearrange_sup(superpoints) 217 | 218 | 
point_cloud.astype(np.float32).tofile(os.path.join(bins_path['points'], 219 | f'{si}.bin')) 220 | sem_label.astype(np.int64).tofile(os.path.join(bins_path['semantic_mask'], 221 | f'{si}.bin')) 222 | ins_label.astype(np.int64).tofile(os.path.join(bins_path['instance_mask'], 223 | f'{si}.bin')) 224 | superpoints.astype(np.int64).tofile(os.path.join(bins_path['superpoints'], 225 | f'{si}.bin')) 226 | np.save(os.path.join(bins_path['bboxs'], f'{si}.npy'), bboxs) 227 | 228 | 229 | create_pkl_file(path_to_save_data, 'train', splits, bins_path) 230 | create_pkl_file(path_to_save_data, 'val', splits, bins_path) 231 | create_pkl_file(path_to_save_data, 'test', splits, bins_path) 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | -------------------------------------------------------------------------------- /data/3rscan/preprocess_raw_data.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore", category=DeprecationWarning) 3 | 4 | import pickle 5 | import sys 6 | import os 7 | import argparse 8 | import glob 9 | import json 10 | import numpy as np 11 | import pandas as pd 12 | from concurrent.futures import ProcessPoolExecutor 13 | from itertools import repeat 14 | 15 | from utils import read_objmesh, point_indices_from_group 16 | 17 | #CLOUD_FILE_PFIX = 'mesh.refined.v2.color' 18 | CLOUD_FILE_PFIX = 'mesh.refined.v2' 19 | AGGREGATIONS_FILE_PFIX = 'semseg.v2.json' 20 | SEGMENTS_FILE_PFIX = 'mesh.refined.0.010000.segs.v2.json' 21 | 22 | 23 | def create_dir(path): 24 | if not os.path.exists(path): 25 | os.mkdir(path) 26 | 27 | def read_transform_matrix(Scan3RJson_PATH): 28 | rescan2ref = {} 29 | with open(Scan3RJson_PATH , "r") as read_file: 30 | data = json.load(read_file) 31 | for scene in data: 32 | for scans in scene["scans"]: 33 | if "transform" in scans: 34 | rescan2ref[scans["reference"]] = \ 35 | np.array(scans["transform"]).reshape(4,4).T 36 | return rescan2ref 37 | 38 | def get_reference_dic(Scan3RJson_PATH): 39 | meta_data = json.load(open(Scan3RJson_PATH)) 40 | reference_dic = {} 41 | for record in meta_data: 42 | reference = record['reference'] 43 | reference_dic[reference] = reference 44 | if 'scans' not in record: 45 | continue 46 | for scan in record['scans']: 47 | reference_dic[scan['reference']] = reference 48 | return reference_dic 49 | 50 | def handle_process(scene_path, output_path, labels_pd, 51 | train_scenes, val_scenes, test_scenes): 52 | scene_id = scene_path.split('/')[-1] 53 | obj_path = os.path.join(scene_path, f'{CLOUD_FILE_PFIX}.obj') 54 | aggregations_file = os.path.join(scene_path, f'{AGGREGATIONS_FILE_PFIX}') 55 | segments_file = os.path.join(scene_path, f'{SEGMENTS_FILE_PFIX}') 56 | # Rotating the mesh to axis aligned 57 | rot_matrix = rescan2ref.get(scene_id, np.identity(4)) 58 | 59 | ref_scene_id = reference_dic[scene_id] 60 | ref_rot_matrix = reference_axis_align_matrix_dic[ref_scene_id] 61 | 62 | if scene_id in train_scenes: 63 | split_name = 'train' 64 | elif scene_id in val_scenes: 65 | split_name = 'val' 66 | elif scene_id in test_scenes: 67 | split_name = 'test' 68 | else: 69 | print('*', scene_id, 70 | 'does not exist in [train, val, test] that have seg files') 71 | return 72 | 73 | print('Processing: ', scene_id, 'in', split_name) 74 | 75 | pointcloud, faces_array = read_objmesh(obj_path) 76 | points = pointcloud[:, :3] 77 | colors = pointcloud[:, 3:6] 78 | 79 | # Rotate 
PC to axis aligned 80 | r_points = pointcloud[:, :3].transpose() 81 | r_points = np.append(r_points, np.ones((1, 82 | r_points.shape[1])), axis=0) 83 | # reference align 84 | r_points = np.dot(rot_matrix, r_points) 85 | # reference axis align 86 | r_points = np.dot(ref_rot_matrix, r_points) 87 | ##### ! 88 | aligned_pointcloud = np.append(r_points.transpose()[:, :3], 89 | pointcloud[:, 3:], axis=1) 90 | 91 | # Generate new labels 92 | labelled_pc = np.zeros((pointcloud.shape[0], 1)) - 1 # -1: unannotated 93 | instance_ids = np.zeros((pointcloud.shape[0], 1)) - 1 # -1: unannotated 94 | 95 | if os.path.isfile(aggregations_file): 96 | # Load segments file 97 | with open(segments_file) as f: 98 | segments = json.load(f) 99 | seg_indices = np.array(segments['segIndices']) 100 | # Load Aggregations file 101 | with open(aggregations_file) as f: 102 | aggregation = json.load(f) 103 | seg_groups = np.array(aggregation['segGroups']) 104 | 105 | num_instances = len(seg_groups) 106 | instance_bboxes = np.zeros((num_instances, 7)) 107 | aligned_instance_bboxes = np.zeros((num_instances, 7)) 108 | 109 | for obj_idx, group in enumerate(seg_groups): 110 | segment_points, aligned_segment_points, p_inds, label_id = \ 111 | point_indices_from_group(pointcloud, aligned_pointcloud, 112 | seg_indices, group, labels_pd) 113 | labelled_pc[p_inds] = label_id 114 | 115 | if len(segment_points) == 0: continue 116 | 117 | xmin = np.min(segment_points[:,0]) 118 | ymin = np.min(segment_points[:,1]) 119 | zmin = np.min(segment_points[:,2]) 120 | xmax = np.max(segment_points[:,0]) 121 | ymax = np.max(segment_points[:,1]) 122 | zmax = np.max(segment_points[:,2]) 123 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, 124 | (zmin+zmax)/2, xmax-xmin, 125 | ymax-ymin, zmax-zmin, label_id]) # also include object id 126 | instance_bboxes[obj_idx,:] = bbox 127 | 128 | if len(aligned_segment_points) == 0: continue 129 | 130 | instance_ids[p_inds] = obj_idx 131 | xmin = np.min(aligned_segment_points[:,0]) 132 | ymin = np.min(aligned_segment_points[:,1]) 133 | zmin = np.min(aligned_segment_points[:,2]) 134 | xmax = np.max(aligned_segment_points[:,0]) 135 | ymax = np.max(aligned_segment_points[:,1]) 136 | zmax = np.max(aligned_segment_points[:,2]) 137 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, 138 | (zmin+zmax)/2, xmax-xmin, ymax-ymin, 139 | zmax-zmin, label_id]) # also include object id 140 | aligned_instance_bboxes[obj_idx,:] = bbox 141 | else: 142 | # use zero as placeholders for the test scene 143 | #print("use placeholders") 144 | instance_bboxes = np.zeros((1, 7)) 145 | aligned_instance_bboxes = np.zeros((1, 7)) 146 | 147 | labelled_pc = labelled_pc.astype(int) 148 | instance_ids = instance_ids.astype(int) 149 | assert np.all(instance_ids[np.where(labelled_pc == -1)[0]] == -1) 150 | if -1 in np.unique(instance_ids): 151 | assert len(instance_bboxes) == len(np.unique(instance_ids)[1:]) 152 | 153 | else: 154 | assert len(instance_bboxes) == len(np.unique(instance_ids)) 155 | 156 | if (np.any(np.isnan(pointcloud)) or not np.all(np.isfinite(pointcloud))): 157 | raise ValueError('nan') 158 | 159 | output_path = os.path.join(output_path, f'{scene_id}') 160 | create_dir(os.path.join(output_path)) 161 | output_prefix = os.path.join(output_path, f'{scene_id}') 162 | np.save(output_prefix+'_aligned_vert.npy', aligned_pointcloud[:, :6]) 163 | np.save(output_prefix+'_sem_label.npy', labelled_pc) 164 | np.save(output_prefix+'_ins_label.npy', instance_ids) 165 | np.save(output_prefix+'_aligned_bbox.npy', aligned_instance_bboxes) 166 | 
np.save(output_prefix+'_superpoints.npy', seg_indices) 167 | 168 | 169 | if __name__ == '__main__': 170 | parser = argparse.ArgumentParser() 171 | parser.add_argument('--dataset_root', default='../data/3rscan/', 172 | help='Path to the 3RScan dataset containing scene folders') 173 | parser.add_argument('--output_root', default='preprocessed_raw_data', 174 | help='Output path where processed data will be located') 175 | parser.add_argument('--label_map_file', 176 | default='meta_data/3RScan.v2_Semantic-Classes-Mapping.csv', 177 | help='path to scannetv2-labels.combined.tsv') 178 | parser.add_argument('--num_workers', default=12, 179 | type=int, help='The number of parallel workers') 180 | parser.add_argument('--splits_path', default='meta_data/split', 181 | help='Where the txt files with the train/val splits live') 182 | config = parser.parse_args() 183 | 184 | # Load label map 185 | labels_pd = pd.read_csv(config.label_map_file, sep=',', header=1) 186 | 187 | # Load train/val splits 188 | with open(config.splits_path + '/train.txt') as train_file: 189 | train_scenes = train_file.read().splitlines() 190 | with open(config.splits_path + '/val.txt') as val_file: 191 | val_scenes = val_file.read().splitlines() 192 | with open(config.splits_path + '/test.txt') as test_file: 193 | test_scenes = test_file.read().splitlines() 194 | 195 | META_FILE = 'meta_data/3RScan.json' 196 | rescan2ref = read_transform_matrix(META_FILE) 197 | reference_dic = get_reference_dic(META_FILE) 198 | 199 | with open('./meta_data/reference_axis_align_matrix.pkl', 'rb') as f: 200 | reference_axis_align_matrix_dic = pickle.load(f) 201 | 202 | os.makedirs(config.output_root, exist_ok=True) 203 | 204 | # Load scene paths 205 | scene_paths = sorted(glob.glob(config.dataset_root + '/*')) 206 | 207 | # Preprocess data. 208 | pool = ProcessPoolExecutor(max_workers=config.num_workers) 209 | print('Processing scenes...') 210 | _ = list(pool.map(handle_process, scene_paths, 211 | repeat(config.output_root), repeat(labels_pd), 212 | repeat(train_scenes), repeat(val_scenes), 213 | repeat(test_scenes))) 214 | -------------------------------------------------------------------------------- /data/arkitscenes/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare ARKitScenes Data for Indoor 3D Detection 2 | 3 | For now we only support offline benchmark with a single reconstructed point clound for each scene. Online benchmark for single RGB-D frame detection can be supported in the future. The `utils` directory is used unchanged from [ARKitScenes](https://github.com/apple/ARKitScenes/tree/main/threedod/benchmark_scripts/utils), except fixing a single [issue](https://github.com/apple/ARKitScenes/issues/53). 4 | 5 | 1. Download data from the official [ARKitScenes](https://github.com/apple/ARKitScenes). From their repo you may run: 6 | ``` 7 | python download_data.py 3dod --video-id-csv threedod/3dod_train_val_splits.csv 8 | ``` 9 | 10 | After this step you have the following file structure here: 11 | ``` 12 | 3dod 13 | ├── metadata.csv 14 | ├── Training 15 | │ ├── xxxxxxxx 16 | │ │ ├── xxxxxxxx_3dod_annotation.json 17 | │ │ ├── xxxxxxxx_3dod_mesh.ply 18 | │ │ ├── xxxxxxxx_frames 19 | ├── Validation 20 | │ ├── xxxxxxxx 21 | │ │ ├── xxxxxxxx_3dod_annotation.json 22 | │ │ ├── xxxxxxxx_3dod_mesh.ply 23 | │ │ ├── xxxxxxxx_frames 24 | ``` 25 | 26 | 2. 
Preprocess data for offline benchmark with our adapted script: 27 | ``` 28 | python data_prepare_offline.py 29 | ``` 30 | After this step you have the following file structure here: 31 | ``` 32 | offline_prepared_data 33 | ├── xxxxxxxx_point.npy 34 | ├── xxxxxxxx_bbox.npy 35 | ├── xxxxxxxx_label.npy 36 | ``` 37 | 38 | 3. Enter the project root directory, generate training and validation data by running: 39 | ``` 40 | python tools/create_data.py arkitscenes --root-path ./data/arkitscenes --out-dir ./data/arkitscenes --extra-tag arkitscenes-offline 41 | ``` 42 | Overall you achieve the following file structure in `data` directory: 43 | ``` 44 | arkitscenes 45 | ├── offline_prepared_data 46 | │ ├── xxxxxxxx_point.bin 47 | ├── arkitscenes_offline_train_infos.pkl 48 | ├── arkitscenes_offline_val_infos.pkl 49 | 50 | ``` 51 | -------------------------------------------------------------------------------- /data/arkitscenes/arkitscenes_data_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from concurrent import futures as futures 3 | from os import path as osp 4 | import mmengine 5 | import numpy as np 6 | from typing import List, Optional 7 | 8 | 9 | class ARKitScenesOfflineData: 10 | """ARKitScenesOfflineData 11 | Generate arkitscenes infos (offline benchmark) for indoor_converter. 12 | 13 | Args: 14 | root_path (str): Root path of the raw data. 15 | split (str): Stplit type 'train' or 'val'. 16 | """ 17 | def __init__(self, root_path: str, split: str): 18 | self.split = split 19 | raw_path = os.path.join(root_path, '3dod') 20 | self.data_path = os.path.join(root_path, 'offline_prepared_data') 21 | assert split in ['train', 'val'] 22 | class_names = [ 23 | 'cabinet', 'refrigerator', 'shelf', 'stove', 'bed', 24 | 'sink', 'washer', 'toilet', 'bathtub', 'oven', 25 | 'dishwasher', 'fireplace', 'stool', 'chair', 'table', 26 | 'tv_monitor', 'sofa' 27 | ] 28 | self.name2class = { 29 | name: i 30 | for i, name in enumerate(class_names) 31 | } 32 | all_id_list = set( 33 | map(lambda x: x.split('_')[0], 34 | os.listdir(self.data_path))) 35 | split_dir = 'Training' if split == 'train' else 'Validation' 36 | split_id_list = set(os.listdir(osp.join(raw_path, split_dir))) 37 | self.sample_id_list = all_id_list & split_id_list 38 | print(f'{split}, raw ids: {len(split_id_list)}, ' 39 | f'processed ids: {len(self.sample_id_list)}') 40 | 41 | def __len__(self) -> int: 42 | """Length of the dataset.""" 43 | return len(self.sample_id_list) 44 | 45 | def get_infos(self, 46 | num_workers: int = 4, 47 | has_label: bool = True, 48 | sample_id_list: Optional[List[str]] = None) -> dict: 49 | """Get data infos. 50 | This method gets information from the raw data. 51 | 52 | Args: 53 | num_workers (int, optional): Number of threads to be used. 54 | Default: 4. 55 | has_label (bool, optional): Whether the data has label. 56 | Default: True. 57 | sample_id_list (list[str], optional): Index list of the sample. 58 | Default: None. 59 | 60 | Returns: 61 | dict: Information of the raw data. 
62 | """ 63 | def process_single_scene(sample_idx): 64 | print(f'{self.split} sample_idx: {sample_idx}', end='\r') 65 | info = { 66 | 'lidar_points': { 67 | 'num_pts_feats': 6, 68 | 'lidar_path': f'{sample_idx}_point.bin' 69 | } 70 | } 71 | boxes = np.load( 72 | osp.join(self.data_path, f'{sample_idx}_bbox.npy')) 73 | labels = np.load( 74 | osp.join(self.data_path, f'{sample_idx}_label.npy')) 75 | instances = [] 76 | for box, label in zip(boxes, labels): 77 | # follow heading angle of DepthInstance3DBoxes 78 | box[-1] = -box[-1] 79 | instances.append({ 80 | 'bbox_3d': box.tolist(), 81 | 'bbox_label_3d': self.name2class[label] 82 | }) 83 | info['instances'] = instances 84 | return info 85 | 86 | sample_id_list = sample_id_list if sample_id_list is not None \ 87 | else self.sample_id_list 88 | with futures.ThreadPoolExecutor(num_workers) as executor: 89 | infos = executor.map(process_single_scene, list(sample_id_list)) 90 | 91 | infos = { 92 | 'metainfo': { 93 | 'categories': self.name2class, 94 | 'dataset': 'arkitscenes_offline', 95 | 'info_version': '1.0' 96 | }, 97 | 'data_list': list(infos) 98 | } 99 | return infos 100 | 101 | 102 | # do not want to add create_annotations.py to projects 103 | if __name__ == '__main__': 104 | root_path = '/opt/project/data/arkitscenes' 105 | out_path = '/opt/project/work_dirs/tmp' 106 | infos_train = ARKitScenesOfflineData( 107 | root_path=root_path, split='train').get_infos() 108 | train_filename = osp.join(out_path, 'arkitscenes_offline_infos_train.pkl') 109 | mmengine.dump(infos_train, train_filename, 'pkl') 110 | infos_val = ARKitScenesOfflineData( 111 | root_path=root_path, split='val').get_infos() 112 | val_filename = osp.join(out_path, 'arkitscenes_offline_infos_val.pkl') 113 | mmengine.dump(infos_val, val_filename, 'pkl') 114 | -------------------------------------------------------------------------------- /data/arkitscenes/data_prepare_offline.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/apple/ARKitScenes/blob/main/threedod/benchmark_scripts/data_prepare_offline.py 2 | import argparse 3 | import numpy as np 4 | import os 5 | import pandas as pd 6 | from functools import partial 7 | from tqdm.contrib.concurrent import process_map 8 | 9 | import utils.box_utils as box_utils 10 | import utils.pc_utils as pc_utils 11 | import utils.taxonomy as taxonomy 12 | from utils.tenFpsDataLoader import TenFpsDataLoader, extract_gt 13 | 14 | # we keep this rough grid_size=0.05 from the original benchmark, 15 | # however accuracy might be better with smaller grid_size 16 | def accumulate_wrapper(loader, grid_size=0.05): 17 | """ 18 | Args: 19 | loader: TenFpsDataLoader 20 | Returns: 21 | world_pc: (N, 3) 22 | xyz in world coordinate system 23 | world_sem: (N, d) 24 | semantic for each point 25 | grid_size: float 26 | keep only one point in each (g_size, g_size, g_size) grid 27 | """ 28 | world_pc, world_rgb, poses = np.zeros((0, 3)), np.zeros((0, 3)), [] 29 | for i in range(len(loader)): 30 | frame = loader[i] 31 | image_path = frame["image_path"] 32 | pcd = frame["pcd"] # in world coordinate 33 | pose = frame["pose"] 34 | rgb = frame["color"] 35 | 36 | world_pc = np.concatenate((world_pc, pcd), axis=0) 37 | world_rgb = np.concatenate((world_rgb, rgb), axis=0) 38 | 39 | choices = pc_utils.down_sample(world_pc, grid_size) 40 | world_pc = world_pc[choices] 41 | world_rgb = world_rgb[choices] 42 | 43 | return world_pc, world_rgb, poses 44 | 45 | 46 | def main(scene_id, split, data_root, 
output_dir): 47 | # step 0.0: output folder, make dir 48 | os.makedirs(output_dir, exist_ok=True) 49 | point_output_path = os.path.join(output_dir, f"{scene_id}_point.npy") 50 | bbox_output_path = os.path.join(output_dir, f"{scene_id}_bbox.npy") 51 | label_output_path = os.path.join(output_dir, f"{scene_id}_label.npy") 52 | # skip already processed scenes 53 | if os.path.exists(point_output_path) \ 54 | and os.path.exists(bbox_output_path) \ 55 | and os.path.exists(label_output_path): 56 | return 57 | 58 | # step 0.1: get annotation first, 59 | # if skipped or no gt boxes, we will not bother calling further steps 60 | gt_path = os.path.join(data_root, split, scene_id, f"{scene_id}_3dod_annotation.json") 61 | skipped, boxes_corners, _, _, labels, _ = extract_gt(gt_path) 62 | if skipped or boxes_corners.shape[0] == 0: 63 | return 64 | 65 | # step 0.2: data 66 | data_path = os.path.join(data_root, split, scene_id, f"{scene_id}_frames") 67 | loader = TenFpsDataLoader( 68 | dataset_cfg=None, 69 | class_names=taxonomy.class_names, 70 | root_path=data_path) 71 | 72 | # step 1: accumulate points and save points 73 | world_pc, world_rgb, _ = accumulate_wrapper(loader) 74 | # despite original benchmark script ignores rgb here, we save it 75 | # to allow user to use or skip it for trainig / testing / visualization 76 | points = np.concatenate((world_pc, world_rgb), axis=1).astype(np.float32) 77 | points.tofile(point_output_path) 78 | 79 | # step 2: save labels and boxes 80 | # not sure if we need uids, but keep them followinig original benchmark 81 | boxes = box_utils.corners_to_boxes(boxes_corners) 82 | np.save(bbox_output_path, boxes) 83 | np.save(label_output_path, labels) 84 | 85 | 86 | if __name__ == "__main__": 87 | parser = argparse.ArgumentParser() 88 | parser.add_argument( 89 | "--data-root", 90 | default="./3dod", 91 | help="input folder with ./Training/{scene_id}, ./Validation/{scene_id}" 92 | "and metadata.json" 93 | ) 94 | parser.add_argument( 95 | "--output-dir", 96 | default="./offline_prepared_data", 97 | help="directory to save the data and annoation" 98 | ) 99 | parser.add_argument( 100 | "--max-workers", 101 | default=1, 102 | type=int, 103 | help="number of parallel processes" 104 | ) 105 | 106 | args = parser.parse_args() 107 | df = pd.read_csv(os.path.join(args.data_root, "metadata.csv")) 108 | scene_ids = list(map(str, df["video_id"].to_list())) 109 | splits = list(map(str, df["fold"].to_list())) 110 | process_map( 111 | partial(main, data_root=args.data_root, output_dir=args.output_dir), 112 | scene_ids, splits, max_workers=args.max_workers) 113 | -------------------------------------------------------------------------------- /data/arkitscenes/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | from collections import defaultdict 5 | from plyfile import PlyData 6 | from tqdm import tqdm 7 | 8 | import mmengine 9 | from mmdet3d.structures import DepthInstance3DBoxes 10 | from mmdet3d.apis import inference_detector, init_model 11 | from projects.TR3D.tr3d.local_visualizer import TR3DLocalVisualizer 12 | from utils.box_utils import boxes_to_corners_3d 13 | from utils.pc_utils import down_sample 14 | 15 | 16 | def verify_corners(): 17 | a = np.random.rand(100, 7) 18 | mmdet3d_corners = DepthInstance3DBoxes(a, origin=(.5, .5, .5)).corners.numpy() 19 | a[:, -1] = -a[:, -1] 20 | arkiscenes_corners = boxes_to_corners_3d(a)[:, [2, 6, 7, 3, 1, 5, 4, 0]] 21 | assert 
np.abs(arkiscenes_corners - mmdet3d_corners).max() < 1e-5 22 | 23 | 24 | def print_object_statistics(path): 25 | print(path) 26 | infos = mmengine.load(path) 27 | categories = infos['metainfo']['categories'] 28 | inverse_categories = {v: k for k, v in categories.items()} 29 | data = {c: defaultdict(list) for c in categories} 30 | for d in infos['data_list']: 31 | for instance in d['instances']: 32 | category_data = data[inverse_categories[instance['bbox_label_3d']]] 33 | box = instance['bbox_3d'] 34 | category_data['xy_min'].append(min(box[3], box[4])) 35 | category_data['xy_max'].append(max(box[3], box[4])) 36 | category_data['z'].append(box[5]) 37 | 38 | quantiles = (0, .75, 1) 39 | columns = ['category', 'N'] 40 | df_data = [] 41 | for key in category_data.keys(): 42 | for q in quantiles: 43 | columns.append(f'{key}.{q}') 44 | for category, category_data in data.items(): 45 | table_row = [category, len(category_data['z'])] 46 | for key in category_data.keys(): 47 | for q in quantiles: 48 | value = np.quantile(category_data[key], q) 49 | table_row.append(value) # f'{value:.4f}' 50 | df_data.append(table_row) 51 | df = pd.DataFrame(data=df_data, columns=columns) 52 | pd.set_option('display.precision', 3) 53 | target = df[['xy_max.0.75', 'z.0.75']].to_numpy().max(axis=1) 54 | target = target > np.median(target) 55 | df['target'] = target 56 | print(df) 57 | print('target:', target.astype(int).tolist()) 58 | 59 | 60 | def aggregate_multiple_ply(path, grid_size=0.05): 61 | world_pc, world_rgb = np.zeros((0, 3)), np.zeros((0, 3)) 62 | for file_name in tqdm(os.listdir(path)): 63 | data = PlyData.read(os.path.join(path, file_name)) 64 | pc = np.stack(( 65 | data['vertex']['x'], 66 | data['vertex']['y'], 67 | data['vertex']['z']), axis=1) 68 | rgb = np.stack(( 69 | data['vertex']['red'], 70 | data['vertex']['green'], 71 | data['vertex']['blue']), axis=1) 72 | world_pc = np.concatenate((world_pc, pc)) 73 | world_rgb = np.concatenate((world_rgb, rgb)) 74 | choices = down_sample(world_pc, grid_size) 75 | world_pc = world_pc[choices] 76 | world_rgb = world_rgb[choices] 77 | points = np.concatenate((world_pc, world_rgb), axis=1).astype(np.float32) 78 | file_name = f'{os.path.basename(os.path.dirname(path))}.bin' 79 | points.tofile(os.path.join('work_dirs/tmp/tmp', file_name)) 80 | 81 | 82 | def predict(pcd_path, config_path, checkpoint_path): 83 | model = init_model(config_path, checkpoint_path, device='cuda:0', 84 | cfg_options=dict(test_dataloader=dict(dataset=dict(box_type_3d='depth')))) 85 | points = np.fromfile(pcd_path, dtype=np.float32).reshape(-1, 6) 86 | points = np.concatenate((points[:, :3], points[:, 3:] / 255), axis=1) 87 | result = inference_detector(model, points) 88 | TR3DLocalVisualizer().add_datasample( 89 | name='', 90 | data_input=dict(points=points), 91 | data_sample=result[0], 92 | draw_gt=False, 93 | out_file=pcd_path, 94 | vis_task='lidar_det') 95 | 96 | if __name__ == '__main__': 97 | # verify_corners() 98 | # print_object_statistics('/opt/project/data/arkitscenes/arkitscenes_offline_infos_train.pkl') 99 | # print_object_statistics('/opt/project/data/arkitscenes/arkitscenes_offline_infos_val.pkl') 100 | aggregate_multiple_ply('data/tmp/230621_sr_room_samples/Jun18at10-18PM-poly/pcds') 101 | predict( 102 | 'work_dirs/tmp/tmp/Jun18at10-18PM-poly.bin', 103 | 'projects/arkitscenes/configs/tr3d_1xb16_arkitscenes-offline-3d-4class.py', 104 | 'work_dirs/tmp/tr3d_arkitscenes_epoch10.pth') 105 | -------------------------------------------------------------------------------- 
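Note on the prepared point files: despite the `.npy` extension used in `offline_prepared_data`, `data_prepare_offline.py` saves the points with `tofile`, so both the `*_point.npy` and the `*_point.bin` files are raw float32 dumps with six values per point (xyz followed by rgb). A minimal loading sketch (the scene id is illustrative; the division by 255 mirrors `predict()` in `misc.py` and assumes colors are stored in 0-255):

```python
import numpy as np

# raw float32 dump: N x (x, y, z, r, g, b)
points = np.fromfile('data/arkitscenes/offline_prepared_data/41048190_point.npy',
                     dtype=np.float32).reshape(-1, 6)
xyz = points[:, :3]
rgb = points[:, 3:] / 255.0  # assumes colors are stored as 0-255 values
```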
/data/arkitscenes/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/arkitscenes/utils/__init__.py -------------------------------------------------------------------------------- /data/arkitscenes/utils/box_utils.py: -------------------------------------------------------------------------------- 1 | # TODO: Explain 8 corners logic at the top and use it consistently 2 | # Add comments of explanation 3 | 4 | import numpy as np 5 | import scipy.spatial 6 | 7 | from .rotation import rotate_points_along_z 8 | 9 | 10 | def get_size(box): 11 | """ 12 | Args: 13 | box: 8x3 14 | Returns: 15 | size: [dx, dy, dz] 16 | """ 17 | distance = scipy.spatial.distance.cdist(box[0:1, :], box[1:5, :]) 18 | l = distance[0, 2] 19 | w = distance[0, 0] 20 | h = distance[0, 3] 21 | return [l, w, h] 22 | 23 | 24 | def get_heading_angle(box): 25 | """ 26 | Args: 27 | box: (8, 3) 28 | Returns: 29 | heading_angle: float 30 | """ 31 | a = box[0, 0] - box[1, 0] 32 | b = box[0, 1] - box[1, 1] 33 | 34 | heading_angle = np.arctan2(a, b) 35 | return heading_angle 36 | 37 | 38 | def compute_box_3d(size, center, rotmat): 39 | """Compute corners of a single box from rotation matrix 40 | Args: 41 | size: list of float [dx, dy, dz] 42 | center: np.array [x, y, z] 43 | rotmat: np.array (3, 3) 44 | Returns: 45 | corners: (8, 3) 46 | """ 47 | l, h, w = [i / 2 for i in size] 48 | center = np.reshape(center, (-1, 3)) 49 | center = center.reshape(3) 50 | x_corners = [l, l, -l, -l, l, l, -l, -l] 51 | y_corners = [h, -h, -h, h, h, -h, -h, h] 52 | z_corners = [w, w, w, w, -w, -w, -w, -w] 53 | corners_3d = np.dot( 54 | np.transpose(rotmat), np.vstack([x_corners, y_corners, z_corners]) 55 | ) 56 | corners_3d[0, :] += center[0] 57 | corners_3d[1, :] += center[1] 58 | corners_3d[2, :] += center[2] 59 | return np.transpose(corners_3d) 60 | 61 | 62 | def corners_to_boxes(corners3d): 63 | """ 64 | 7 -------- 4 65 | /| /| 66 | 6 -------- 5 . 67 | | | | | 68 | . 3 -------- 0 69 | |/ |/ 70 | 2 -------- 1 71 | Args: 72 | corners: (N, 8, 3), vertex order shown in figure above 73 | 74 | Returns: 75 | boxes3d: (N, 7) [x, y, z, dx, dy, dz, heading] 76 | with (x, y, z) is the box center 77 | (dx, dy, dz) as the box size 78 | and heading as the clockwise rotation angle 79 | """ 80 | 81 | boxes3d = np.zeros((corners3d.shape[0], 7)) 82 | for i in range(corners3d.shape[0]): 83 | boxes3d[i, :3] = np.mean(corners3d[i, :, :], axis=0) 84 | boxes3d[i, 3:6] = get_size(corners3d[i, :, :]) 85 | boxes3d[i, 6] = get_heading_angle(corners3d[i, :, :]) 86 | 87 | return boxes3d 88 | 89 | 90 | def boxes_to_corners_3d(boxes3d): 91 | """ 92 | 7 -------- 4 93 | /| /| 94 | 6 -------- 5 . 95 | | | | | 96 | . 3 -------- 0 97 | |/ |/ 98 | 2 -------- 1 99 | Args: 100 | boxes3d: (N, 7) [x, y, z, dx, dy, dz, heading], 101 | (x, y, z) is the box center 102 | 103 | Returns: 104 | corners: (N, 8, 3) 105 | """ 106 | template = np.array([[1, 1, -1], 107 | [1, -1, -1], 108 | [-1, -1, -1], 109 | [-1, 1, -1], 110 | [1, 1, 1], 111 | [1, -1, 1], 112 | [-1, -1, 1], 113 | [-1, 1, 1]] 114 | ) / 2. 
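    # The template rows follow the corner numbering in the figure above:
    # rows 0-3 form the bottom face (z = -dz/2) and rows 4-7 the top face
    # (z = +dz/2); scaling by (dx, dy, dz), rotating about z by the heading
    # and adding the center below yields the 8 box corners.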
115 | 116 | # corners3d: of shape (N, 3, 8) 117 | corners3d = np.tile(boxes3d[:, None, 3:6], (1, 8, 1)) * template[None, :, :] 118 | 119 | corners3d = rotate_points_along_z(corners3d.reshape(-1, 8, 3), boxes3d[:, 6]).reshape( 120 | -1, 8, 3 121 | ) 122 | corners3d += boxes3d[:, None, 0:3] 123 | 124 | return corners3d 125 | 126 | 127 | def points_in_boxes(points, boxes): 128 | """ 129 | Args: 130 | pc: np.array (n, 3+d) 131 | boxes: np.array (m, 8, 3) 132 | Returns: 133 | mask: np.array (n, m) of type bool 134 | """ 135 | if len(boxes) == 0: 136 | return np.zeros([points.shape[0], 1], dtype=np.bool) 137 | points = points[:, :3] # get xyz 138 | # u = p6 - p5 139 | u = boxes[:, 6, :] - boxes[:, 5, :] # (m, 3) 140 | # v = p6 - p7 141 | v = boxes[:, 6, :] - boxes[:, 7, :] # (m, 3) 142 | # w = p6 - p2 143 | w = boxes[:, 6, :] - boxes[:, 2, :] # (m, 3) 144 | 145 | # ux, vx, wx 146 | ux = np.matmul(points, u.T) # (n, m) 147 | vx = np.matmul(points, v.T) 148 | wx = np.matmul(points, w.T) 149 | 150 | # up6, up5, vp6, vp7, wp6, wp2 151 | up6 = np.sum(u * boxes[:, 6, :], axis=1) 152 | up5 = np.sum(u * boxes[:, 5, :], axis=1) 153 | vp6 = np.sum(v * boxes[:, 6, :], axis=1) 154 | vp7 = np.sum(v * boxes[:, 7, :], axis=1) 155 | wp6 = np.sum(w * boxes[:, 6, :], axis=1) 156 | wp2 = np.sum(w * boxes[:, 2, :], axis=1) 157 | 158 | mask_u = np.logical_and(ux <= up6, ux >= up5) # (1024, n) 159 | mask_v = np.logical_and(vx <= vp6, vx >= vp7) 160 | mask_w = np.logical_and(wx <= wp6, wx >= wp2) 161 | 162 | mask = mask_u & mask_v & mask_w # (10240, n) 163 | 164 | return mask 165 | 166 | 167 | def poly_area(x,y): 168 | """ Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates """ 169 | return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) 170 | 171 | 172 | def polygon_clip(subjectPolygon, clipPolygon): 173 | """ Clip a polygon with another polygon. 174 | Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python 175 | Args: 176 | subjectPolygon: a list of (x,y) 2d points, any polygon. 177 | clipPolygon: a list of (x,y) 2d points, has to be *convex* 178 | Note: 179 | **points have to be counter-clockwise ordered** 180 | Return: 181 | a list of (x,y) vertex point for the intersection polygon. 182 | """ 183 | 184 | def inside(p): 185 | return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0]) 186 | 187 | def computeIntersection(): 188 | dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]] 189 | dp = [s[0] - e[0], s[1] - e[1]] 190 | n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0] 191 | n2 = s[0] * e[1] - s[1] * e[0] 192 | n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0]) 193 | return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3] 194 | 195 | outputList = subjectPolygon 196 | cp1 = clipPolygon[-1] 197 | 198 | for clipVertex in clipPolygon: 199 | cp2 = clipVertex 200 | inputList = outputList 201 | outputList = [] 202 | s = inputList[-1] 203 | 204 | for subjectVertex in inputList: 205 | e = subjectVertex 206 | if inside(e): 207 | if not inside(s): 208 | outputList.append(computeIntersection()) 209 | outputList.append(e) 210 | elif inside(s): 211 | outputList.append(computeIntersection()) 212 | s = e 213 | cp1 = cp2 214 | if len(outputList) == 0: 215 | return None 216 | return (outputList) 217 | 218 | 219 | def convex_hull_intersection(p1, p2): 220 | """ Compute area of two convex hull's intersection area. 221 | p1,p2 are a list of (x,y) tuples of hull vertices. 
222 | return a list of (x,y) for the intersection and its volume 223 | """ 224 | inter_p = polygon_clip(p1,p2) 225 | if inter_p is not None: 226 | hull_inter = scipy.spatial.ConvexHull(inter_p) 227 | return inter_p, hull_inter.volume 228 | else: 229 | return None, 0.0 230 | 231 | 232 | def box3d_vol(corners): 233 | ''' corners: (8,3) no assumption on axis direction ''' 234 | a = np.sqrt(np.sum((corners[0,:] - corners[1,:])**2)) 235 | b = np.sqrt(np.sum((corners[1,:] - corners[2,:])**2)) 236 | c = np.sqrt(np.sum((corners[0,:] - corners[4,:])**2)) 237 | return a*b*c 238 | 239 | 240 | def box3d_iou(corners1, corners2): 241 | ''' Compute 3D bounding box IoU. 242 | 243 | Input: 244 | corners1: numpy array (8,3), assume up direction is negative Y 245 | corners2: numpy array (8,3), assume up direction is negative Y 246 | Output: 247 | iou: 3D bounding box IoU 248 | iou_2d: bird's eye view 2D bounding box IoU 249 | 250 | ''' 251 | # corner points are in counter clockwise order 252 | rect1 = [(corners1[i,0], corners1[i,1]) for i in range(3,-1,-1)] 253 | rect2 = [(corners2[i,0], corners2[i,1]) for i in range(3,-1,-1)] 254 | area1 = poly_area(np.array(rect1)[:,0], np.array(rect1)[:,1]) 255 | area2 = poly_area(np.array(rect2)[:,0], np.array(rect2)[:,1]) 256 | inter, inter_area = convex_hull_intersection(rect1, rect2) 257 | iou_2d = inter_area/(area1+area2-inter_area) 258 | ymax = min(corners1[:,2].max(), corners2[:,2].max()) 259 | ymin = max(corners1[:,2].min(), corners2[:,2].min()) 260 | inter_vol = inter_area * max(0.0, ymax-ymin) 261 | vol1 = box3d_vol(corners1) 262 | vol2 = box3d_vol(corners2) 263 | iou = inter_vol / (vol1 + vol2 - inter_vol) 264 | return iou -------------------------------------------------------------------------------- /data/arkitscenes/utils/pc_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def down_sample(point_cloud, voxel_sz): 5 | """Quantize point cloud by voxel_size 6 | Returns kept indices 7 | 8 | Args: 9 | all_points: np.array (n, 3) float 10 | voxel_sz: float 11 | Returns: 12 | indices: (m, ) int 13 | """ 14 | coordinates = np.round(point_cloud / voxel_sz).astype(np.int32) 15 | _, indices = np.unique(coordinates, axis=0, return_index=True) 16 | return indices -------------------------------------------------------------------------------- /data/arkitscenes/utils/rotation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import math 3 | import numpy as np 4 | 5 | 6 | def eulerAnglesToRotationMatrix(theta): 7 | """Euler rotation matrix with clockwise logic. 
8 | Rotation 9 | 10 | Args: 11 | theta: list of float 12 | [theta_x, theta_y, theta_z] 13 | Returns: 14 | R: np.array (3, 3) 15 | rotation matrix of Rz*Ry*Rx 16 | """ 17 | R_x = np.array( 18 | [ 19 | [1, 0, 0], 20 | [0, math.cos(theta[0]), -math.sin(theta[0])], 21 | [0, math.sin(theta[0]), math.cos(theta[0])], 22 | ] 23 | ) 24 | 25 | R_y = np.array( 26 | [ 27 | [math.cos(theta[1]), 0, math.sin(theta[1])], 28 | [0, 1, 0], 29 | [-math.sin(theta[1]), 0, math.cos(theta[1])], 30 | ] 31 | ) 32 | 33 | R_z = np.array( 34 | [ 35 | [math.cos(theta[2]), -math.sin(theta[2]), 0], 36 | [math.sin(theta[2]), math.cos(theta[2]), 0], 37 | [0, 0, 1], 38 | ] 39 | ) 40 | 41 | R = np.dot(R_z, np.dot(R_y, R_x)) 42 | return R 43 | 44 | 45 | def upright_camera_relative_transform(pose): 46 | """Generate pose matrix with z-dim as height 47 | 48 | Args: 49 | pose: np.array (4, 4) 50 | Returns: 51 | urc: (4, 4) 52 | urc_inv: (4, 4) 53 | """ 54 | 55 | # take viewing direction in camera local coordiantes (which is simply unit vector along +z) 56 | view_dir_camera = np.asarray([0, 0, 1]) 57 | R = pose[0:3, 0:3] 58 | t = pose[0:3, 3] 59 | 60 | # convert to world coordinates 61 | view_dir_world = np.dot(R, view_dir_camera) 62 | 63 | # compute heading 64 | view_dir_xy = view_dir_world[0:2] 65 | heading = math.atan2(view_dir_xy[1], view_dir_xy[0]) 66 | 67 | # compute rotation around Z to align heading with +Y 68 | zRot = -heading + math.pi / 2 69 | 70 | # translation first, back to camera point 71 | urc_t = np.identity(4) 72 | urc_t[0:2, 3] = -1 * t[0:2] 73 | 74 | # compute rotation matrix 75 | urc_r = np.identity(4) 76 | urc_r[0:3, 0:3] = eulerAnglesToRotationMatrix([0, 0, zRot]) 77 | 78 | urc = np.dot(urc_r, urc_t) 79 | urc_inv = np.linalg.inv(urc) 80 | 81 | return urc, urc_inv 82 | 83 | 84 | def rotate_pc(pc, rotmat): 85 | """Rotation points w.r.t. rotmat 86 | Args: 87 | pc: np.array (n, 3) 88 | rotmat: np.array (4, 4) 89 | Returns: 90 | pc: (n, 3) 91 | """ 92 | pc_4 = np.ones([pc.shape[0], 4]) 93 | pc_4[:, 0:3] = pc 94 | pc_4 = np.dot(pc_4, np.transpose(rotmat)) 95 | 96 | return pc_4[:, 0:3] 97 | 98 | 99 | def rotate_points_along_z(points, angle): 100 | """Rotation clockwise 101 | Args: 102 | points: np.array of np.array (B, N, 3 + C) or 103 | (N, 3 + C) for single batch 104 | angle: np.array of np.array (B, ) 105 | or (, ) for single batch 106 | angle along z-axis, angle increases x ==> y 107 | Returns: 108 | points_rot: (B, N, 3 + C) or (N, 3 + C) 109 | 110 | """ 111 | single_batch = len(points.shape) == 2 112 | if single_batch: 113 | points = np.expand_dims(points, axis=0) 114 | angle = np.expand_dims(angle, axis=0) 115 | cosa = np.expand_dims(np.cos(angle), axis=1) 116 | sina = np.expand_dims(np.sin(angle), axis=1) 117 | zeros = np.zeros_like(cosa) # angle.new_zeros(points.shape[0]) 118 | ones = np.ones_like(sina) # angle.new_ones(points.shape[0]) 119 | 120 | rot_matrix = ( 121 | np.concatenate((cosa, -sina, zeros, sina, cosa, zeros, zeros, zeros, ones), axis=1) 122 | .reshape(-1, 3, 3) 123 | ) 124 | 125 | # print(rot_matrix.view(3, 3)) 126 | points_rot = np.matmul(points[:, :, :3], rot_matrix) 127 | points_rot = np.concatenate((points_rot, points[:, :, 3:]), axis=-1) 128 | 129 | if single_batch: 130 | points_rot = points_rot.squeeze(0) 131 | 132 | return points_rot 133 | 134 | 135 | def convert_angle_axis_to_matrix3(angle_axis): 136 | """Return a Matrix3 for the angle axis. 137 | Arguments: 138 | angle_axis {Point3} -- a rotation in angle axis form. 
139 | """ 140 | matrix, jacobian = cv2.Rodrigues(angle_axis) 141 | return matrix -------------------------------------------------------------------------------- /data/arkitscenes/utils/taxonomy.py: -------------------------------------------------------------------------------- 1 | #TODO: no original categories 2 | # shortened version only 3 | 4 | import copy 5 | import numpy as np 6 | 7 | 8 | # After merging, our label-id to class (string); 9 | class_names = [ 10 | "cabinet", "refrigerator", "shelf", "stove", "bed", # 0..5 11 | "sink", "washer", "toilet", "bathtub", "oven", # 5..10 12 | "dishwasher", "fireplace", "stool", "chair", "table", # 10..15 13 | "tv_monitor", "sofa", # 15..17 14 | ] 15 | 16 | # 3D Anchor-sizes of merged categories (dx, dy, dz) 17 | ''' 18 | Anchor box sizes are computed based on box corner order below: 19 | 6 -------- 7 20 | /| /| 21 | 5 -------- 4 . 22 | | | | | 23 | . 2 -------- 3 24 | |/ |/ 25 | 1 -------- 0 26 | ''' 27 | 28 | 29 | class ARKitDatasetConfig(object): 30 | def __init__(self): 31 | """ 32 | init will set values for: 33 | self.class_names 34 | self.cls2label (after mapping) 35 | self.label2cls (after mapping) 36 | self.num_class 37 | 38 | Args: 39 | """ 40 | # final training/val categories 41 | self.class_names = class_names 42 | self.label2cls = {} 43 | self.cls2label = {} 44 | for i, cls_ in enumerate(class_names): 45 | self.label2cls[i] = cls_ 46 | self.cls2label[cls_] = i 47 | 48 | self.num_class = len(self.class_names) 49 | -------------------------------------------------------------------------------- /data/multiscan/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare MultiScan Data for Indoor 3D Detection 2 | 3 | 1. Download and unzip data from the official [MultiScan](https://github.com/smartscenes/multiscan?tab=readme-ov-file). 4 | 5 | 2. Generate bins and pkls data by running: 6 | 7 | ```bash 8 | python prepare_bins_pkls.py --path_to_pths path_to_unzipped_folder --path_to_save_bins path_to_save_bins 9 | ``` 10 | 11 | Overall you achieve the following file structure in `bins` directory: 12 | ``` 13 | bins 14 | ├── bboxs 15 | │ ├── xxxxx_xx.npy 16 | ├── instance_mask 17 | │ ├── xxxxx_xx.bin 18 | ├── points 19 | │ ├── xxxxx_xx.bin 20 | ├── semantic_mask 21 | │ ├── xxxxx_xx.bin 22 | ├── super_points 23 | │ ├── xxxxx_xx.bin 24 | ├── multiscan_infos_train.pkl 25 | ├── multiscan_infos_val.pkl 26 | ├── multiscan_infos_test.pkl 27 | ``` 28 | -------------------------------------------------------------------------------- /data/s3dis/README.md: -------------------------------------------------------------------------------- 1 | ### Preparation of S3DIS Data for Indoor Detection 2 | 3 | Please follow original mmdetection3d [instruction](https://github.com/open-mmlab/mmdetection3d/tree/22aaa47fdb53ce1870ff92cb7e3f96ae38d17f61/data/s3dis). However, to match the order of points in each scene with our superpoints it will be needed to run `remap_superpoints.py` script. 
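A possible invocation, where `path_to_superpoints_data` is a placeholder for the folder holding the reference `points/` and `super_points/` bins and `./` stands for the mmdetection3d-prepared S3DIS directory:

```bash
python remap_superpoints.py --src path_to_superpoints_data --dst ./
```

For every scene present in both folders the script matches each destination point to its nearest source point with a KD-tree and writes the transferred superpoint ids into `super_points/` next to the destination `points/` folder.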
4 | -------------------------------------------------------------------------------- /data/s3dis/remap_superpoints.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import numpy as np 3 | from sklearn.neighbors import KDTree 4 | import argparse 5 | from tqdm import tqdm 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser(description='Remap superpoints from source to destination point cloud data') 9 | parser.add_argument('--src', type=str, required=True, help='Path to source data') 10 | parser.add_argument('--dst', type=str, required=True, help='Path to destination data') 11 | return parser.parse_args() 12 | 13 | args = parse_args() 14 | 15 | src_folder = Path(args.src) 16 | dst_folder = Path(args.dst) 17 | 18 | for src_file in tqdm(src_folder.glob('points/*.bin'), desc="Processing files"): 19 | pcds_src = np.fromfile(src_file, dtype=np.float32).reshape(-1, 6)[:, :3] 20 | sp_src = np.fromfile(src_file.parent.parent / 'super_points' / src_file.name, dtype=np.int64) 21 | 22 | dst_file = dst_folder / 'points' / src_file.name 23 | if dst_file.exists(): 24 | pcds_dst = np.fromfile(dst_file, dtype=np.float32).reshape(-1, 6)[:, :3] 25 | 26 | tree = KDTree(pcds_src) 27 | _, indices = tree.query(pcds_dst, k=1) 28 | sp_dst = sp_src[indices.flatten()] 29 | 30 | dst_sp_file = dst_file.parent.parent / 'super_points' / dst_file.name 31 | dst_sp_file.parent.mkdir(parents=True, exist_ok=True) 32 | sp_dst.astype(np.int64).tofile(dst_sp_file) 33 | else: 34 | print(f"Corresponding file not found in destination folder: {dst_file}") -------------------------------------------------------------------------------- /data/scannet/README.md: -------------------------------------------------------------------------------- 1 | ### Prepare ScanNet Data for Indoor Detection or Segmentation Task 2 | 3 | We follow the procedure in [votenet](https://github.com/facebookresearch/votenet/). 4 | 5 | 1. Download ScanNet v2 data [HERE](https://github.com/ScanNet/ScanNet). Link or move the 'scans' folder to this level of directory. If you are performing segmentation tasks and want to upload the results to its official [benchmark](http://kaldir.vc.in.tum.de/scannet_benchmark/), please also link or move the 'scans_test' folder to this directory. 6 | 7 | 2. In this directory, extract point clouds and annotations by running `python batch_load_scannet_data.py`. Add the `--scannet200` flag if you want to get markup for the ScanNet200 dataset. 8 | 9 | 3. Enter the project root directory, generate training data by running 10 | 11 | ```bash 12 | python tools/create_data.py scannet --root-path ./data/scannet --out-dir ./data/scannet --extra-tag scannet 13 | ``` 14 |         or for ScanNet200: 15 | 16 | ```bash 17 | mkdir data/scannet200 18 | python tools/create_data.py scannet200 --root-path ./data/scannet --out-dir ./data/scannet200 --extra-tag scannet200 19 | ``` 20 | 21 | The overall process for ScanNet could be achieved through the following script 22 | 23 | ```bash 24 | python batch_load_scannet_data.py 25 | cd ../.. 26 | python tools/create_data.py scannet --root-path ./data/scannet --out-dir ./data/scannet --extra-tag scannet 27 | ``` 28 | 29 | Or for ScanNet200: 30 | 31 | ```bash 32 | python batch_load_scannet_data.py --scannet200 33 | cd ../.. 
34 | mkdir data/scannet200 35 | python tools/create_data.py scannet200 --root-path ./data/scannet --out-dir ./data/scannet200 --extra-tag scannet200 36 | ``` 37 | 38 | The directory structure after pre-processing should be as below 39 | 40 | ``` 41 | scannet 42 | ├── meta_data 43 | ├── batch_load_scannet_data.py 44 | ├── load_scannet_data.py 45 | ├── scannet_utils.py 46 | ├── README.md 47 | ├── scans 48 | ├── scans_test 49 | ├── scannet_instance_data 50 | ├── points 51 | │ ├── xxxxx.bin 52 | ├── instance_mask 53 | │ ├── xxxxx.bin 54 | ├── semantic_mask 55 | │ ├── xxxxx.bin 56 | ├── super_points 57 | │ ├── xxxxx.bin 58 | ├── seg_info 59 | │ ├── train_label_weight.npy 60 | │ ├── train_resampled_scene_idxs.npy 61 | │ ├── val_label_weight.npy 62 | │ ├── val_resampled_scene_idxs.npy 63 | ├── scannet_infos_train.pkl 64 | ├── scannet_infos_val.pkl 65 | ├── scannet_infos_test.pkl 66 | 67 | ``` 68 | -------------------------------------------------------------------------------- /data/scannet/batch_load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/facebookresearch/votenet/blob/master/scannet/batch_load_scannet_data.py 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | # 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | """Batch mode in loading Scannet scenes with vertices and ground truth labels 8 | for semantic and instance segmentations. 9 | 10 | Usage example: python ./batch_load_scannet_data.py 11 | """ 12 | import argparse 13 | import datetime 14 | import os 15 | from os import path as osp 16 | 17 | import torch 18 | import segmentator 19 | import open3d as o3d 20 | import numpy as np 21 | from load_scannet_data import export 22 | 23 | DONOTCARE_CLASS_IDS = np.array([]) 24 | 25 | SCANNET_OBJ_CLASS_IDS = np.array( 26 | [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39]) 27 | 28 | SCANNET200_OBJ_CLASS_IDS = np.array([2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 29 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 84, 86, 87, 88, 89, 90, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 110, 112, 115, 116, 118, 120, 121, 122, 125, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 145, 148, 154, 30 | 155, 156, 157, 159, 161, 163, 165, 166, 168, 169, 170, 177, 180, 185, 188, 191, 193, 195, 202, 208, 213, 214, 221, 229, 230, 232, 233, 242, 250, 261, 264, 276, 283, 286, 300, 304, 312, 323, 325, 331, 342, 356, 370, 392, 395, 399, 408, 417, 31 | 488, 540, 562, 570, 572, 581, 609, 748, 776, 1156, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191]) 32 | 33 | 34 | 35 | def export_one_scan(scan_name, 36 | output_filename_prefix, 37 | max_num_point, 38 | label_map_file, 39 | scannet_dir, 40 | test_mode=False, 41 | scannet200=False): 42 | mesh_file = osp.join(scannet_dir, scan_name, scan_name + '_vh_clean_2.ply') 43 | agg_file = osp.join(scannet_dir, scan_name, 44 | scan_name + '.aggregation.json') 45 | seg_file = osp.join(scannet_dir, scan_name, 46 | scan_name + '_vh_clean_2.0.010000.segs.json') 47 | # includes axisAlignment info for the train set scans. 
48 | meta_file = osp.join(scannet_dir, scan_name, f'{scan_name}.txt') 49 | mesh_vertices, semantic_labels, instance_labels, unaligned_bboxes, \ 50 | aligned_bboxes, instance2semantic, axis_align_matrix = export( 51 | mesh_file, agg_file, seg_file, meta_file, label_map_file, None, 52 | test_mode, scannet200) 53 | 54 | if not test_mode: 55 | mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS)) 56 | mesh_vertices = mesh_vertices[mask, :] 57 | semantic_labels = semantic_labels[mask] 58 | instance_labels = instance_labels[mask] 59 | 60 | num_instances = len(np.unique(instance_labels)) 61 | print(f'Num of instances: {num_instances}') 62 | if scannet200: 63 | OBJ_CLASS_IDS = SCANNET200_OBJ_CLASS_IDS 64 | else: 65 | OBJ_CLASS_IDS = SCANNET_OBJ_CLASS_IDS 66 | 67 | bbox_mask = np.in1d(unaligned_bboxes[:, -1], OBJ_CLASS_IDS) 68 | unaligned_bboxes = unaligned_bboxes[bbox_mask, :] 69 | bbox_mask = np.in1d(aligned_bboxes[:, -1], OBJ_CLASS_IDS) 70 | aligned_bboxes = aligned_bboxes[bbox_mask, :] 71 | assert unaligned_bboxes.shape[0] == aligned_bboxes.shape[0] 72 | print(f'Num of care instances: {unaligned_bboxes.shape[0]}') 73 | 74 | if max_num_point is not None: 75 | max_num_point = int(max_num_point) 76 | N = mesh_vertices.shape[0] 77 | if N > max_num_point: 78 | choices = np.random.choice(N, max_num_point, replace=False) 79 | mesh_vertices = mesh_vertices[choices, :] 80 | if not test_mode: 81 | semantic_labels = semantic_labels[choices] 82 | instance_labels = instance_labels[choices] 83 | 84 | mesh = o3d.io.read_triangle_mesh(mesh_file) 85 | vertices = torch.from_numpy(np.array(mesh.vertices).astype(np.float32)) 86 | faces = torch.from_numpy(np.array(mesh.triangles).astype(np.int64)) 87 | superpoints = segmentator.segment_mesh(vertices, faces).numpy() 88 | 89 | np.save(f'{output_filename_prefix}_sp_label.npy', superpoints) 90 | np.save(f'{output_filename_prefix}_vert.npy', mesh_vertices) 91 | 92 | if not test_mode: 93 | assert superpoints.shape == semantic_labels.shape 94 | np.save(f'{output_filename_prefix}_sem_label.npy', semantic_labels) 95 | np.save(f'{output_filename_prefix}_ins_label.npy', instance_labels) 96 | np.save(f'{output_filename_prefix}_unaligned_bbox.npy', 97 | unaligned_bboxes) 98 | np.save(f'{output_filename_prefix}_aligned_bbox.npy', aligned_bboxes) 99 | np.save(f'{output_filename_prefix}_axis_align_matrix.npy', 100 | axis_align_matrix) 101 | 102 | 103 | def batch_export(max_num_point, 104 | output_folder, 105 | scan_names_file, 106 | label_map_file, 107 | scannet_dir, 108 | test_mode=False, 109 | scannet200=False): 110 | if test_mode and not os.path.exists(scannet_dir): 111 | # test data preparation is optional 112 | return 113 | if not os.path.exists(output_folder): 114 | print(f'Creating new data folder: {output_folder}') 115 | os.mkdir(output_folder) 116 | 117 | scan_names = [line.rstrip() for line in open(scan_names_file)] 118 | for scan_name in scan_names: 119 | print('-' * 20 + 'begin') 120 | print(datetime.datetime.now()) 121 | print(scan_name) 122 | output_filename_prefix = osp.join(output_folder, scan_name) 123 | if osp.isfile(f'{output_filename_prefix}_vert.npy'): 124 | print('File already exists. 
skipping.') 125 | print('-' * 20 + 'done') 126 | continue 127 | try: 128 | export_one_scan(scan_name, output_filename_prefix, max_num_point, 129 | label_map_file, scannet_dir, test_mode, scannet200) 130 | except Exception: 131 | print(f'Failed export scan: {scan_name}') 132 | print('-' * 20 + 'done') 133 | 134 | 135 | def main(): 136 | parser = argparse.ArgumentParser() 137 | parser.add_argument( 138 | '--max_num_point', 139 | default=None, 140 | help='The maximum number of the points.') 141 | parser.add_argument( 142 | '--output_folder', 143 | default='./scannet_instance_data', 144 | help='output folder of the result.') 145 | parser.add_argument( 146 | '--train_scannet_dir', default='scans', help='scannet data directory.') 147 | parser.add_argument( 148 | '--test_scannet_dir', 149 | default='scans_test', 150 | help='scannet data directory.') 151 | parser.add_argument( 152 | '--label_map_file', 153 | default='meta_data/scannetv2-labels.combined.tsv', 154 | help='The path of label map file.') 155 | parser.add_argument( 156 | '--train_scan_names_file', 157 | default='meta_data/scannet_train.txt', 158 | help='The path of the file that stores the scan names.') 159 | parser.add_argument( 160 | '--test_scan_names_file', 161 | default='meta_data/scannetv2_test.txt', 162 | help='The path of the file that stores the scan names.') 163 | parser.add_argument( 164 | '--scannet200', 165 | action='store_true', 166 | help='Use it for scannet200 mapping') 167 | args = parser.parse_args() 168 | batch_export( 169 | args.max_num_point, 170 | args.output_folder, 171 | args.train_scan_names_file, 172 | args.label_map_file, 173 | args.train_scannet_dir, 174 | test_mode=False, 175 | scannet200=args.scannet200) 176 | batch_export( 177 | args.max_num_point, 178 | args.output_folder, 179 | args.test_scan_names_file, 180 | args.label_map_file, 181 | args.test_scannet_dir, 182 | test_mode=True, 183 | scannet200=args.scannet200) 184 | 185 | 186 | if __name__ == '__main__': 187 | main() 188 | -------------------------------------------------------------------------------- /data/scannet/load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/facebookresearch/votenet/blob/master/scannet/load_scannet_data.py 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | # 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 
7 | """Load Scannet scenes with vertices and ground truth labels for semantic and 8 | instance segmentations.""" 9 | import argparse 10 | import inspect 11 | import json 12 | import os 13 | 14 | import numpy as np 15 | import scannet_utils 16 | 17 | currentdir = os.path.dirname( 18 | os.path.abspath(inspect.getfile(inspect.currentframe()))) 19 | 20 | 21 | def read_aggregation(filename): 22 | assert os.path.isfile(filename) 23 | object_id_to_segs = {} 24 | label_to_segs = {} 25 | with open(filename) as f: 26 | data = json.load(f) 27 | num_objects = len(data['segGroups']) 28 | for i in range(num_objects): 29 | object_id = data['segGroups'][i][ 30 | 'objectId'] + 1 # instance ids should be 1-indexed 31 | label = data['segGroups'][i]['label'] 32 | segs = data['segGroups'][i]['segments'] 33 | object_id_to_segs[object_id] = segs 34 | if label in label_to_segs: 35 | label_to_segs[label].extend(segs) 36 | else: 37 | label_to_segs[label] = segs 38 | return object_id_to_segs, label_to_segs 39 | 40 | 41 | def read_segmentation(filename): 42 | assert os.path.isfile(filename) 43 | seg_to_verts = {} 44 | with open(filename) as f: 45 | data = json.load(f) 46 | num_verts = len(data['segIndices']) 47 | for i in range(num_verts): 48 | seg_id = data['segIndices'][i] 49 | if seg_id in seg_to_verts: 50 | seg_to_verts[seg_id].append(i) 51 | else: 52 | seg_to_verts[seg_id] = [i] 53 | return seg_to_verts, num_verts 54 | 55 | 56 | def extract_bbox(mesh_vertices, object_id_to_segs, object_id_to_label_id, 57 | instance_ids): 58 | num_instances = len(np.unique(list(object_id_to_segs.keys()))) 59 | instance_bboxes = np.zeros((num_instances, 7)) 60 | for obj_id in object_id_to_segs: 61 | label_id = object_id_to_label_id[obj_id] 62 | obj_pc = mesh_vertices[instance_ids == obj_id, 0:3] 63 | if len(obj_pc) == 0: 64 | continue 65 | xyz_min = np.min(obj_pc, axis=0) 66 | xyz_max = np.max(obj_pc, axis=0) 67 | bbox = np.concatenate([(xyz_min + xyz_max) / 2.0, xyz_max - xyz_min, 68 | np.array([label_id])]) 69 | # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES 70 | instance_bboxes[obj_id - 1, :] = bbox 71 | return instance_bboxes 72 | 73 | 74 | def export(mesh_file, 75 | agg_file, 76 | seg_file, 77 | meta_file, 78 | label_map_file, 79 | output_file=None, 80 | test_mode=False, 81 | scannet200=False): 82 | """Export original files to vert, ins_label, sem_label and bbox file. 83 | 84 | Args: 85 | mesh_file (str): Path of the mesh_file. 86 | agg_file (str): Path of the agg_file. 87 | seg_file (str): Path of the seg_file. 88 | meta_file (str): Path of the meta_file. 89 | label_map_file (str): Path of the label_map_file. 90 | output_file (str): Path of the output folder. 91 | Default: None. 92 | test_mode (bool): Whether is generating test data without labels. 93 | Default: False. 94 | 95 | It returns a tuple, which contains the the following things: 96 | np.ndarray: Vertices of points data. 97 | np.ndarray: Indexes of label. 98 | np.ndarray: Indexes of instance. 99 | np.ndarray: Instance bboxes. 100 | dict: Map from object_id to label_id. 
101 | """ 102 | if scannet200: 103 | label_map = scannet_utils.read_label_mapping( 104 | label_map_file, label_from='raw_category', label_to='id') 105 | else: 106 | label_map = scannet_utils.read_label_mapping( 107 | label_map_file, label_from='raw_category', label_to='nyu40id') 108 | 109 | mesh_vertices = scannet_utils.read_mesh_vertices_rgb(mesh_file) 110 | 111 | # Load scene axis alignment matrix 112 | lines = open(meta_file).readlines() 113 | # test set data doesn't have align_matrix 114 | axis_align_matrix = np.eye(4) 115 | for line in lines: 116 | if 'axisAlignment' in line: 117 | axis_align_matrix = [ 118 | float(x) 119 | for x in line.rstrip().strip('axisAlignment = ').split(' ') 120 | ] 121 | break 122 | axis_align_matrix = np.array(axis_align_matrix).reshape((4, 4)) 123 | 124 | # perform global alignment of mesh vertices 125 | pts = np.ones((mesh_vertices.shape[0], 4)) 126 | pts[:, 0:3] = mesh_vertices[:, 0:3] 127 | pts = np.dot(pts, axis_align_matrix.transpose()) # Nx4 128 | aligned_mesh_vertices = np.concatenate([pts[:, 0:3], mesh_vertices[:, 3:]], 129 | axis=1) 130 | 131 | # Load semantic and instance labels 132 | if not test_mode: 133 | object_id_to_segs, label_to_segs = read_aggregation(agg_file) 134 | seg_to_verts, num_verts = read_segmentation(seg_file) 135 | label_ids = np.zeros(shape=(num_verts), dtype=np.uint32) 136 | object_id_to_label_id = {} 137 | for label, segs in label_to_segs.items(): 138 | label_id = label_map[label] 139 | for seg in segs: 140 | verts = seg_to_verts[seg] 141 | label_ids[verts] = label_id 142 | instance_ids = np.zeros( 143 | shape=(num_verts), dtype=np.uint32) # 0: unannotated 144 | for object_id, segs in object_id_to_segs.items(): 145 | for seg in segs: 146 | verts = seg_to_verts[seg] 147 | instance_ids[verts] = object_id 148 | if object_id not in object_id_to_label_id: 149 | object_id_to_label_id[object_id] = label_ids[verts][0] 150 | unaligned_bboxes = extract_bbox(mesh_vertices, object_id_to_segs, 151 | object_id_to_label_id, instance_ids) 152 | aligned_bboxes = extract_bbox(aligned_mesh_vertices, object_id_to_segs, 153 | object_id_to_label_id, instance_ids) 154 | else: 155 | label_ids = None 156 | instance_ids = None 157 | unaligned_bboxes = None 158 | aligned_bboxes = None 159 | object_id_to_label_id = None 160 | 161 | if output_file is not None: 162 | np.save(output_file + '_vert.npy', mesh_vertices) 163 | if not test_mode: 164 | np.save(output_file + '_sem_label.npy', label_ids) 165 | np.save(output_file + '_ins_label.npy', instance_ids) 166 | np.save(output_file + '_unaligned_bbox.npy', unaligned_bboxes) 167 | np.save(output_file + '_aligned_bbox.npy', aligned_bboxes) 168 | np.save(output_file + '_axis_align_matrix.npy', axis_align_matrix) 169 | 170 | return mesh_vertices, label_ids, instance_ids, unaligned_bboxes, \ 171 | aligned_bboxes, object_id_to_label_id, axis_align_matrix 172 | 173 | 174 | def main(): 175 | parser = argparse.ArgumentParser() 176 | parser.add_argument( 177 | '--scan_path', 178 | required=True, 179 | help='path to scannet scene (e.g., data/ScanNet/v2/scene0000_00') 180 | parser.add_argument('--output_file', required=True, help='output file') 181 | parser.add_argument( 182 | '--label_map_file', 183 | required=True, 184 | help='path to scannetv2-labels.combined.tsv') 185 | parser.add_argument( 186 | '--scannet200', 187 | action='store_true', 188 | help='Use it for scannet200 mapping') 189 | 190 | opt = parser.parse_args() 191 | 192 | scan_name = os.path.split(opt.scan_path)[-1] 193 | mesh_file = 
os.path.join(opt.scan_path, scan_name + '_vh_clean_2.ply') 194 | agg_file = os.path.join(opt.scan_path, scan_name + '.aggregation.json') 195 | seg_file = os.path.join(opt.scan_path, 196 | scan_name + '_vh_clean_2.0.010000.segs.json') 197 | meta_file = os.path.join( 198 | opt.scan_path, scan_name + 199 | '.txt') # includes axisAlignment info for the train set scans. 200 | export(mesh_file, agg_file, seg_file, meta_file, opt.label_map_file, 201 | opt.output_file, scannet200=opt.scannet200) 202 | 203 | 204 | if __name__ == '__main__': 205 | main() 206 | -------------------------------------------------------------------------------- /data/scannet/meta_data/scannet_means.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/scannet/meta_data/scannet_means.npz -------------------------------------------------------------------------------- /data/scannet/meta_data/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | scene0752_00 47 | scene0753_00 48 | scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /data/scannet/meta_data/scannetv2_val.txt: -------------------------------------------------------------------------------- 1 | scene0568_00 2 | scene0568_01 3 | scene0568_02 4 | scene0304_00 5 | scene0488_00 6 | scene0488_01 7 | scene0412_00 8 | scene0412_01 9 | scene0217_00 10 | scene0019_00 11 | scene0019_01 12 | scene0414_00 13 | scene0575_00 14 | scene0575_01 15 | scene0575_02 16 | scene0426_00 17 | scene0426_01 18 | scene0426_02 19 | scene0426_03 20 | scene0549_00 21 | scene0549_01 22 | scene0578_00 23 | scene0578_01 24 | scene0578_02 25 | scene0665_00 26 | 
scene0665_01 27 | scene0050_00 28 | scene0050_01 29 | scene0050_02 30 | scene0257_00 31 | scene0025_00 32 | scene0025_01 33 | scene0025_02 34 | scene0583_00 35 | scene0583_01 36 | scene0583_02 37 | scene0701_00 38 | scene0701_01 39 | scene0701_02 40 | scene0580_00 41 | scene0580_01 42 | scene0565_00 43 | scene0169_00 44 | scene0169_01 45 | scene0655_00 46 | scene0655_01 47 | scene0655_02 48 | scene0063_00 49 | scene0221_00 50 | scene0221_01 51 | scene0591_00 52 | scene0591_01 53 | scene0591_02 54 | scene0678_00 55 | scene0678_01 56 | scene0678_02 57 | scene0462_00 58 | scene0427_00 59 | scene0595_00 60 | scene0193_00 61 | scene0193_01 62 | scene0164_00 63 | scene0164_01 64 | scene0164_02 65 | scene0164_03 66 | scene0598_00 67 | scene0598_01 68 | scene0598_02 69 | scene0599_00 70 | scene0599_01 71 | scene0599_02 72 | scene0328_00 73 | scene0300_00 74 | scene0300_01 75 | scene0354_00 76 | scene0458_00 77 | scene0458_01 78 | scene0423_00 79 | scene0423_01 80 | scene0423_02 81 | scene0307_00 82 | scene0307_01 83 | scene0307_02 84 | scene0606_00 85 | scene0606_01 86 | scene0606_02 87 | scene0432_00 88 | scene0432_01 89 | scene0608_00 90 | scene0608_01 91 | scene0608_02 92 | scene0651_00 93 | scene0651_01 94 | scene0651_02 95 | scene0430_00 96 | scene0430_01 97 | scene0689_00 98 | scene0357_00 99 | scene0357_01 100 | scene0574_00 101 | scene0574_01 102 | scene0574_02 103 | scene0329_00 104 | scene0329_01 105 | scene0329_02 106 | scene0153_00 107 | scene0153_01 108 | scene0616_00 109 | scene0616_01 110 | scene0671_00 111 | scene0671_01 112 | scene0618_00 113 | scene0382_00 114 | scene0382_01 115 | scene0490_00 116 | scene0621_00 117 | scene0607_00 118 | scene0607_01 119 | scene0149_00 120 | scene0695_00 121 | scene0695_01 122 | scene0695_02 123 | scene0695_03 124 | scene0389_00 125 | scene0377_00 126 | scene0377_01 127 | scene0377_02 128 | scene0342_00 129 | scene0139_00 130 | scene0629_00 131 | scene0629_01 132 | scene0629_02 133 | scene0496_00 134 | scene0633_00 135 | scene0633_01 136 | scene0518_00 137 | scene0652_00 138 | scene0406_00 139 | scene0406_01 140 | scene0406_02 141 | scene0144_00 142 | scene0144_01 143 | scene0494_00 144 | scene0278_00 145 | scene0278_01 146 | scene0316_00 147 | scene0609_00 148 | scene0609_01 149 | scene0609_02 150 | scene0609_03 151 | scene0084_00 152 | scene0084_01 153 | scene0084_02 154 | scene0696_00 155 | scene0696_01 156 | scene0696_02 157 | scene0351_00 158 | scene0351_01 159 | scene0643_00 160 | scene0644_00 161 | scene0645_00 162 | scene0645_01 163 | scene0645_02 164 | scene0081_00 165 | scene0081_01 166 | scene0081_02 167 | scene0647_00 168 | scene0647_01 169 | scene0535_00 170 | scene0353_00 171 | scene0353_01 172 | scene0353_02 173 | scene0559_00 174 | scene0559_01 175 | scene0559_02 176 | scene0593_00 177 | scene0593_01 178 | scene0246_00 179 | scene0653_00 180 | scene0653_01 181 | scene0064_00 182 | scene0064_01 183 | scene0356_00 184 | scene0356_01 185 | scene0356_02 186 | scene0030_00 187 | scene0030_01 188 | scene0030_02 189 | scene0222_00 190 | scene0222_01 191 | scene0338_00 192 | scene0338_01 193 | scene0338_02 194 | scene0378_00 195 | scene0378_01 196 | scene0378_02 197 | scene0660_00 198 | scene0553_00 199 | scene0553_01 200 | scene0553_02 201 | scene0527_00 202 | scene0663_00 203 | scene0663_01 204 | scene0663_02 205 | scene0664_00 206 | scene0664_01 207 | scene0664_02 208 | scene0334_00 209 | scene0334_01 210 | scene0334_02 211 | scene0046_00 212 | scene0046_01 213 | scene0046_02 214 | scene0203_00 215 | scene0203_01 216 | scene0203_02 217 
| scene0088_00 218 | scene0088_01 219 | scene0088_02 220 | scene0088_03 221 | scene0086_00 222 | scene0086_01 223 | scene0086_02 224 | scene0670_00 225 | scene0670_01 226 | scene0256_00 227 | scene0256_01 228 | scene0256_02 229 | scene0249_00 230 | scene0441_00 231 | scene0658_00 232 | scene0704_00 233 | scene0704_01 234 | scene0187_00 235 | scene0187_01 236 | scene0131_00 237 | scene0131_01 238 | scene0131_02 239 | scene0207_00 240 | scene0207_01 241 | scene0207_02 242 | scene0461_00 243 | scene0011_00 244 | scene0011_01 245 | scene0343_00 246 | scene0251_00 247 | scene0077_00 248 | scene0077_01 249 | scene0684_00 250 | scene0684_01 251 | scene0550_00 252 | scene0686_00 253 | scene0686_01 254 | scene0686_02 255 | scene0208_00 256 | scene0500_00 257 | scene0500_01 258 | scene0552_00 259 | scene0552_01 260 | scene0648_00 261 | scene0648_01 262 | scene0435_00 263 | scene0435_01 264 | scene0435_02 265 | scene0435_03 266 | scene0690_00 267 | scene0690_01 268 | scene0693_00 269 | scene0693_01 270 | scene0693_02 271 | scene0700_00 272 | scene0700_01 273 | scene0700_02 274 | scene0699_00 275 | scene0231_00 276 | scene0231_01 277 | scene0231_02 278 | scene0697_00 279 | scene0697_01 280 | scene0697_02 281 | scene0697_03 282 | scene0474_00 283 | scene0474_01 284 | scene0474_02 285 | scene0474_03 286 | scene0474_04 287 | scene0474_05 288 | scene0355_00 289 | scene0355_01 290 | scene0146_00 291 | scene0146_01 292 | scene0146_02 293 | scene0196_00 294 | scene0702_00 295 | scene0702_01 296 | scene0702_02 297 | scene0314_00 298 | scene0277_00 299 | scene0277_01 300 | scene0277_02 301 | scene0095_00 302 | scene0095_01 303 | scene0015_00 304 | scene0100_00 305 | scene0100_01 306 | scene0100_02 307 | scene0558_00 308 | scene0558_01 309 | scene0558_02 310 | scene0685_00 311 | scene0685_01 312 | scene0685_02 313 | -------------------------------------------------------------------------------- /data/scannet/scannet_utils.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/facebookresearch/votenet/blob/master/scannet/scannet_utils.py 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | # 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | """Ref: https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts 8 | """ 9 | 10 | import csv 11 | import os 12 | 13 | import numpy as np 14 | from plyfile import PlyData 15 | 16 | 17 | def represents_int(s): 18 | """Judge whether string s represents an int. 19 | 20 | Args: 21 | s(str): The input string to be judged. 22 | 23 | Returns: 24 | bool: Whether s represents int or not. 25 | """ 26 | try: 27 | int(s) 28 | return True 29 | except ValueError: 30 | return False 31 | 32 | 33 | def read_label_mapping(filename, 34 | label_from='raw_category', 35 | label_to='nyu40id'): 36 | assert os.path.isfile(filename) 37 | mapping = dict() 38 | with open(filename) as csvfile: 39 | reader = csv.DictReader(csvfile, delimiter='\t') 40 | for row in reader: 41 | mapping[row[label_from]] = int(row[label_to]) 42 | if represents_int(list(mapping.keys())[0]): 43 | mapping = {int(k): v for k, v in mapping.items()} 44 | return mapping 45 | 46 | 47 | def read_mesh_vertices(filename): 48 | """Read XYZ for each vertex. 49 | 50 | Args: 51 | filename(str): The name of the mesh vertices file. 52 | 53 | Returns: 54 | ndarray: Vertices. 
55 | """ 56 | assert os.path.isfile(filename) 57 | with open(filename, 'rb') as f: 58 | plydata = PlyData.read(f) 59 | num_verts = plydata['vertex'].count 60 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 61 | vertices[:, 0] = plydata['vertex'].data['x'] 62 | vertices[:, 1] = plydata['vertex'].data['y'] 63 | vertices[:, 2] = plydata['vertex'].data['z'] 64 | return vertices 65 | 66 | 67 | def read_mesh_vertices_rgb(filename): 68 | """Read XYZ and RGB for each vertex. 69 | 70 | Args: 71 | filename(str): The name of the mesh vertices file. 72 | 73 | Returns: 74 | Vertices. Note that RGB values are in 0-255. 75 | """ 76 | assert os.path.isfile(filename) 77 | with open(filename, 'rb') as f: 78 | plydata = PlyData.read(f) 79 | num_verts = plydata['vertex'].count 80 | vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32) 81 | vertices[:, 0] = plydata['vertex'].data['x'] 82 | vertices[:, 1] = plydata['vertex'].data['y'] 83 | vertices[:, 2] = plydata['vertex'].data['z'] 84 | vertices[:, 3] = plydata['vertex'].data['red'] 85 | vertices[:, 4] = plydata['vertex'].data['green'] 86 | vertices[:, 5] = plydata['vertex'].data['blue'] 87 | return vertices 88 | -------------------------------------------------------------------------------- /data/scannetpp/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare ScanNet++ Data for Indoor 3D Detection 2 | 3 | 1. Download data from the official [ScanNet++](https://github.com/scannetpp/scannetpp). 4 | 5 | 2. Preprocess raw data by running: 6 | 7 | ```bash 8 | python preprocess_raw_data.py --path_to_data path_to_dataset --output_dir path_to_save_preprocessed_raw_data 9 | ``` 10 | 11 | 3. Generate bins and pkls data by running: 12 | 13 | ```bash 14 | python prepare_bins_pkls.py --path_to_data path_to_preprocessed_raw_data --path_to_save_bins path_to_save_bins 15 | ``` 16 | 17 | Overall you achieve the following file structure in `bins` directory: 18 | ``` 19 | bins 20 | ├── bboxs 21 | │ ├── xxxxx_xx.npy 22 | ├── instance_mask 23 | │ ├── xxxxx_xx.bin 24 | ├── points 25 | │ ├── xxxxx_xx.bin 26 | ├── semantic_mask 27 | │ ├── xxxxx_xx.bin 28 | ├── superpoints 29 | │ ├── xxxxx_xx.bin 30 | ├── scannetpp_infos_train.pkl 31 | ├── scannetpp_infos_val.pkl 32 | ├── scannetpp_infos_test.pkl 33 | ``` 34 | -------------------------------------------------------------------------------- /data/scannetpp/prepare_bins_pkls.py: -------------------------------------------------------------------------------- 1 | import mmengine 2 | import os 3 | from tqdm.auto import tqdm 4 | import numpy as np 5 | import argparse 6 | 7 | OBJ2SEM = {'wall': 0, 8 | 'ceiling': 1, 9 | 'floor': 2, 10 | 'table': 3, 11 | 'door': 4, 12 | 'ceiling lamp': 5, 13 | 'cabinet': 6, 14 | 'blinds': 7, 15 | 'curtain': 8, 16 | 'chair': 9, 17 | 'storage cabinet': 10, 18 | 'office chair': 11, 19 | 'bookshelf': 12, 20 | 'whiteboard': 13, 21 | 'window': 14, 22 | 'box': 15, 23 | 'window frame': 16, 24 | 'monitor': 17, 25 | 'shelf': 18, 26 | 'doorframe': 19, 27 | 'pipe': 20, 28 | 'heater': 21, 29 | 'kitchen cabinet': 22, 30 | 'sofa': 23, 31 | 'windowsill': 24, 32 | 'bed': 25, 33 | 'shower wall': 26, 34 | 'trash can': 27, 35 | 'book': 28, 36 | 'plant': 29, 37 | 'blanket': 30, 38 | 'tv': 31, 39 | 'computer tower': 32, 40 | 'kitchen counter': 33, 41 | 'refrigerator': 34, 42 | 'jacket': 35, 43 | 'electrical duct': 36, 44 | 'sink': 37, 45 | 'bag': 38, 46 | 'picture': 39, 47 | 'pillow': 40, 48 | 'towel': 41, 49 | 'suitcase': 42, 50 | 'backpack': 43, 51 | 
'crate': 44, 52 | 'keyboard': 45, 53 | 'rack': 46, 54 | 'toilet': 47, 55 | 'paper': 48, 56 | 'printer': 49, 57 | 'poster': 50, 58 | 'painting': 51, 59 | 'microwave': 52, 60 | 'board': 53, 61 | 'shoes': 54, 62 | 'socket': 55, 63 | 'bottle': 56, 64 | 'bucket': 57, 65 | 'cushion': 58, 66 | 'basket': 59, 67 | 'shoe rack': 60, 68 | 'telephone': 61, 69 | 'file folder': 62, 70 | 'cloth': 63, 71 | 'blind rail': 64, 72 | 'laptop': 65, 73 | 'plant pot': 66, 74 | 'exhaust fan': 67, 75 | 'cup': 68, 76 | 'coat hanger': 69, 77 | 'light switch': 70, 78 | 'speaker': 71, 79 | 'table lamp': 72, 80 | 'air vent': 73, 81 | 'clothes hanger': 74, 82 | 'kettle': 75, 83 | 'smoke detector': 76, 84 | 'container': 77, 85 | 'power strip': 78, 86 | 'slippers': 79, 87 | 'paper bag': 80, 88 | 'mouse': 81, 89 | 'cutting board': 82, 90 | 'toilet paper': 83, 91 | 'paper towel': 84, 92 | 'pot': 85, 93 | 'clock': 86, 94 | 'pan': 87, 95 | 'tap': 88, 96 | 'jar': 89, 97 | 'soap dispenser': 90, 98 | 'binder': 91, 99 | 'bowl': 92, 100 | 'tissue box': 93, 101 | 'whiteboard eraser': 94, 102 | 'toilet brush': 95, 103 | 'spray bottle': 96, 104 | 'headphones': 97, 105 | 'stapler': 98, 106 | 'marker': 99} 107 | 108 | def create_dir(path): 109 | if not os.path.exists(path): 110 | os.mkdir(path) 111 | 112 | def load_txt(path): 113 | res = [] 114 | 115 | with open(path) as f: 116 | for line in tqdm(f): 117 | res.append(line.strip()) 118 | 119 | return res 120 | 121 | def create_dirs(path): 122 | points = os.path.join(path, 'points') 123 | create_dir(points) 124 | 125 | semantic_mask = os.path.join(path, 'semantic_mask') 126 | create_dir(semantic_mask) 127 | 128 | instance_mask = os.path.join(path, 'instance_mask') 129 | create_dir(instance_mask) 130 | 131 | bboxs = os.path.join(path, 'bboxs') 132 | create_dir(bboxs) 133 | 134 | superpoints = os.path.join(path, 'superpoints') 135 | create_dir(superpoints) 136 | return { 137 | 'points': points, 138 | 'semantic_mask': semantic_mask, 139 | 'instance_mask': instance_mask, 140 | 'bboxs': bboxs, 141 | 'superpoints': superpoints 142 | } 143 | 144 | def create_metainfo(): 145 | 146 | return { 147 | 'categories': OBJ2SEM, 148 | 'dataset': 'scannetpp', 149 | 'info_version': '1.0' 150 | } 151 | 152 | def create_data_list(split, splits, bins_path): 153 | 154 | scenes = splits[split] 155 | final_list = [] 156 | for scene in tqdm(scenes): 157 | lidar_points = { 158 | 'num_pts_feats': 6, 159 | 'lidar_path': f'{scene}.bin' 160 | } 161 | raw_bboxs = np.load(os.path.join(bins_path['bboxs'], f'{scene}.npy')) 162 | instances = [] 163 | for rb in raw_bboxs: 164 | if len(rb) == 0: 165 | instances = [] 166 | else: 167 | instances.append({ 168 | 'bbox_3d': rb[:6].tolist(), 169 | 'bbox_label_3d': int(rb[-1]) 170 | }) 171 | final_list.append({ 172 | 'lidar_points': lidar_points, 173 | 'instances': instances, 174 | 'pts_semantic_mask_path': f'{scene}.bin', 175 | 'pts_instance_mask_path': f'{scene}.bin', 176 | 'axis_align_matrix': np.eye(4) 177 | }) 178 | 179 | return final_list 180 | 181 | def create_pkl_file(path_to_save, split, splits, 182 | bins_path, pkl_prefix = 'scannetpp'): 183 | metainfo = create_metainfo() 184 | data_list = create_data_list(split, splits, bins_path) 185 | anno = { 186 | 'metainfo': metainfo, 187 | 'data_list': data_list 188 | } 189 | filename = os.path.join(path_to_save, f'{pkl_prefix}_infos_{split}.pkl') 190 | mmengine.dump(anno, filename, 'pkl') 191 | 192 | if __name__ == '__main__': 193 | parser = argparse.ArgumentParser() 194 | parser.add_argument( 195 | '--path_to_data', 196 | 
required=True, 197 | help='Path to preprocessed raw data', 198 | type=str, 199 | ) 200 | 201 | parser.add_argument( 202 | '--path_to_save_bins', 203 | required=True, 204 | help='Enter here the path where to save bins and pkls', 205 | type=str, 206 | ) 207 | 208 | args = parser.parse_args() 209 | print(args) 210 | 211 | path_to_raw_data = args.path_to_data 212 | path_to_save_data = args.path_to_save_bins 213 | create_dir(path_to_save_data) 214 | bins_path = create_dirs(path_to_save_data) 215 | 216 | path_to_train_ids = os.path.join(path_to_raw_data, 'nvs_sem_train.txt') 217 | train_scenes = load_txt(path_to_train_ids) 218 | path_to_val_ids = os.path.join(path_to_raw_data, 'nvs_sem_val.txt') 219 | val_scenes = load_txt(path_to_val_ids) 220 | path_to_sem_test_ids = os.path.join(path_to_raw_data, 'sem_test.txt') 221 | test_scenes = load_txt(path_to_sem_test_ids) 222 | 223 | splits = { 224 | 'train': train_scenes, 225 | 'val': val_scenes, 226 | 'test': test_scenes 227 | } 228 | 229 | path_to_raw_data = os.path.join(path_to_raw_data, 'data') 230 | scene_ids = os.listdir(path_to_raw_data) 231 | 232 | for si in tqdm(scene_ids): 233 | temp_path = os.path.join(path_to_raw_data, si) 234 | point_cloud = np.load(temp_path + f'/{si}_point_cloud.npy') 235 | sem_label = np.load(temp_path + f'/{si}_semantic.npy') 236 | ins_label = np.load(temp_path + f'/{si}_instance.npy') 237 | bboxs = np.load(temp_path + f'/{si}_bboxs.npy') 238 | superpoints = np.load(temp_path + f'/{si}_superpoints.npy') 239 | 240 | point_cloud.astype(np.float32).tofile( 241 | os.path.join(bins_path['points'], f'{si}.bin')) 242 | sem_label.astype(np.int64).tofile( 243 | os.path.join(bins_path['semantic_mask'], f'{si}.bin')) 244 | ins_label.astype(np.int64).tofile( 245 | os.path.join(bins_path['instance_mask'], f'{si}.bin')) 246 | superpoints.astype(np.int64).tofile( 247 | os.path.join(bins_path['superpoints'], f'{si}.bin')) 248 | np.save(os.path.join(bins_path['bboxs'], f'{si}.npy'), bboxs) 249 | 250 | create_pkl_file(path_to_save_data, 'train', splits, bins_path) 251 | create_pkl_file(path_to_save_data, 'val', splits, bins_path) 252 | create_pkl_file(path_to_save_data, 'test', splits, bins_path) 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | -------------------------------------------------------------------------------- /data/scannetpp/preprocess_raw_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import json 4 | import numpy as np 5 | from plyfile import PlyData 6 | import pandas as pd 7 | from tqdm.auto import tqdm 8 | from concurrent.futures import ProcessPoolExecutor 9 | import shutil 10 | import segmentator 11 | import torch 12 | import trimesh 13 | 14 | POINT_CLOUD_PFX = "mesh_aligned_0.05.ply" 15 | SEGMENTS_ANNO_PFX = "segments_anno.json" 16 | 17 | def _handle_id(scene_id): 18 | print(f'Processing: {scene_id}') 19 | if not os.path.isdir(os.path.join(PATH_TO_IDS, scene_id, 'scans')): 20 | return 21 | 22 | point_cloud, _ = read_plymesh(os.path.join(PATH_TO_IDS, scene_id, 23 | 'scans', POINT_CLOUD_PFX)) 24 | 25 | mesh = trimesh.load_mesh(os.path.join(PATH_TO_IDS, scene_id, 26 | 'scans', POINT_CLOUD_PFX)) 27 | vertices = mesh.vertices 28 | faces = mesh.faces 29 | 30 | vertices = torch.from_numpy(vertices.astype(np.float32)) 31 | faces = torch.from_numpy(faces.astype(np.int64)) 32 | super_points = segmentator.segment_mesh(vertices, faces).numpy() 33 | 
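    # Note: segment_mesh() assigns a superpoint id per *mesh vertex*, while the
    # rows of `point_cloud` read from the same ply may be ordered differently.
    # The dictionary built next re-attaches each point to its superpoint id by
    # exact (x, y, z) lookup, which relies on the point coordinates matching the
    # mesh vertices bit-for-bit.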
34 | mapping_superpoints = {tuple(i.tolist()): 35 | super_points[idx] for idx, i in enumerate(vertices)} 36 | super_points = np.array([mapping_superpoints[tuple(i.tolist())] 37 | for i in point_cloud[:, :3]]) 38 | 39 | assert point_cloud.shape[1] == 6 40 | assert point_cloud.shape[0] == super_points.shape[0] 41 | 42 | semantic = np.zeros((point_cloud.shape[0], 1)) - 1 # -1: unannotated 43 | instance = np.zeros((point_cloud.shape[0], 1)) - 1 # -1: unannotated 44 | if scene_id in TRAIN_IDS or scene_id in VAL_IDS: 45 | seg_anno = load_json(os.path.join(PATH_TO_IDS, scene_id, 46 | 'scans', SEGMENTS_ANNO_PFX)) 47 | seg_groups = seg_anno['segGroups'] 48 | obj_idx = 0 49 | bboxs = [] 50 | for idx, group in enumerate(seg_groups): 51 | label = group['label'] 52 | segments = np.array(group['segments']) 53 | 54 | if label in TOP100SEM2ID: 55 | new_label = label 56 | 57 | elif label in SEMANTIC_MAP_TO and label not in TOP100SEM2ID: 58 | if SEMANTIC_MAP_TO[label] in TOP100SEM2ID: 59 | new_label = SEMANTIC_MAP_TO[label] 60 | else: 61 | continue 62 | else: 63 | continue 64 | 65 | label_id = TOP100SEM2ID[new_label] 66 | 67 | point_segments = point_cloud[segments] 68 | instance[segments] = obj_idx 69 | semantic[segments] = label_id 70 | xmin = np.min(point_segments[:,0]) 71 | ymin = np.min(point_segments[:,1]) 72 | zmin = np.min(point_segments[:,2]) 73 | xmax = np.max(point_segments[:,0]) 74 | ymax = np.max(point_segments[:,1]) 75 | zmax = np.max(point_segments[:,2]) 76 | 77 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, 78 | xmax-xmin, ymax-ymin, zmax-zmin, label_id]) 79 | 80 | bboxs.append(bbox) 81 | obj_idx += 1 82 | 83 | bboxs = np.stack(bboxs) 84 | data = { 85 | 'point_cloud': point_cloud, 86 | 'semantic': semantic[:, 0].astype(int), 87 | 'instance': instance[:, 0].astype(int), 88 | 'bboxs': bboxs, 89 | 'super_points': super_points 90 | } 91 | 92 | elif scene_id in SEM_TEST_IDS: 93 | 94 | data = { 95 | 'point_cloud': point_cloud, 96 | 'semantic': semantic[:, 0].astype(int), 97 | 'instance': instance[:, 0].astype(int), 98 | 'bboxs': np.zeros((0,7)), 99 | 'super_points': super_points 100 | } 101 | 102 | output_path = os.path.join(OUTPUT_DIR_DATA, f'{scene_id}') 103 | create_dir(os.path.join(output_path)) 104 | output_prefix = os.path.join(output_path, f'{scene_id}') 105 | np.save(output_prefix+'_point_cloud.npy', data['point_cloud']) 106 | np.save(output_prefix+'_semantic.npy', data['semantic']) 107 | np.save(output_prefix+'_instance.npy', data['instance']) 108 | np.save(output_prefix+'_bboxs.npy', data['bboxs']) 109 | np.save(output_prefix+'_superpoints.npy', data['super_points']) 110 | 111 | def create_dir(path): 112 | if not os.path.exists(path): 113 | os.mkdir(path) 114 | 115 | def load_json(path): 116 | with open(path) as jd: 117 | return json.load(jd) 118 | 119 | def load_txt(path): 120 | res = [] 121 | 122 | with open(path) as f: 123 | for line in tqdm(f): 124 | res.append(line.strip()) 125 | 126 | return res 127 | 128 | def read_plymesh(filepath): 129 | """Read ply file and return it as numpy array. 
Returns None if emtpy.""" 130 | with open(filepath, 'rb') as f: 131 | plydata = PlyData.read(f) 132 | if plydata.elements: 133 | vertices = pd.DataFrame(plydata['vertex'].data).values 134 | faces = np.array([f[0] for f in plydata["face"].data]) 135 | return vertices, faces 136 | 137 | 138 | if __name__ == '__main__': 139 | parser = argparse.ArgumentParser() 140 | parser.add_argument( 141 | '--path_to_data', 142 | required=True, 143 | help='Path to raw data', 144 | type=str, 145 | ) 146 | 147 | parser.add_argument( 148 | '--output_dir', 149 | required=True, 150 | help='Path to save preprocessed raw data', 151 | type=str, 152 | ) 153 | 154 | parser.add_argument('--num_workers', default=20, type=int, 155 | help='The number of parallel workers') 156 | 157 | args = parser.parse_args() 158 | print(args) 159 | PATH_TO_DATA = args.path_to_data 160 | PATH_TO_IDS = os.path.join(PATH_TO_DATA, 'data') 161 | OUTPUT_DIR = args.output_dir 162 | create_dir(OUTPUT_DIR) 163 | 164 | OUTPUT_DIR_DATA = os.path.join(OUTPUT_DIR, 'data') 165 | create_dir(OUTPUT_DIR_DATA) 166 | 167 | TOP100SEM2ID = {} 168 | with open(os.path.join(PATH_TO_DATA , 169 | 'metadata/semantic_benchmark/top100.txt')) as f: 170 | # check = f.read() 171 | for idx, line in enumerate(f): 172 | line = line.strip() 173 | TOP100SEM2ID[line] = idx 174 | 175 | TOPINST2ID = {} 176 | with open(os.path.join(PATH_TO_DATA, 177 | 'metadata/semantic_benchmark/top100_instance.txt')) as f: 178 | for idx, line in enumerate(f): 179 | line = line.strip() 180 | TOPINST2ID[line] = TOP100SEM2ID[line] 181 | 182 | MAPPING_BENCH = pd.read_csv(os.path.join(PATH_TO_DATA, 183 | 'metadata/semantic_benchmark/map_benchmark.csv')) 184 | SEMANTIC_MAP_TO = MAPPING_BENCH[~MAPPING_BENCH['semantic_map_to'].isna()] 185 | INSTANCE_MAP_TO = MAPPING_BENCH[~MAPPING_BENCH['instance_map_to'].isna()] 186 | 187 | SEMANTIC_MAP_TO = SEMANTIC_MAP_TO[['class','semantic_map_to']].values 188 | SEMANTIC_MAP_TO = dict(zip(SEMANTIC_MAP_TO[:, 0], SEMANTIC_MAP_TO[:, 1])) 189 | print(len(SEMANTIC_MAP_TO)) 190 | 191 | INSTANCE_MAP_TO = INSTANCE_MAP_TO[['class','instance_map_to']].values 192 | INSTANCE_MAP_TO = dict(zip(INSTANCE_MAP_TO[:, 0], INSTANCE_MAP_TO[:, 1])) 193 | print(len(INSTANCE_MAP_TO)) 194 | 195 | SCENE_IDS = os.listdir(os.path.join(PATH_TO_DATA, 'data')) 196 | SCENE_IDS.remove('.ipynb_checkpoints') 197 | 198 | assert len(SCENE_IDS) == 380 199 | 200 | path_to_train_ids = os.path.join(PATH_TO_DATA, 'splits', 'nvs_sem_train.txt') 201 | TRAIN_IDS = load_txt(path_to_train_ids) 202 | path_to_val_ids = os.path.join(PATH_TO_DATA, 'splits', 'nvs_sem_val.txt') 203 | VAL_IDS = load_txt(path_to_val_ids) 204 | path_to_sem_test_ids = os.path.join(PATH_TO_DATA, 'splits', 'sem_test.txt') 205 | SEM_TEST_IDS = load_txt(path_to_sem_test_ids) 206 | 207 | shutil.copytree(os.path.join(PATH_TO_DATA, 'splits'), 208 | OUTPUT_DIR, dirs_exist_ok=True) 209 | 210 | pool = ProcessPoolExecutor(max_workers=args.num_workers) 211 | print('Processing scenes...') 212 | _ = list(pool.map(_handle_id, SCENE_IDS)) 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /tools/create_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
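# Example invocation for ScanNet (paths and tag below are placeholders; adjust
# them to your local data layout):
#   python tools/create_data.py scannet --root-path ./data/scannet \
#       --out-dir ./data/scannet --extra-tag scannet --workers 4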
2 | import argparse 3 | from os import path as osp 4 | 5 | from indoor_converter import create_indoor_info_file 6 | from update_infos_to_v2 import update_pkl_infos 7 | 8 | 9 | def scannet_data_prep(root_path, info_prefix, out_dir, workers): 10 | """Prepare the info file for scannet dataset. 11 | 12 | Args: 13 | root_path (str): Path of dataset root. 14 | info_prefix (str): The prefix of info filenames. 15 | out_dir (str): Output directory of the generated info file. 16 | workers (int): Number of threads to be used. 17 | """ 18 | create_indoor_info_file( 19 | root_path, info_prefix, out_dir, workers=workers) 20 | info_train_path = osp.join(out_dir, f'{info_prefix}_infos_train.pkl') 21 | info_val_path = osp.join(out_dir, f'{info_prefix}_infos_val.pkl') 22 | info_test_path = osp.join(out_dir, f'{info_prefix}_infos_test.pkl') 23 | update_pkl_infos(info_prefix, out_dir=out_dir, pkl_path=info_train_path) 24 | update_pkl_infos(info_prefix, out_dir=out_dir, pkl_path=info_val_path) 25 | update_pkl_infos(info_prefix, out_dir=out_dir, pkl_path=info_test_path) 26 | 27 | 28 | parser = argparse.ArgumentParser(description='Data converter arg parser') 29 | parser.add_argument('dataset', metavar='kitti', help='name of the dataset') 30 | parser.add_argument( 31 | '--root-path', 32 | type=str, 33 | default='./data/kitti', 34 | help='specify the root path of dataset') 35 | parser.add_argument( 36 | '--out-dir', 37 | type=str, 38 | default='./data/kitti', 39 | required=False, 40 | help='name of info pkl') 41 | parser.add_argument('--extra-tag', type=str, default='kitti') 42 | parser.add_argument( 43 | '--workers', type=int, default=4, help='number of threads to be used') 44 | args = parser.parse_args() 45 | 46 | if __name__ == '__main__': 47 | from mmdet3d.utils import register_all_modules 48 | register_all_modules() 49 | 50 | if args.dataset in ('scannet', 'scannet200'): 51 | scannet_data_prep( 52 | root_path=args.root_path, 53 | info_prefix=args.extra_tag, 54 | out_dir=args.out_dir, 55 | workers=args.workers) 56 | else: 57 | raise NotImplementedError(f'Don\'t support {args.dataset} dataset.') 58 | -------------------------------------------------------------------------------- /tools/indoor_converter.py: -------------------------------------------------------------------------------- 1 | # Modified from mmdetection3d/tools/dataset_converters/indoor_converter.py 2 | # We just support ScanNet 200. 3 | import os 4 | 5 | import mmengine 6 | 7 | from scannet_data_utils import ScanNetData 8 | 9 | 10 | def create_indoor_info_file(data_path, 11 | pkl_prefix='sunrgbd', 12 | save_path=None, 13 | use_v1=False, 14 | workers=4): 15 | """Create indoor information file. 16 | 17 | Get information of the raw data and save it to the pkl file. 18 | 19 | Args: 20 | data_path (str): Path of the data. 21 | pkl_prefix (str, optional): Prefix of the pkl to be saved. 22 | Default: 'sunrgbd'. 23 | save_path (str, optional): Path of the pkl to be saved. Default: None. 24 | use_v1 (bool, optional): Whether to use v1. Default: False. 25 | workers (int, optional): Number of threads to be used. Default: 4. 
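
    Example (paths are placeholders):
        >>> create_indoor_info_file('./data/scannet', pkl_prefix='scannet',
        ...                         save_path='./data/scannet', workers=4)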
26 | """ 27 | assert os.path.exists(data_path) 28 | assert pkl_prefix in ['scannet', 'scannet200'], \ 29 | f'unsupported indoor dataset {pkl_prefix}' 30 | save_path = data_path if save_path is None else save_path 31 | assert os.path.exists(save_path) 32 | 33 | # generate infos for both detection and segmentation task 34 | train_filename = os.path.join( 35 | save_path, f'{pkl_prefix}_infos_train.pkl') 36 | val_filename = os.path.join( 37 | save_path, f'{pkl_prefix}_infos_val.pkl') 38 | test_filename = os.path.join( 39 | save_path, f'{pkl_prefix}_infos_test.pkl') 40 | if pkl_prefix == 'scannet': 41 | # ScanNet has a train-val-test split 42 | train_dataset = ScanNetData(root_path=data_path, split='train') 43 | val_dataset = ScanNetData(root_path=data_path, split='val') 44 | test_dataset = ScanNetData(root_path=data_path, split='test') 45 | else: # ScanNet200 46 | # ScanNet has a train-val-test split 47 | train_dataset = ScanNetData(root_path=data_path, split='train', 48 | scannet200=True, save_path=save_path) 49 | val_dataset = ScanNetData(root_path=data_path, split='val', 50 | scannet200=True, save_path=save_path) 51 | test_dataset = ScanNetData(root_path=data_path, split='test', 52 | scannet200=True, save_path=save_path) 53 | 54 | infos_train = train_dataset.get_infos( 55 | num_workers=workers, has_label=True) 56 | mmengine.dump(infos_train, train_filename, 'pkl') 57 | print(f'{pkl_prefix} info train file is saved to {train_filename}') 58 | 59 | infos_val = val_dataset.get_infos( 60 | num_workers=workers, has_label=True) 61 | mmengine.dump(infos_val, val_filename, 'pkl') 62 | print(f'{pkl_prefix} info val file is saved to {val_filename}') 63 | 64 | infos_test = test_dataset.get_infos( 65 | num_workers=workers, has_label=False) 66 | mmengine.dump(infos_test, test_filename, 'pkl') 67 | print(f'{pkl_prefix} info test file is saved to {test_filename}') 68 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # This is an exact copy of tools/test.py from open-mmlab/mmdetection3d. 2 | import argparse 3 | import os 4 | import os.path as osp 5 | 6 | from mmengine.config import Config, ConfigDict, DictAction 7 | from mmengine.registry import RUNNERS 8 | from mmengine.runner import Runner 9 | 10 | from mmdet3d.utils import replace_ceph_backend 11 | 12 | 13 | # TODO: support fuse_conv_bn and format_only 14 | def parse_args(): 15 | parser = argparse.ArgumentParser( 16 | description='MMDet3D test (and eval) a model') 17 | parser.add_argument('config', help='test config file path') 18 | parser.add_argument('checkpoint', help='checkpoint file') 19 | parser.add_argument( 20 | '--work-dir', 21 | help='the directory to save the file containing evaluation metrics') 22 | parser.add_argument( 23 | '--ceph', action='store_true', help='Use ceph as data storage backend') 24 | parser.add_argument( 25 | '--show', action='store_true', help='show prediction results') 26 | parser.add_argument( 27 | '--show-dir', 28 | help='directory where painted images will be saved. 
' 29 | 'If specified, it will be automatically saved ' 30 | 'to the work_dir/timestamp/show_dir') 31 | parser.add_argument( 32 | '--score-thr', type=float, default=0.1, help='bbox score threshold') 33 | parser.add_argument( 34 | '--task', 35 | type=str, 36 | choices=[ 37 | 'mono_det', 'multi-view_det', 'lidar_det', 'lidar_seg', 38 | 'multi-modality_det' 39 | ], 40 | help='Determine the visualization method depending on the task.') 41 | parser.add_argument( 42 | '--wait-time', type=float, default=2, help='the interval of show (s)') 43 | parser.add_argument( 44 | '--cfg-options', 45 | nargs='+', 46 | action=DictAction, 47 | help='override some settings in the used config, the key-value pair ' 48 | 'in xxx=yyy format will be merged into config file. If the value to ' 49 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 50 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 51 | 'Note that the quotation marks are necessary and that no white space ' 52 | 'is allowed.') 53 | parser.add_argument( 54 | '--launcher', 55 | choices=['none', 'pytorch', 'slurm', 'mpi'], 56 | default='none', 57 | help='job launcher') 58 | parser.add_argument( 59 | '--tta', action='store_true', help='Test time augmentation') 60 | # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` 61 | # will pass the `--local-rank` parameter to `tools/test.py` instead 62 | # of `--local_rank`. 63 | parser.add_argument('--local_rank', '--local-rank', type=int, default=0) 64 | args = parser.parse_args() 65 | if 'LOCAL_RANK' not in os.environ: 66 | os.environ['LOCAL_RANK'] = str(args.local_rank) 67 | return args 68 | 69 | 70 | def trigger_visualization_hook(cfg, args): 71 | default_hooks = cfg.default_hooks 72 | if 'visualization' in default_hooks: 73 | visualization_hook = default_hooks['visualization'] 74 | # Turn on visualization 75 | visualization_hook['draw'] = True 76 | if args.show: 77 | visualization_hook['show'] = True 78 | visualization_hook['wait_time'] = args.wait_time 79 | if args.show_dir: 80 | visualization_hook['test_out_dir'] = args.show_dir 81 | all_task_choices = [ 82 | 'mono_det', 'multi-view_det', 'lidar_det', 'lidar_seg', 83 | 'multi-modality_det' 84 | ] 85 | assert args.task in all_task_choices, 'You must set '\ 86 | f"'--task' in {all_task_choices} in the command " \ 87 | 'if you want to use visualization hook' 88 | visualization_hook['vis_task'] = args.task 89 | visualization_hook['score_thr'] = args.score_thr 90 | else: 91 | raise RuntimeError( 92 | 'VisualizationHook must be included in default_hooks.' 
93 | 'refer to usage ' 94 | '"visualization=dict(type=\'VisualizationHook\')"') 95 | 96 | return cfg 97 | 98 | 99 | def main(): 100 | args = parse_args() 101 | 102 | # load config 103 | cfg = Config.fromfile(args.config) 104 | 105 | # TODO: We will unify the ceph support approach with other OpenMMLab repos 106 | if args.ceph: 107 | cfg = replace_ceph_backend(cfg) 108 | 109 | cfg.launcher = args.launcher 110 | if args.cfg_options is not None: 111 | cfg.merge_from_dict(args.cfg_options) 112 | 113 | # work_dir is determined in this priority: CLI > segment in file > filename 114 | if args.work_dir is not None: 115 | # update configs according to CLI args if args.work_dir is not None 116 | cfg.work_dir = args.work_dir 117 | elif cfg.get('work_dir', None) is None: 118 | # use config filename as default work_dir if cfg.work_dir is None 119 | cfg.work_dir = osp.join('./work_dirs', 120 | osp.splitext(osp.basename(args.config))[0]) 121 | 122 | cfg.load_from = args.checkpoint 123 | 124 | if args.show or args.show_dir: 125 | # cfg = trigger_visualization_hook(cfg, args) 126 | cfg.test_evaluator['vis_dir'] = args.show_dir 127 | 128 | if args.tta: 129 | # Currently, we only support tta for 3D segmentation 130 | # TODO: Support tta for 3D detection 131 | assert 'tta_model' in cfg, 'Cannot find ``tta_model`` in config.' 132 | assert 'tta_pipeline' in cfg, 'Cannot find ``tta_pipeline`` in config.' 133 | cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline 134 | cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model) 135 | 136 | # build the runner from config 137 | if 'runner_type' not in cfg: 138 | # build the default runner 139 | runner = Runner.from_cfg(cfg) 140 | else: 141 | # build customized runner from the registry 142 | # if 'runner_type' is set in the cfg 143 | runner = RUNNERS.build(cfg) 144 | 145 | # start testing 146 | runner.test() 147 | 148 | 149 | if __name__ == '__main__': 150 | main() 151 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | # This is an exact copy of tools/train.py from open-mmlab/mmdetection3d. 
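# Typical launch patterns (config and work-dir paths are placeholders; the
# distributed form follows the usual torchrun + mmengine convention):
#   python tools/train.py path/to/config.py --work-dir work_dirs/my_exp
#   torchrun --nproc_per_node=4 tools/train.py path/to/config.py --launcher pytorch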
2 | import argparse 3 | import logging 4 | import os 5 | import os.path as osp 6 | 7 | from mmengine.config import Config, DictAction 8 | from mmengine.logging import print_log 9 | from mmengine.registry import RUNNERS 10 | from mmengine.runner import Runner 11 | 12 | from mmdet3d.utils import replace_ceph_backend 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='Train a 3D detector') 17 | parser.add_argument('config', help='train config file path') 18 | parser.add_argument('--work-dir', help='the dir to save logs and models') 19 | parser.add_argument( 20 | '--amp', 21 | action='store_true', 22 | default=False, 23 | help='enable automatic-mixed-precision training') 24 | parser.add_argument( 25 | '--auto-scale-lr', 26 | action='store_true', 27 | help='enable automatically scaling LR.') 28 | parser.add_argument( 29 | '--resume', 30 | nargs='?', 31 | type=str, 32 | const='auto', 33 | help='If specify checkpoint path, resume from it, while if not ' 34 | 'specify, try to auto resume from the latest checkpoint ' 35 | 'in the work directory.') 36 | parser.add_argument( 37 | '--ceph', action='store_true', help='Use ceph as data storage backend') 38 | parser.add_argument( 39 | '--cfg-options', 40 | nargs='+', 41 | action=DictAction, 42 | help='override some settings in the used config, the key-value pair ' 43 | 'in xxx=yyy format will be merged into config file. If the value to ' 44 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 45 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 46 | 'Note that the quotation marks are necessary and that no white space ' 47 | 'is allowed.') 48 | parser.add_argument( 49 | '--launcher', 50 | choices=['none', 'pytorch', 'slurm', 'mpi'], 51 | default='none', 52 | help='job launcher') 53 | # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` 54 | # will pass the `--local-rank` parameter to `tools/train.py` instead 55 | # of `--local_rank`. 
56 | parser.add_argument('--local_rank', '--local-rank', type=int, default=0) 57 | args = parser.parse_args() 58 | if 'LOCAL_RANK' not in os.environ: 59 | os.environ['LOCAL_RANK'] = str(args.local_rank) 60 | return args 61 | 62 | 63 | def main(): 64 | args = parse_args() 65 | 66 | # load config 67 | cfg = Config.fromfile(args.config) 68 | 69 | # TODO: We will unify the ceph support approach with other OpenMMLab repos 70 | if args.ceph: 71 | cfg = replace_ceph_backend(cfg) 72 | 73 | cfg.launcher = args.launcher 74 | if args.cfg_options is not None: 75 | cfg.merge_from_dict(args.cfg_options) 76 | 77 | # work_dir is determined in this priority: CLI > segment in file > filename 78 | if args.work_dir is not None: 79 | # update configs according to CLI args if args.work_dir is not None 80 | cfg.work_dir = args.work_dir 81 | elif cfg.get('work_dir', None) is None: 82 | # use config filename as default work_dir if cfg.work_dir is None 83 | cfg.work_dir = osp.join('./work_dirs', 84 | osp.splitext(osp.basename(args.config))[0]) 85 | 86 | # enable automatic-mixed-precision training 87 | if args.amp is True: 88 | optim_wrapper = cfg.optim_wrapper.type 89 | if optim_wrapper == 'AmpOptimWrapper': 90 | print_log( 91 | 'AMP training is already enabled in your config.', 92 | logger='current', 93 | level=logging.WARNING) 94 | else: 95 | assert optim_wrapper == 'OptimWrapper', ( 96 | '`--amp` is only supported when the optimizer wrapper type is ' 97 | f'`OptimWrapper` but got {optim_wrapper}.') 98 | cfg.optim_wrapper.type = 'AmpOptimWrapper' 99 | cfg.optim_wrapper.loss_scale = 'dynamic' 100 | 101 | # enable automatically scaling LR 102 | if args.auto_scale_lr: 103 | if 'auto_scale_lr' in cfg and \ 104 | 'enable' in cfg.auto_scale_lr and \ 105 | 'base_batch_size' in cfg.auto_scale_lr: 106 | cfg.auto_scale_lr.enable = True 107 | else: 108 | raise RuntimeError('Can not find "auto_scale_lr" or ' 109 | '"auto_scale_lr.enable" or ' 110 | '"auto_scale_lr.base_batch_size" in your' 111 | ' configuration file.') 112 | 113 | # resume is determined in this priority: resume from > auto_resume 114 | if args.resume == 'auto': 115 | cfg.resume = True 116 | cfg.load_from = None 117 | elif args.resume is not None: 118 | cfg.resume = True 119 | cfg.load_from = args.resume 120 | 121 | # build the runner from config 122 | if 'runner_type' not in cfg: 123 | # build the default runner 124 | runner = Runner.from_cfg(cfg) 125 | else: 126 | # build customized runner from the registry 127 | # if 'runner_type' is set in the cfg 128 | runner = RUNNERS.build(cfg) 129 | 130 | # start training 131 | runner.train() 132 | 133 | 134 | if __name__ == '__main__': 135 | main() 136 | -------------------------------------------------------------------------------- /unidet3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .unidet3d import UniDet3D 2 | from .spconv_unet import SpConvUNet 3 | from .encoder import UniDet3DEncoder 4 | from .criterion import UniDet3DCriterion 5 | from .loading import LoadAnnotations3D_, NormalizePointsColor_, DenormalizePointsColor 6 | from .formatting import Pack3DDetInputs_ 7 | from .transforms_3d import PointDetClassMappingScanNet 8 | from .data_preprocessor import Det3DDataPreprocessor_ 9 | from .scannet_dataset import ScanNetSegDataset_, ScanNetDetDataset 10 | from .s3dis_dataset import S3DISSegDetDataset 11 | from .arkitscenes_dataset import ARKitScenesOfflineDataset 12 | from .multiscan_dataset import MultiScan_ 13 | from .rscan_dataset import ThreeRScan_ 14 
| from .scannetpp_dataset import Scannetpp_ 15 | from .structures import InstanceData_ 16 | from .axis_aligned_iou_loss import UniDet3DAxisAlignedIoULoss 17 | from .rotated_iou_loss import UniDet3DRotatedIoU3DLoss 18 | from .indoor_metric import IndoorMetric_ 19 | from .concat_dataset import ConcatDataset_ -------------------------------------------------------------------------------- /unidet3d/arkitscenes_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | 4 | from mmdet3d.registry import DATASETS 5 | from mmdet3d.datasets import Det3DDataset 6 | from mmdet3d.structures import DepthInstance3DBoxes 7 | from mmengine.logging import print_log 8 | import logging 9 | import numpy as np 10 | 11 | @DATASETS.register_module() 12 | class ARKitScenesOfflineDataset(Det3DDataset): 13 | r"""ARKitScenes dataset (offline benchmark). 14 | 15 | Args: 16 | partition(float): Defaults to 1, the part of 17 | the dataset that will be used. 18 | data_prefix (dict): Prefix for data. Defaults to 19 | dict(pts='offline_prepared_data'). 20 | box_type_3d (str): Type of 3D box of this dataset. 21 | Based on the `box_type_3d`, the dataset will encapsulate the box 22 | to its original format then converted them to `box_type_3d`. 23 | Defaults to 'Depth'. 24 | """ 25 | METAINFO = { 26 | 'classes': ('cabinet', 'refrigerator', 'shelf', 'stove', 'bed', 27 | 'sink', 'washer', 'toilet', 'bathtub', 'oven', 28 | 'dishwasher', 'fireplace', 'stool', 'chair', 'table', 29 | 'tv_monitor', 'sofa') 30 | } 31 | 32 | def __init__(self, 33 | partition: float = 1, 34 | data_prefix: dict = dict(pts='offline_prepared_data'), 35 | box_type_3d: str = 'Depth', 36 | **kwargs) -> None: 37 | self.partition = partition 38 | super().__init__( 39 | data_prefix=data_prefix, 40 | box_type_3d=box_type_3d, 41 | **kwargs) 42 | 43 | def parse_ann_info(self, info: dict) -> dict: 44 | """Process the `instances` in data info to `ann_info`. 45 | 46 | Args: 47 | info (dict): Info dict. 48 | 49 | Returns: 50 | dict: Processed `ann_info` 51 | """ 52 | ann_info = super().parse_ann_info(info) 53 | # empty gt 54 | if ann_info is None: 55 | ann_info = dict() 56 | ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32) 57 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64) 58 | 59 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes( 60 | ann_info['gt_bboxes_3d'], 61 | origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d) 62 | 63 | return ann_info 64 | 65 | def parse_data_info(self, info: dict) -> dict: 66 | """Process the raw data info. 67 | 68 | Args: 69 | info (dict): Raw info dict. 70 | 71 | Returns: 72 | dict: Has `ann_info` in training stage. And 73 | all path has been converted to absolute path. 74 | """ 75 | info['super_pts_path'] = osp.join( 76 | self.data_prefix.get('sp_pts_mask', ''), 77 | info['lidar_points']['lidar_path']) #info['super_pts_path'] 78 | 79 | info = super().parse_data_info(info) 80 | 81 | return info 82 | 83 | def __getitem__(self, idx: int) -> dict: 84 | """Get the idx-th image and data information of dataset after 85 | ``self.pipeline``, and ``full_init`` will be called if the dataset has 86 | not been fully initialized. 87 | 88 | During training phase, if ``self.pipeline`` get ``None``, 89 | ``self._rand_another`` will be called until a valid image is fetched or 90 | the maximum limit of refetech is reached. 91 | 92 | Args: 93 | idx (int): The index of self.data_list. 
94 | 95 | Returns: 96 | dict: The idx-th image and data information of dataset after 97 | ``self.pipeline``. 98 | """ 99 | # Performing full initialization by calling `__getitem__` will consume 100 | # extra memory. If a dataset is not fully initialized by setting 101 | # `lazy_init=True` and then fed into the dataloader. Different workers 102 | # will simultaneously read and parse the annotation. It will cost more 103 | # time and memory, although this may work. Therefore, it is recommended 104 | # to manually call `full_init` before dataset fed into dataloader to 105 | # ensure all workers use shared RAM from master process. 106 | 107 | if not self.test_mode: 108 | if self.serialize_data: 109 | dataset_len = len(self.data_address) 110 | else: 111 | dataset_len = len(self.data_list) 112 | idx = np.random.randint(0, dataset_len) 113 | if not self._fully_initialized: 114 | print_log( 115 | 'Please call `full_init()` method manually to accelerate ' 116 | 'the speed.', 117 | logger='current', 118 | level=logging.WARNING) 119 | self.full_init() 120 | 121 | if self.test_mode: 122 | data = self.prepare_data(idx) 123 | if data is None: 124 | raise Exception('Test time pipline should not get `None` ' 125 | 'data_sample') 126 | return data 127 | 128 | for _ in range(self.max_refetch + 1): 129 | data = self.prepare_data(idx) 130 | # Broken images or random augmentations may cause the returned data 131 | # to be None 132 | if data is None: 133 | idx = self._rand_another() 134 | continue 135 | return data 136 | 137 | def __len__(self) -> int: 138 | """Get the length of filtered dataset and automatically call 139 | ``full_init`` if the dataset has not been fully init. 140 | 141 | Returns: 142 | int: The length of filtered dataset. 143 | """ 144 | 145 | if self.serialize_data: 146 | dataset_len = len(self.data_address) 147 | else: 148 | dataset_len = len(self.data_list) 149 | if not self.test_mode: 150 | return int(self.partition * dataset_len) 151 | else: 152 | return dataset_len -------------------------------------------------------------------------------- /unidet3d/axis_aligned_iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import Optional 3 | 4 | import torch 5 | from mmdet.models.losses.utils import weighted_loss 6 | from torch import Tensor 7 | from torch import nn as nn 8 | 9 | from mmdet3d.models import axis_aligned_iou_loss 10 | from mmdet3d.registry import MODELS 11 | from mmdet3d.structures import AxisAlignedBboxOverlaps3D 12 | 13 | 14 | @weighted_loss 15 | def axis_aligned_diou_loss(pred: Tensor, target: Tensor) -> Tensor: 16 | """Calculate the DIoU loss (1-DIoU) of two sets of axis aligned bounding 17 | boxes. Note that predictions and targets are one-to-one corresponded. 18 | 19 | Args: 20 | pred (torch.Tensor): Bbox predictions with shape [..., 6] 21 | (x1, y1, z1, x2, y2, z2). 22 | target (torch.Tensor): Bbox targets (gt) with shape [..., 6] 23 | (x1, y1, z1, x2, y2, z2). 24 | 25 | Returns: 26 | torch.Tensor: DIoU loss between predictions and targets. 
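
    The distance term computed below is the squared distance between box
    centers normalized by the squared diagonal of the smallest axis-aligned
    box enclosing both boxes, added to the plain 1 - IoU term.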
27 | """ 28 | axis_aligned_iou = AxisAlignedBboxOverlaps3D()( 29 | pred, target, is_aligned=True) 30 | iou_loss = 1 - axis_aligned_iou 31 | 32 | xp1, yp1, zp1, xp2, yp2, zp2 = pred.split(1, dim=-1) 33 | xt1, yt1, zt1, xt2, yt2, zt2 = target.split(1, dim=-1) 34 | 35 | xpc = (xp1 + xp2) / 2 36 | ypc = (yp1 + yp2) / 2 37 | zpc = (zp1 + zp2) / 2 38 | xtc = (xt1 + xt2) / 2 39 | ytc = (yt1 + yt2) / 2 40 | ztc = (zt1 + zt2) / 2 41 | r2 = (xpc - xtc)**2 + (ypc - ytc)**2 + (zpc - ztc)**2 42 | 43 | x_min = torch.minimum(xp1, xt1) 44 | x_max = torch.maximum(xp2, xt2) 45 | y_min = torch.minimum(yp1, yt1) 46 | y_max = torch.maximum(yp2, yt2) 47 | z_min = torch.minimum(zp1, zt1) 48 | z_max = torch.maximum(zp2, zt2) 49 | c2 = (x_min - x_max)**2 + (y_min - y_max)**2 + (z_min - z_max)**2 50 | 51 | diou_loss = iou_loss + (r2 / c2)[:, 0] 52 | 53 | return diou_loss 54 | 55 | 56 | @MODELS.register_module() 57 | class UniDet3DAxisAlignedIoULoss(nn.Module): 58 | """Calculate the IoU loss (1-IoU) of axis aligned bounding boxes. The only 59 | difference with original AxisAlignedIoULoss is the addition of DIoU mode. 60 | These classes should be merged in the future. 61 | 62 | Args: 63 | mode (str): 'iou' for intersection over union or 'diou' for 64 | distance-iou loss. Defaults to 'iou'. 65 | reduction (str): Method to reduce losses. 66 | The valid reduction method are 'none', 'sum' or 'mean'. 67 | Defaults to 'mean'. 68 | loss_weight (float): Weight of loss. Defaults to 1.0. 69 | """ 70 | 71 | def __init__(self, 72 | mode: str = 'iou', 73 | reduction: str = 'mean', 74 | loss_weight: float = 1.0) -> None: 75 | super(UniDet3DAxisAlignedIoULoss, self).__init__() 76 | assert mode in ['iou', 'diou'] 77 | self.loss = axis_aligned_iou_loss if mode == 'iou' \ 78 | else axis_aligned_diou_loss 79 | assert reduction in ['none', 'sum', 'mean'] 80 | self.reduction = reduction 81 | self.loss_weight = loss_weight 82 | 83 | def forward(self, 84 | pred: Tensor, 85 | target: Tensor, 86 | weight: Optional[Tensor] = None, 87 | avg_factor: Optional[float] = None, 88 | reduction_override: Optional[str] = None, 89 | **kwargs) -> Tensor: 90 | """Forward function of loss calculation. 91 | 92 | Args: 93 | pred (Tensor): Bbox predictions with shape [..., 3]. 94 | target (Tensor): Bbox targets (gt) with shape [..., 3]. 95 | weight (Tensor, optional): Weight of loss. 96 | Defaults to None. 97 | avg_factor (float, optional): Average factor that is used to 98 | average the loss. Defaults to None. 99 | reduction_override (str, optional): Method to reduce losses. 100 | The valid reduction method are 'none', 'sum' or 'mean'. 101 | Defaults to None. 102 | 103 | Returns: 104 | Tensor: IoU loss between predictions and targets. 
105 | """ 106 | assert reduction_override in (None, 'none', 'mean', 'sum') 107 | reduction = ( 108 | reduction_override if reduction_override else self.reduction) 109 | if (weight is not None) and (not torch.any(weight > 0)) and ( 110 | reduction != 'none'): 111 | return (pred * weight).sum() 112 | return self.loss( 113 | pred, 114 | target, 115 | weight=weight, 116 | avg_factor=avg_factor, 117 | reduction=reduction) * self.loss_weight -------------------------------------------------------------------------------- /unidet3d/concat_dataset.py: -------------------------------------------------------------------------------- 1 | from mmengine.dataset.dataset_wrapper import ConcatDataset 2 | from mmengine.dataset.base_dataset import BaseDataset 3 | from mmdet3d.registry import DATASETS 4 | 5 | 6 | @DATASETS.register_module() 7 | class ConcatDataset_(ConcatDataset): 8 | """A wrapper of concatenated dataset. 9 | 10 | Args: 11 | datasets (Sequence[BaseDataset] or Sequence[dict]): A list of datasets 12 | which will be concatenated. 13 | lazy_init (bool, optional): Whether to load annotation during 14 | instantiation. Defaults to False. 15 | ignore_keys (List[str] or str): Ignore the keys that can be 16 | unequal in `dataset.metainfo`. Defaults to None. 17 | `New in version 0.3.0.` 18 | """ 19 | 20 | def __init__(self, 21 | datasets, 22 | lazy_init=False, 23 | ignore_keys=None): 24 | self.datasets = [] 25 | for i, dataset in enumerate(datasets): 26 | if isinstance(dataset, dict): 27 | self.datasets.append(DATASETS.build(dataset)) 28 | elif isinstance(dataset, BaseDataset): 29 | self.datasets.append(dataset) 30 | else: 31 | raise TypeError( 32 | 'elements in datasets sequence should be config or ' 33 | f'`BaseDataset` instance, but got {type(dataset)}') 34 | if ignore_keys is None: 35 | self.ignore_keys = [] 36 | elif isinstance(ignore_keys, str): 37 | self.ignore_keys = [ignore_keys] 38 | elif isinstance(ignore_keys, list): 39 | self.ignore_keys = ignore_keys 40 | else: 41 | raise TypeError('ignore_keys should be a list or str, ' 42 | f'but got {type(ignore_keys)}') 43 | 44 | meta_keys: set = set() 45 | for dataset in self.datasets: 46 | meta_keys |= dataset.metainfo.keys() 47 | # Only use metainfo of first dataset. 48 | self._metainfo = self.datasets[0].metainfo 49 | 50 | self._fully_initialized = False 51 | if not lazy_init: 52 | self.full_init() 53 | -------------------------------------------------------------------------------- /unidet3d/data_preprocessor.py: -------------------------------------------------------------------------------- 1 | # Copied from mmdet3d/models/data_preprocessors/data_preprocessor.py 2 | from mmdet3d.models.data_preprocessors.data_preprocessor import \ 3 | Det3DDataPreprocessor 4 | from mmdet3d.registry import MODELS 5 | 6 | 7 | @MODELS.register_module() 8 | class Det3DDataPreprocessor_(Det3DDataPreprocessor): 9 | """ 10 | We add only this 2 lines: 11 | if 'elastic_coords' in inputs: 12 | batch_inputs['elastic_coords'] = inputs['elastic_coords'] 13 | """ 14 | def simple_process(self, data, training=False): 15 | """Perform normalization, padding and bgr2rgb conversion for img data 16 | based on ``BaseDataPreprocessor``, and voxelize point cloud if `voxel` 17 | is set to be True. 18 | 19 | Args: 20 | data (dict): Data sampled from dataloader. 21 | training (bool): Whether to enable training time augmentation. 22 | Defaults to False. 23 | 24 | Returns: 25 | dict: Data in the same format as the model input. 
26 | """ 27 | if 'img' in data['inputs']: 28 | batch_pad_shape = self._get_pad_shape(data) 29 | 30 | data = self.collate_data(data) 31 | inputs, data_samples = data['inputs'], data['data_samples'] 32 | batch_inputs = dict() 33 | 34 | if 'points' in inputs: 35 | batch_inputs['points'] = inputs['points'] 36 | 37 | if self.voxel: 38 | voxel_dict = self.voxelize(inputs['points'], data_samples) 39 | batch_inputs['voxels'] = voxel_dict 40 | 41 | if 'elastic_coords' in inputs: 42 | batch_inputs['elastic_coords'] = inputs['elastic_coords'] 43 | 44 | if 'imgs' in inputs: 45 | imgs = inputs['imgs'] 46 | 47 | if data_samples is not None: 48 | # NOTE the batched image size information may be useful, e.g. 49 | # in DETR, this is needed for the construction of masks, which 50 | # is then used for the transformer_head. 51 | batch_input_shape = tuple(imgs[0].size()[-2:]) 52 | for data_sample, pad_shape in zip(data_samples, 53 | batch_pad_shape): 54 | data_sample.set_metainfo({ 55 | 'batch_input_shape': batch_input_shape, 56 | 'pad_shape': pad_shape 57 | }) 58 | 59 | if hasattr(self, 'boxtype2tensor') and self.boxtype2tensor: 60 | from mmdet.models.utils.misc import \ 61 | samplelist_boxtype2tensor 62 | samplelist_boxtype2tensor(data_samples) 63 | elif hasattr(self, 'boxlist2tensor') and self.boxlist2tensor: 64 | from mmdet.models.utils.misc import \ 65 | samplelist_boxlist2tensor 66 | samplelist_boxlist2tensor(data_samples) 67 | if self.pad_mask: 68 | self.pad_gt_masks(data_samples) 69 | 70 | if self.pad_seg: 71 | self.pad_gt_sem_seg(data_samples) 72 | 73 | if training and self.batch_augments is not None: 74 | for batch_aug in self.batch_augments: 75 | imgs, data_samples = batch_aug(imgs, data_samples) 76 | batch_inputs['imgs'] = imgs 77 | 78 | return {'inputs': batch_inputs, 'data_samples': data_samples} 79 | -------------------------------------------------------------------------------- /unidet3d/formatting.py: -------------------------------------------------------------------------------- 1 | # Adapted from mmdet3d/datasets/transforms/formating.py 2 | import numpy as np 3 | from .structures import InstanceData_ 4 | from mmdet3d.datasets.transforms import Pack3DDetInputs 5 | from mmdet3d.datasets.transforms.formating import to_tensor 6 | from mmdet3d.registry import TRANSFORMS 7 | from mmdet3d.structures import BaseInstance3DBoxes, Det3DDataSample, PointData 8 | from mmdet3d.structures.points import BasePoints 9 | 10 | 11 | @TRANSFORMS.register_module() 12 | class Pack3DDetInputs_(Pack3DDetInputs): 13 | """Just add elastic_coords, sp_pts_mask, and gt_sp_masks. 14 | """ 15 | INPUTS_KEYS = ['points', 'img', 'elastic_coords'] 16 | SEG_KEYS = [ 17 | 'gt_seg_map', 18 | 'pts_instance_mask', 19 | 'pts_semantic_mask', 20 | 'gt_semantic_seg', 21 | 'sp_pts_mask', 22 | ] 23 | INSTANCEDATA_3D_KEYS = [ 24 | 'gt_bboxes_3d', 'gt_labels_3d', 'attr_labels', 'depths', 'centers_2d', 25 | 'gt_sp_masks' 26 | ] 27 | 28 | def pack_single_results(self, results: dict) -> dict: 29 | """Method to pack the single input data. when the value in this dict is 30 | a list, it usually is in Augmentations Testing. 31 | 32 | Args: 33 | results (dict): Result dict from the data pipeline. 34 | 35 | Returns: 36 | dict: A dict contains 37 | 38 | - 'inputs' (dict): The forward data of models. It usually contains 39 | following keys: 40 | 41 | - points 42 | - img 43 | 44 | - 'data_samples' (:obj:`Det3DDataSample`): The annotation info 45 | of the sample. 
46 | """ 47 | # Format 3D data 48 | if 'points' in results: 49 | if isinstance(results['points'], BasePoints): 50 | results['points'] = results['points'].tensor 51 | 52 | if 'img' in results: 53 | if isinstance(results['img'], list): 54 | # process multiple imgs in single frame 55 | imgs = np.stack(results['img'], axis=0) 56 | if imgs.flags.c_contiguous: 57 | imgs = to_tensor(imgs).permute(0, 3, 1, 2).contiguous() 58 | else: 59 | imgs = to_tensor( 60 | np.ascontiguousarray(imgs.transpose(0, 3, 1, 2))) 61 | results['img'] = imgs 62 | else: 63 | img = results['img'] 64 | if len(img.shape) < 3: 65 | img = np.expand_dims(img, -1) 66 | # To improve the computational speed by by 3-5 times, apply: 67 | # `torch.permute()` rather than `np.transpose()`. 68 | # Refer to https://github.com/open-mmlab/mmdetection/pull/9533 69 | # for more details 70 | if img.flags.c_contiguous: 71 | img = to_tensor(img).permute(2, 0, 1).contiguous() 72 | else: 73 | img = to_tensor( 74 | np.ascontiguousarray(img.transpose(2, 0, 1))) 75 | results['img'] = img 76 | 77 | for key in [ 78 | 'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels', 79 | 'gt_bboxes_labels', 'attr_labels', 'pts_instance_mask', 80 | 'pts_semantic_mask', 'sp_pts_mask', 'gt_sp_masks', 81 | 'elastic_coords', 'centers_2d', 'depths', 'gt_labels_3d' 82 | ]: 83 | if key not in results: 84 | continue 85 | if isinstance(results[key], list): 86 | results[key] = [to_tensor(res) for res in results[key]] 87 | else: 88 | results[key] = to_tensor(results[key]) 89 | if 'gt_bboxes_3d' in results: 90 | if not isinstance(results['gt_bboxes_3d'], BaseInstance3DBoxes): 91 | results['gt_bboxes_3d'] = to_tensor(results['gt_bboxes_3d']) 92 | 93 | if 'gt_semantic_seg' in results: 94 | results['gt_semantic_seg'] = to_tensor( 95 | results['gt_semantic_seg'][None]) 96 | if 'gt_seg_map' in results: 97 | results['gt_seg_map'] = results['gt_seg_map'][None, ...] 
98 | 99 | data_sample = Det3DDataSample() 100 | gt_instances_3d = InstanceData_() 101 | gt_instances = InstanceData_() 102 | gt_pts_seg = PointData() 103 | 104 | img_metas = {} 105 | for key in self.meta_keys: 106 | if key in results: 107 | img_metas[key] = results[key] 108 | data_sample.set_metainfo(img_metas) 109 | 110 | inputs = {} 111 | for key in self.keys: 112 | if key in results: 113 | if key in self.INPUTS_KEYS: 114 | inputs[key] = results[key] 115 | elif key in self.INSTANCEDATA_3D_KEYS: 116 | gt_instances_3d[self._remove_prefix(key)] = results[key] 117 | elif key in self.INSTANCEDATA_2D_KEYS: 118 | if key == 'gt_bboxes_labels': 119 | gt_instances['labels'] = results[key] 120 | else: 121 | gt_instances[self._remove_prefix(key)] = results[key] 122 | elif key in self.SEG_KEYS: 123 | gt_pts_seg[self._remove_prefix(key)] = results[key] 124 | else: 125 | raise NotImplementedError(f'Please modified ' 126 | f'`Pack3DDetInputs` ' 127 | f'to put {key} to ' 128 | f'corresponding field') 129 | 130 | data_sample.gt_instances_3d = gt_instances_3d 131 | data_sample.gt_instances = gt_instances 132 | data_sample.gt_pts_seg = gt_pts_seg 133 | if 'eval_ann_info' in results: 134 | data_sample.eval_ann_info = results['eval_ann_info'] 135 | else: 136 | data_sample.eval_ann_info = None 137 | 138 | packed_results = dict() 139 | packed_results['data_samples'] = data_sample 140 | packed_results['inputs'] = inputs 141 | 142 | return packed_results 143 | -------------------------------------------------------------------------------- /unidet3d/image_vis.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import copy 3 | 4 | import cv2 5 | import numpy as np 6 | import torch 7 | from matplotlib import pyplot as plt 8 | 9 | 10 | def project_pts_on_img(points, 11 | raw_img, 12 | lidar2img_rt, 13 | max_distance=70, 14 | thickness=-1): 15 | """Project the 3D points cloud on 2D image. 16 | 17 | Args: 18 | points (numpy.array): 3D points cloud (x, y, z) to visualize. 19 | raw_img (numpy.array): The numpy array of image. 20 | lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix 21 | according to the camera intrinsic parameters. 22 | max_distance (float, optional): the max distance of the points cloud. 23 | Default: 70. 24 | thickness (int, optional): The thickness of 2D points. Default: -1. 
25 | """ 26 | img = raw_img.copy() 27 | num_points = points.shape[0] 28 | pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1) 29 | pts_2d = pts_4d @ lidar2img_rt.T 30 | 31 | # cam_points is Tensor of Nx4 whose last column is 1 32 | # transform camera coordinate to image coordinate 33 | pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999) 34 | pts_2d[:, 0] /= pts_2d[:, 2] 35 | pts_2d[:, 1] /= pts_2d[:, 2] 36 | 37 | fov_inds = ((pts_2d[:, 0] < img.shape[1]) 38 | & (pts_2d[:, 0] >= 0) 39 | & (pts_2d[:, 1] < img.shape[0]) 40 | & (pts_2d[:, 1] >= 0)) 41 | 42 | imgfov_pts_2d = pts_2d[fov_inds, :3] # u, v, d 43 | 44 | cmap = plt.cm.get_cmap('hsv', 256) 45 | cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255 46 | for i in range(imgfov_pts_2d.shape[0]): 47 | depth = imgfov_pts_2d[i, 2] 48 | color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :] 49 | cv2.circle( 50 | img, 51 | center=(int(np.round(imgfov_pts_2d[i, 0])), 52 | int(np.round(imgfov_pts_2d[i, 1]))), 53 | radius=1, 54 | color=tuple(color), 55 | thickness=thickness, 56 | ) 57 | cv2.imshow('project_pts_img', img.astype(np.uint8)) 58 | cv2.waitKey(100) 59 | 60 | 61 | def plot_rect3d_on_img(img, 62 | num_rects, 63 | rect_corners, 64 | color=(0, 255, 0), 65 | thickness=1): 66 | """Plot the boundary lines of 3D rectangular on 2D images. 67 | 68 | Args: 69 | img (numpy.array): The numpy array of image. 70 | num_rects (int): Number of 3D rectangulars. 71 | rect_corners (numpy.array): Coordinates of the corners of 3D 72 | rectangulars. Should be in the shape of [num_rect, 8, 2]. 73 | color (tuple[int], optional): The color to draw bboxes. 74 | Default: (0, 255, 0). 75 | thickness (int, optional): The thickness of bboxes. Default: 1. 76 | """ 77 | line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7), 78 | (4, 5), (4, 7), (2, 6), (5, 6), (6, 7)) 79 | for i in range(num_rects): 80 | corners = rect_corners[i].astype(np.int) 81 | for start, end in line_indices: 82 | cv2.line(img, (corners[start, 0], corners[start, 1]), 83 | (corners[end, 0], corners[end, 1]), color, thickness, 84 | cv2.LINE_AA) 85 | 86 | return img.astype(np.uint8) 87 | 88 | 89 | def draw_lidar_bbox3d_on_img(bboxes3d, 90 | raw_img, 91 | lidar2img_rt, 92 | img_metas, 93 | color=(0, 255, 0), 94 | thickness=1): 95 | """Project the 3D bbox on 2D plane and draw on input image. 96 | 97 | Args: 98 | bboxes3d (:obj:`LiDARInstance3DBoxes`): 99 | 3d bbox in lidar coordinate system to visualize. 100 | raw_img (numpy.array): The numpy array of image. 101 | lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix 102 | according to the camera intrinsic parameters. 103 | img_metas (dict): Useless here. 104 | color (tuple[int], optional): The color to draw bboxes. 105 | Default: (0, 255, 0). 106 | thickness (int, optional): The thickness of bboxes. Default: 1. 
107 | """ 108 | img = raw_img.copy() 109 | corners_3d = bboxes3d.corners 110 | num_bbox = corners_3d.shape[0] 111 | pts_4d = np.concatenate( 112 | [corners_3d.reshape(-1, 3), 113 | np.ones((num_bbox * 8, 1))], axis=-1) 114 | lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4) 115 | if isinstance(lidar2img_rt, torch.Tensor): 116 | lidar2img_rt = lidar2img_rt.cpu().numpy() 117 | pts_2d = pts_4d @ lidar2img_rt.T 118 | 119 | pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5) 120 | pts_2d[:, 0] /= pts_2d[:, 2] 121 | pts_2d[:, 1] /= pts_2d[:, 2] 122 | imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2) 123 | 124 | return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness) 125 | 126 | 127 | # TODO: remove third parameter in all functions here in favour of img_metas 128 | def draw_depth_bbox3d_on_img(bboxes3d, 129 | raw_img, 130 | calibs, 131 | img_metas, 132 | color=(0, 255, 0), 133 | thickness=1): 134 | """Project the 3D bbox on 2D plane and draw on input image. 135 | 136 | Args: 137 | bboxes3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]): 138 | 3d bbox in depth coordinate system to visualize. 139 | raw_img (numpy.array): The numpy array of image. 140 | calibs (dict): Camera calibration information, Rt and K. 141 | img_metas (dict): Used in coordinates transformation. 142 | color (tuple[int], optional): The color to draw bboxes. 143 | Default: (0, 255, 0). 144 | thickness (int, optional): The thickness of bboxes. Default: 1. 145 | """ 146 | from mmdet3d.core.bbox import points_cam2img 147 | from mmdet3d.models import apply_3d_transformation 148 | 149 | img = raw_img.copy() 150 | img_metas = copy.deepcopy(img_metas) 151 | corners_3d = bboxes3d.corners 152 | num_bbox = corners_3d.shape[0] 153 | points_3d = corners_3d.reshape(-1, 3) 154 | 155 | # first reverse the data transformations 156 | xyz_depth = apply_3d_transformation( 157 | points_3d, 'DEPTH', img_metas, reverse=True) 158 | 159 | # project to 2d to get image coords (uv) 160 | uv_origin = points_cam2img(xyz_depth, 161 | xyz_depth.new_tensor(img_metas['depth2img'])) 162 | uv_origin = (uv_origin - 1).round() 163 | imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy() 164 | 165 | return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness) 166 | 167 | 168 | def draw_camera_bbox3d_on_img(bboxes3d, 169 | raw_img, 170 | cam2img, 171 | img_metas, 172 | color=(0, 255, 0), 173 | thickness=1): 174 | """Project the 3D bbox on 2D plane and draw on input image. 175 | 176 | Args: 177 | bboxes3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]): 178 | 3d bbox in camera coordinate system to visualize. 179 | raw_img (numpy.array): The numpy array of image. 180 | cam2img (dict): Camera intrinsic matrix, 181 | denoted as `K` in depth bbox coordinate system. 182 | img_metas (dict): Useless here. 183 | color (tuple[int], optional): The color to draw bboxes. 184 | Default: (0, 255, 0). 185 | thickness (int, optional): The thickness of bboxes. Default: 1. 
186 | """ 187 | from mmdet3d.core.bbox import points_cam2img 188 | 189 | img = raw_img.copy() 190 | cam2img = copy.deepcopy(cam2img) 191 | corners_3d = bboxes3d.corners 192 | num_bbox = corners_3d.shape[0] 193 | points_3d = corners_3d.reshape(-1, 3) 194 | if not isinstance(cam2img, torch.Tensor): 195 | cam2img = torch.from_numpy(np.array(cam2img)) 196 | 197 | assert (cam2img.shape == torch.Size([3, 3]) 198 | or cam2img.shape == torch.Size([4, 4])) 199 | cam2img = cam2img.float().cpu() 200 | 201 | # project to 2d to get image coords (uv) 202 | uv_origin = points_cam2img(points_3d, cam2img) 203 | uv_origin = (uv_origin - 1).round() 204 | imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy() 205 | 206 | return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness) -------------------------------------------------------------------------------- /unidet3d/indoor_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import Dict, List, Optional, Sequence 3 | 4 | from mmengine.evaluator import BaseMetric 5 | from mmengine.logging import MMLogger 6 | 7 | from .indoor_eval import indoor_eval 8 | from mmdet3d.registry import METRICS 9 | from mmdet3d.structures import get_box_type 10 | from .show_results import show_result_v2 11 | from pathlib import Path 12 | 13 | @METRICS.register_module() 14 | class IndoorMetric_(BaseMetric): 15 | """Indoor scene evaluation metric. 16 | 17 | Args: 18 | iou_thr (float or List[float]): List of iou threshold when calculate 19 | the metric. Defaults to [0.25, 0.5]. 20 | collect_device (str): Device name used for collecting results from 21 | different ranks during distributed training. Must be 'cpu' or 22 | 'gpu'. Defaults to 'cpu'. 23 | prefix (str, optional): The prefix that will be added in the metric 24 | names to disambiguate homonymous metrics of different evaluators. 25 | If prefix is not provided in the argument, self.default_prefix will 26 | be used instead. Defaults to None. 27 | """ 28 | 29 | def __init__(self, 30 | datasets, 31 | datasets_classes, 32 | vis_dir: str = None, 33 | iou_thr: List[float] = [0.25, 0.5], 34 | collect_device: str = 'cpu', 35 | prefix: Optional[str] = None) -> None: 36 | super(IndoorMetric_, self).__init__( 37 | prefix=prefix, collect_device=collect_device) 38 | self.iou_thr = [iou_thr] if isinstance(iou_thr, float) else iou_thr 39 | self.datasets = datasets 40 | self.datasets_classes = datasets_classes 41 | self.vis_dir = vis_dir 42 | 43 | def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: 44 | """Process one batch of data samples and predictions. 45 | 46 | The processed results should be stored in ``self.results``, which will 47 | be used to compute the metrics when all batches have been processed. 48 | 49 | Args: 50 | data_batch (dict): A batch of data from the dataloader. 51 | data_samples (Sequence[dict]): A batch of outputs from the model. 
52 | """ 53 | for data_sample in data_samples: 54 | pred_3d = data_sample['pred_instances_3d'] 55 | pred_3d['dataset'] = self.get_dataset(data_sample['lidar_path']) 56 | eval_ann_info = data_sample['eval_ann_info'] 57 | cpu_pred_3d = dict() 58 | for k, v in pred_3d.items(): 59 | if hasattr(v, 'to'): 60 | cpu_pred_3d[k] = v.to('cpu') 61 | else: 62 | cpu_pred_3d[k] = v 63 | self.results.append((eval_ann_info, cpu_pred_3d)) 64 | 65 | def compute_metrics(self, results: list) -> Dict[str, float]: 66 | """Compute the metrics from processed results. 67 | 68 | Args: 69 | results (list): The processed results of each batch. 70 | 71 | Returns: 72 | Dict[str, float]: The computed metrics. The keys are the names of 73 | the metrics, and the values are corresponding results. 74 | """ 75 | logger: MMLogger = MMLogger.get_current_instance() 76 | ann_infos = [[] for _ in self.datasets] 77 | pred_results = [[] for _ in self.datasets] 78 | 79 | for eval_ann, sinlge_pred_results in results: 80 | idx = self.datasets.index(sinlge_pred_results['dataset']) 81 | ann_infos[idx].append(eval_ann) 82 | pred_results[idx].append(sinlge_pred_results) 83 | if self.vis_dir is not None: 84 | self.vis_results(eval_ann, sinlge_pred_results) 85 | 86 | # some checkpoints may not record the key "box_type_3d" 87 | box_type_3d, box_mode_3d = get_box_type( 88 | self.dataset_meta.get('box_type_3d', 'depth')) 89 | 90 | ret_dict = {} 91 | for i in range(len(self.datasets)): 92 | ret_dict[self.datasets[i]] = indoor_eval( 93 | ann_infos[i], 94 | pred_results[i], 95 | self.iou_thr, 96 | self.datasets_classes[i], 97 | logger=logger, 98 | box_mode_3d=box_mode_3d) 99 | 100 | return ret_dict 101 | 102 | def get_dataset(self, lidar_path): 103 | for dataset in self.datasets: 104 | if dataset in lidar_path.split('/'): 105 | return dataset 106 | 107 | def vis_results(self, eval_ann, sinlge_pred_results): 108 | pts = sinlge_pred_results['points'].numpy() 109 | pts[:, 3:] *= 127.5 110 | pts[:, 3:] += 127.5 111 | show_result_v2(pts, eval_ann['gt_bboxes_3d'].corners, 112 | eval_ann['gt_labels_3d'], 113 | sinlge_pred_results['bboxes_3d'].corners, 114 | sinlge_pred_results['labels_3d'], 115 | Path(self.vis_dir) / sinlge_pred_results['dataset'], 116 | eval_ann['lidar_idx']) -------------------------------------------------------------------------------- /unidet3d/loading.py: -------------------------------------------------------------------------------- 1 | # Adapted from mmdet3d/datasets/transforms/loading.py 2 | import mmengine 3 | import numpy as np 4 | 5 | from mmdet3d.datasets.transforms import LoadAnnotations3D 6 | from mmdet3d.datasets.transforms.loading import get 7 | from mmdet3d.datasets.transforms.loading import NormalizePointsColor 8 | from mmdet3d.registry import TRANSFORMS 9 | 10 | 11 | @TRANSFORMS.register_module() 12 | class LoadAnnotations3D_(LoadAnnotations3D): 13 | """Just add super point mask loading. 14 | 15 | Args: 16 | with_sp_mask_3d (bool): Whether to load super point maks. 17 | """ 18 | 19 | def __init__(self, with_sp_mask_3d, **kwargs): 20 | self.with_sp_mask_3d = with_sp_mask_3d 21 | super().__init__(**kwargs) 22 | 23 | def _load_sp_pts_3d(self, results): 24 | """Private function to load 3D superpoints mask annotations. 25 | 26 | Args: 27 | results (dict): Result dict from :obj:`mmdet3d.CustomDataset`. 28 | 29 | Returns: 30 | dict: The dict containing loaded 3D mask annotations. 
31 | """ 32 | sp_pts_mask_path = results['super_pts_path'] 33 | 34 | try: 35 | mask_bytes = get( 36 | sp_pts_mask_path, backend_args=self.backend_args) 37 | # add .copy() to fix read-only bug 38 | sp_pts_mask = np.frombuffer( 39 | mask_bytes, dtype=np.int64).copy() 40 | except ConnectionError: 41 | mmengine.check_file_exist(sp_pts_mask_path) 42 | sp_pts_mask = np.fromfile( 43 | sp_pts_mask_path, dtype=np.int64) 44 | 45 | results['sp_pts_mask'] = sp_pts_mask 46 | 47 | # 'eval_ann_info' will be passed to evaluator 48 | if 'eval_ann_info' in results: 49 | results['eval_ann_info']['sp_pts_mask'] = sp_pts_mask 50 | results['eval_ann_info']['lidar_idx'] = \ 51 | sp_pts_mask_path.split("/")[-1][:-4] 52 | return results 53 | 54 | def transform(self, results: dict) -> dict: 55 | """Function to load multiple types annotations. 56 | 57 | Args: 58 | results (dict): Result dict from :obj:`mmdet3d.CustomDataset`. 59 | 60 | Returns: 61 | dict: The dict containing loaded 3D bounding box, label, mask and 62 | semantic segmentation annotations. 63 | """ 64 | results = super().transform(results) 65 | if self.with_sp_mask_3d: 66 | results = self._load_sp_pts_3d(results) 67 | return results 68 | 69 | 70 | @TRANSFORMS.register_module() 71 | class NormalizePointsColor_(NormalizePointsColor): 72 | """Just add color_std parameter. 73 | 74 | Args: 75 | color_mean (list[float]): Mean color of the point cloud. 76 | color_std (list[float]): Std color of the point cloud. 77 | Default value is from SPFormer preprocessing. 78 | """ 79 | 80 | def __init__(self, color_mean, color_std=127.5): 81 | self.color_mean = color_mean 82 | self.color_std = color_std 83 | 84 | def transform(self, input_dict): 85 | """Call function to normalize color of points. 86 | 87 | Args: 88 | results (dict): Result dict containing point clouds data. 89 | 90 | Returns: 91 | dict: The result dict containing the normalized points. 92 | Updated key and value are described below. 93 | - points (:obj:`BasePoints`): Points after color normalization. 94 | """ 95 | points = input_dict['points'] 96 | assert points.attribute_dims is not None and \ 97 | 'color' in points.attribute_dims.keys(), \ 98 | 'Expect points have color attribute' 99 | if self.color_mean is not None: 100 | points.color = points.color - \ 101 | points.color.new_tensor(self.color_mean) 102 | if self.color_std is not None: 103 | points.color = points.color / \ 104 | points.color.new_tensor(self.color_std) 105 | input_dict['points'] = points 106 | return input_dict 107 | 108 | 109 | @TRANSFORMS.register_module() 110 | class DenormalizePointsColor(NormalizePointsColor): 111 | """Denormalize points colors. 112 | 113 | Args: 114 | color_mean (list[float]): Mean color of the point cloud. 115 | color_std (list[float]): Std color of the point cloud. 116 | Default value is from SPFormer preprocessing. 117 | """ 118 | 119 | def __init__(self, color_mean, color_std): 120 | self.color_mean = color_mean 121 | self.color_std = color_std 122 | 123 | def transform(self, input_dict): 124 | """Call function to normalize color of points. 125 | 126 | Args: 127 | results (dict): Result dict containing point clouds data. 128 | 129 | Returns: 130 | dict: The result dict containing the normalized points. 131 | Updated key and value are described below. 132 | - points (:obj:`BasePoints`): Points after color normalization. 
133 | """ 134 | points = input_dict['points'] 135 | assert points.attribute_dims is not None and \ 136 | 'color' in points.attribute_dims.keys(), \ 137 | 'Expect points have color attribute' 138 | if self.color_std is not None: 139 | points.color = points.color * \ 140 | points.color.new_tensor(self.color_std) 141 | if self.color_mean is not None: 142 | points.color = points.color + \ 143 | points.color.new_tensor(self.color_mean) 144 | 145 | input_dict['points'] = points 146 | return input_dict -------------------------------------------------------------------------------- /unidet3d/multiscan_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from mmdet3d.datasets import Det3DDataset 3 | from mmdet3d.registry import DATASETS 4 | from mmdet3d.structures import DepthInstance3DBoxes 5 | import os.path as osp 6 | from mmengine.logging import print_log 7 | import logging 8 | import numpy as np 9 | 10 | @DATASETS.register_module() 11 | class MultiScan(Det3DDataset): 12 | """MultiScan dataset. 13 | 14 | Args: 15 | data_prefix (dict): Prefix for data. Defaults to 16 | dict(pts='points', pts_instance_mask='instance_mask', 17 | pts_semantic_mask='semantic_mask'). 18 | box_type_3d (str): Type of 3D box of this dataset. 19 | Based on the `box_type_3d`, the dataset will encapsulate the box 20 | to its original format then converted them to `box_type_3d`. 21 | Defaults to 'Depth'. 22 | """ 23 | METAINFO = { 24 | 'classes': 25 | # ('door', 'table', 'chair', 'cabinet', 'window', 'sofa', 'microwave', 'pillow', 26 | # 'tv_monitor', 'curtain', 'trash_can', 'suitcase', 'sink', 'backpack', 'bed', 27 | # 'refrigerator', 'toilet', 'no_target') 28 | ('door', 'table', 'chair', 'cabinet', 'window', 'sofa', 'microwave', 'pillow', 29 | 'tv_monitor', 'curtain', 'trash_can', 'suitcase', 'sink', 'backpack', 'bed', 30 | 'refrigerator', 'toilet') 31 | } 32 | 33 | def __init__(self, 34 | data_prefix=dict( 35 | pts='points', 36 | pts_instance_mask='instance_mask', 37 | pts_semantic_mask='semantic_mask'), 38 | box_type_3d='Depth', 39 | **kwargs): 40 | super().__init__( 41 | data_prefix=data_prefix, box_type_3d=box_type_3d, **kwargs) 42 | 43 | def parse_ann_info(self, info): 44 | """Process the `instances` in data info to `ann_info`. 45 | 46 | Args: 47 | info (dict): Info dict. 48 | 49 | Returns: 50 | dict: Processed `ann_info` 51 | """ 52 | ann_info = super().parse_ann_info(info) 53 | if ann_info is None: 54 | ann_info = dict() 55 | ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32) 56 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64) 57 | 58 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes( 59 | ann_info['gt_bboxes_3d'], 60 | origin=(0.5, 0.5, 0.5), box_dim=6, 61 | with_yaw=False).convert_to(self.box_mode_3d) 62 | 63 | return ann_info 64 | 65 | @DATASETS.register_module() 66 | class MultiScan_(MultiScan): 67 | """MultiScan dataset with partition. 68 | 69 | Args: 70 | partition(float): Defaults to 1, the part of 71 | the dataset that will be used. 
72 | """ 73 | METAINFO = { 74 | 'classes': 75 | ('door', 'table', 'chair', 'cabinet', 'window', 'sofa', 'microwave', 'pillow', 76 | 'tv_monitor', 'curtain', 'trash_can', 'suitcase', 'sink', 'backpack', 'bed', 77 | 'refrigerator', 'toilet'), 78 | 'valid_class_ids': (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) 79 | } 80 | 81 | def __init__(self, 82 | partition: float = 1, 83 | **kwargs) -> None: 84 | self.partition = partition 85 | super().__init__(**kwargs) 86 | 87 | def parse_ann_info(self, info: dict) -> Union[dict, None]: 88 | """Process the `instances` in data info to `ann_info`. 89 | 90 | In `Custom3DDataset`, we simply concatenate all the field 91 | in `instances` to `np.ndarray`, you can do the specific 92 | process in subclass. You have to convert `gt_bboxes_3d` 93 | to different coordinates according to the task. 94 | 95 | Args: 96 | info (dict): Info dict. 97 | 98 | Returns: 99 | dict or None: Processed `ann_info`. 100 | """ 101 | ids = {c: i for i, c in enumerate(self.metainfo['valid_class_ids'])} 102 | instances = [] 103 | for instance in info['instances']: 104 | if instance['bbox_label_3d'] in ids: 105 | instance['bbox_label_3d'] = ids[instance['bbox_label_3d']] 106 | instances.append(instance) 107 | info['instances'] = instances 108 | return super().parse_ann_info(info) 109 | 110 | def __getitem__(self, idx: int) -> dict: 111 | """Get the idx-th image and data information of dataset after 112 | ``self.pipeline``, and ``full_init`` will be called if the dataset has 113 | not been fully initialized. 114 | 115 | During training phase, if ``self.pipeline`` get ``None``, 116 | ``self._rand_another`` will be called until a valid image is fetched or 117 | the maximum limit of refetech is reached. 118 | 119 | Args: 120 | idx (int): The index of self.data_list. 121 | 122 | Returns: 123 | dict: The idx-th image and data information of dataset after 124 | ``self.pipeline``. 125 | """ 126 | # Performing full initialization by calling `__getitem__` will consume 127 | # extra memory. If a dataset is not fully initialized by setting 128 | # `lazy_init=True` and then fed into the dataloader. Different workers 129 | # will simultaneously read and parse the annotation. It will cost more 130 | # time and memory, although this may work. Therefore, it is recommended 131 | # to manually call `full_init` before dataset fed into dataloader to 132 | # ensure all workers use shared RAM from master process. 133 | 134 | if not self.test_mode: 135 | if self.serialize_data: 136 | dataset_len = len(self.data_address) 137 | else: 138 | dataset_len = len(self.data_list) 139 | idx = np.random.randint(0, dataset_len) 140 | if not self._fully_initialized: 141 | print_log( 142 | 'Please call `full_init()` method manually to accelerate ' 143 | 'the speed.', 144 | logger='current', 145 | level=logging.WARNING) 146 | self.full_init() 147 | 148 | if self.test_mode: 149 | data = self.prepare_data(idx) 150 | if data is None: 151 | raise Exception('Test time pipline should not get `None` ' 152 | 'data_sample') 153 | return data 154 | 155 | for _ in range(self.max_refetch + 1): 156 | data = self.prepare_data(idx) 157 | # Broken images or random augmentations may cause the returned data 158 | # to be None 159 | if data is None: 160 | idx = self._rand_another() 161 | continue 162 | return data 163 | 164 | def __len__(self) -> int: 165 | """Get the length of filtered dataset and automatically call 166 | ``full_init`` if the dataset has not been fully init. 
167 | 168 | Returns: 169 | int: The length of filtered dataset. 170 | """ 171 | 172 | if self.serialize_data: 173 | dataset_len = len(self.data_address) 174 | else: 175 | dataset_len = len(self.data_list) 176 | if not self.test_mode: 177 | return int(self.partition * dataset_len) 178 | else: 179 | return dataset_len 180 | 181 | def parse_data_info(self, info: dict) -> dict: 182 | """Process the raw data info. 183 | 184 | Args: 185 | info (dict): Raw info dict. 186 | 187 | Returns: 188 | dict: Has `ann_info` in training stage. And 189 | all path has been converted to absolute path. 190 | """ 191 | info['super_pts_path'] = osp.join( 192 | self.data_prefix.get('sp_pts_mask', ''), 193 | info['lidar_points']['lidar_path']) #info['super_pts_path'] 194 | 195 | info = super().parse_data_info(info) 196 | 197 | return info -------------------------------------------------------------------------------- /unidet3d/rotated_iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import Optional 3 | 4 | import torch 5 | from mmcv.ops.diff_iou_rotated import box2corners, oriented_box_intersection_2d 6 | from mmdet.models.losses.utils import weighted_loss 7 | from torch import Tensor 8 | from torch import nn as nn 9 | 10 | from mmdet3d.models import rotated_iou_3d_loss 11 | from mmdet3d.registry import MODELS 12 | 13 | 14 | def diff_diou_rotated_3d(box3d1: Tensor, box3d2: Tensor) -> Tensor: 15 | """Calculate differentiable DIoU of rotated 3d boxes. 16 | 17 | Args: 18 | box3d1 (Tensor): (B, N, 3+3+1) First box (x,y,z,w,h,l,alpha). 19 | box3d2 (Tensor): (B, N, 3+3+1) Second box (x,y,z,w,h,l,alpha). 20 | Returns: 21 | Tensor: (B, N) DIoU. 22 | """ 23 | box1 = box3d1[..., [0, 1, 3, 4, 6]] 24 | box2 = box3d2[..., [0, 1, 3, 4, 6]] 25 | corners1 = box2corners(box1) 26 | corners2 = box2corners(box2) 27 | intersection, _ = oriented_box_intersection_2d(corners1, corners2) 28 | zmax1 = box3d1[..., 2] + box3d1[..., 5] * 0.5 29 | zmin1 = box3d1[..., 2] - box3d1[..., 5] * 0.5 30 | zmax2 = box3d2[..., 2] + box3d2[..., 5] * 0.5 31 | zmin2 = box3d2[..., 2] - box3d2[..., 5] * 0.5 32 | z_overlap = (torch.min(zmax1, zmax2) - 33 | torch.max(zmin1, zmin2)).clamp_(min=0.) 34 | intersection_3d = intersection * z_overlap 35 | volume1 = box3d1[..., 3] * box3d1[..., 4] * box3d1[..., 5] 36 | volume2 = box3d2[..., 3] * box3d2[..., 4] * box3d2[..., 5] 37 | union_3d = volume1 + volume2 - intersection_3d 38 | 39 | x1_max = torch.max(corners1[..., 0], dim=2)[0] 40 | x1_min = torch.min(corners1[..., 0], dim=2)[0] 41 | y1_max = torch.max(corners1[..., 1], dim=2)[0] 42 | y1_min = torch.min(corners1[..., 1], dim=2)[0] 43 | 44 | x2_max = torch.max(corners2[..., 0], dim=2)[0] 45 | x2_min = torch.min(corners2[..., 0], dim=2)[0] 46 | y2_max = torch.max(corners2[..., 1], dim=2)[0] 47 | y2_min = torch.min(corners2[..., 1], dim=2)[0] 48 | 49 | x_max = torch.max(x1_max, x2_max) 50 | x_min = torch.min(x1_min, x2_min) 51 | y_max = torch.max(y1_max, y2_max) 52 | y_min = torch.min(y1_min, y2_min) 53 | 54 | z_max = torch.max(zmax1, zmax2) 55 | z_min = torch.min(zmin1, zmin2) 56 | 57 | r2 = ((box1[..., :3] - box2[..., :3])**2).sum(dim=-1) 58 | c2 = (x_min - x_max)**2 + (y_min - y_max)**2 + (z_min - z_max)**2 59 | 60 | return intersection_3d / union_3d - r2 / c2 61 | 62 | 63 | @weighted_loss 64 | def rotated_diou_3d_loss(pred: Tensor, target: Tensor) -> Tensor: 65 | """Calculate the DIoU loss (1-DIoU) of two sets of rotated bounding boxes. 
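Compared to plain IoU, DIoU adds a center-distance penalty: DIoU = IoU - d^2 / c^2, where d is the distance between the two box centers and c is the diagonal of the smallest axis-aligned box enclosing both boxes.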
66 | Note that predictions and targets are one-to-one corresponded. 67 | 68 | Args: 69 | pred (torch.Tensor): Bbox predictions with shape [N, 7] 70 | (x, y, z, w, l, h, alpha). 71 | target (torch.Tensor): Bbox targets (gt) with shape [N, 7] 72 | (x, y, z, w, l, h, alpha). 73 | 74 | Returns: 75 | torch.Tensor: IoU loss between predictions and targets. 76 | """ 77 | if len(pred.shape) == 2: 78 | pred, target = pred.unsqueeze(0), target.unsqueeze(0) 79 | diou_loss = 1 - diff_diou_rotated_3d(pred, target)[0] 80 | else: 81 | diou_loss = 1 - diff_diou_rotated_3d(pred, target) 82 | return diou_loss 83 | 84 | 85 | @MODELS.register_module() 86 | class UniDet3DRotatedIoU3DLoss(nn.Module): 87 | """Calculate the IoU loss (1-IoU) of rotated bounding boxes. The only 88 | difference with original RotatedIoU3DLoss is the addition of DIoU mode. 89 | These classes should be merged in the future. 90 | 91 | Args: 92 | mode (str): 'iou' for intersection over union or 'diou' for 93 | distance-iou loss. Defaults to 'iou'. 94 | reduction (str): Method to reduce losses. 95 | The valid reduction method are 'none', 'sum' or 'mean'. 96 | Defaults to 'mean'. 97 | loss_weight (float): Weight of loss. Defaults to 1.0. 98 | """ 99 | 100 | def __init__(self, 101 | mode: str = 'iou', 102 | reduction: str = 'mean', 103 | loss_weight: float = 1.0) -> None: 104 | super(UniDet3DRotatedIoU3DLoss, self).__init__() 105 | assert mode in ['iou', 'diou'] 106 | self.loss = rotated_iou_3d_loss if mode == 'iou' \ 107 | else rotated_diou_3d_loss 108 | assert reduction in ['none', 'sum', 'mean'] 109 | self.reduction = reduction 110 | self.loss_weight = loss_weight 111 | 112 | def forward(self, 113 | pred: Tensor, 114 | target: Tensor, 115 | weight: Optional[Tensor] = None, 116 | avg_factor: Optional[float] = None, 117 | reduction_override: Optional[str] = None, 118 | **kwargs) -> Tensor: 119 | """Forward function of loss calculation. 120 | 121 | Args: 122 | pred (Tensor): Bbox predictions with shape [..., 7] 123 | (x, y, z, w, l, h, alpha). 124 | target (Tensor): Bbox targets (gt) with shape [..., 7] 125 | (x, y, z, w, l, h, alpha). 126 | weight (Tensor, optional): Weight of loss. 127 | Defaults to None. 128 | avg_factor (float, optional): Average factor that is used to 129 | average the loss. Defaults to None. 130 | reduction_override (str, optional): Method to reduce losses. 131 | The valid reduction method are 'none', 'sum' or 'mean'. 132 | Defaults to None. 133 | 134 | Returns: 135 | Tensor: IoU loss between predictions and targets. 
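When ``weight`` is provided but contains no positive entries, a differentiable zero (``pred.sum() * weight.sum()``) is returned so that the computation graph stays connected.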
136 | """ 137 | if weight is not None and not torch.any(weight > 0): 138 | return pred.sum() * weight.sum() # 0 139 | assert reduction_override in (None, 'none', 'mean', 'sum') 140 | reduction = ( 141 | reduction_override if reduction_override else self.reduction) 142 | if weight is not None and weight.dim() > 1: 143 | weight = weight.mean(-1) 144 | loss = self.loss_weight * self.loss( 145 | pred, 146 | target, 147 | weight, 148 | reduction=reduction, 149 | avg_factor=avg_factor, 150 | **kwargs) 151 | 152 | return loss -------------------------------------------------------------------------------- /unidet3d/rscan_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | import numpy as np 3 | from mmdet3d.datasets import Det3DDataset 4 | from mmdet3d.registry import DATASETS 5 | from mmdet3d.structures import DepthInstance3DBoxes 6 | import os.path as osp 7 | from mmengine.logging import print_log 8 | import logging 9 | import numpy as np 10 | 11 | @DATASETS.register_module() 12 | class RScan(Det3DDataset): 13 | """RScan dataset. 14 | 15 | Args: 16 | data_prefix (dict): Prefix for data. Defaults to 17 | dict(pts='points', pts_instance_mask='instance_mask', 18 | pts_semantic_mask='semantic_mask'). 19 | box_type_3d (str): Type of 3D box of this dataset. 20 | Based on the `box_type_3d`, the dataset will encapsulate the box 21 | to its original format then converted them to `box_type_3d`. 22 | Defaults to 'Depth'. 23 | """ 24 | METAINFO = { 25 | 'classes': 26 | ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 'bookshelf', 'picture', 27 | 'counter', 'blinds', 'desk', 'shelves', 'curtain', 'dresser', 'pillow', 'mirror', 'floor mat', 'clothes', 28 | 'ceiling', 'books', 'fridge', 'television', 'paper', 'towel', 'shower curtain', 'box', 'whiteboard', 'person', 29 | 'night stand', 'toilet', 'sink', 'lamp', 'bathtub', 'bag', 'structure', 'furniture', 'prop') 30 | } 31 | 32 | def __init__(self, 33 | data_prefix=dict( 34 | pts='points', 35 | pts_instance_mask='instance_mask', 36 | pts_semantic_mask='semantic_mask'), 37 | box_type_3d='Depth', 38 | **kwargs): 39 | super().__init__( 40 | data_prefix=data_prefix, box_type_3d=box_type_3d, **kwargs) 41 | 42 | def parse_ann_info(self, info): 43 | """Process the `instances` in data info to `ann_info`. 44 | 45 | Args: 46 | info (dict): Info dict. 47 | 48 | Returns: 49 | dict: Processed `ann_info` 50 | """ 51 | ann_info = super().parse_ann_info(info) 52 | if ann_info is None: 53 | ann_info = dict() 54 | ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32) 55 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64) 56 | 57 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes( 58 | ann_info['gt_bboxes_3d'], 59 | origin=(0.5, 0.5, 0.5), box_dim=6, 60 | with_yaw=False).convert_to(self.box_mode_3d) 61 | 62 | return ann_info 63 | 64 | @DATASETS.register_module() 65 | class ThreeRScan_(RScan): 66 | """3RScan dataset with partition. 67 | 68 | Args: 69 | partition(float): Defaults to 1, the part of 70 | the dataset that will be used. 
71 | """ 72 | METAINFO = { 73 | 'classes': 74 | ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 75 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator', 76 | 'shower curtain', 'toilet', 'sink', 'bathtub', 'otherfurniture'), 77 | 'valid_class_ids': (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39) 78 | } 79 | def __init__(self, 80 | partition: float = 1, 81 | **kwargs) -> None: 82 | self.partition = partition 83 | super().__init__(**kwargs) 84 | 85 | def parse_ann_info(self, info: dict) -> Union[dict, None]: 86 | """Process the `instances` in data info to `ann_info`. 87 | 88 | In `Custom3DDataset`, we simply concatenate all the field 89 | in `instances` to `np.ndarray`, you can do the specific 90 | process in subclass. You have to convert `gt_bboxes_3d` 91 | to different coordinates according to the task. 92 | 93 | Args: 94 | info (dict): Info dict. 95 | 96 | Returns: 97 | dict or None: Processed `ann_info`. 98 | """ 99 | ids = {c: i for i, c in enumerate(self.metainfo['valid_class_ids'])} 100 | instances = [] 101 | for instance in info['instances']: 102 | if instance['bbox_label_3d'] in ids: 103 | instance['bbox_label_3d'] = ids[instance['bbox_label_3d']] 104 | instances.append(instance) 105 | info['instances'] = instances 106 | return super().parse_ann_info(info) 107 | 108 | def parse_data_info(self, info: dict) -> dict: 109 | """Process the raw data info. 110 | 111 | Args: 112 | info (dict): Raw info dict. 113 | 114 | Returns: 115 | dict: Has `ann_info` in training stage. And 116 | all path has been converted to absolute path. 117 | """ 118 | info['super_pts_path'] = osp.join( 119 | self.data_prefix.get('sp_pts_mask', ''), 120 | info['lidar_points']['lidar_path']) #info['super_pts_path'] 121 | 122 | info = super().parse_data_info(info) 123 | 124 | return info 125 | 126 | def __getitem__(self, idx: int) -> dict: 127 | """Get the idx-th image and data information of dataset after 128 | ``self.pipeline``, and ``full_init`` will be called if the dataset has 129 | not been fully initialized. 130 | 131 | During training phase, if ``self.pipeline`` get ``None``, 132 | ``self._rand_another`` will be called until a valid image is fetched or 133 | the maximum limit of refetech is reached. 134 | 135 | Args: 136 | idx (int): The index of self.data_list. 137 | 138 | Returns: 139 | dict: The idx-th image and data information of dataset after 140 | ``self.pipeline``. 141 | """ 142 | # Performing full initialization by calling `__getitem__` will consume 143 | # extra memory. If a dataset is not fully initialized by setting 144 | # `lazy_init=True` and then fed into the dataloader. Different workers 145 | # will simultaneously read and parse the annotation. It will cost more 146 | # time and memory, although this may work. Therefore, it is recommended 147 | # to manually call `full_init` before dataset fed into dataloader to 148 | # ensure all workers use shared RAM from master process. 
149 | 150 | if not self.test_mode: 151 | if self.serialize_data: 152 | dataset_len = len(self.data_address) 153 | else: 154 | dataset_len = len(self.data_list) 155 | idx = np.random.randint(0, dataset_len) 156 | 157 | if not self._fully_initialized: 158 | print_log( 159 | 'Please call `full_init()` method manually to accelerate ' 160 | 'the speed.', 161 | logger='current', 162 | level=logging.WARNING) 163 | self.full_init() 164 | 165 | if self.test_mode: 166 | data = self.prepare_data(idx) 167 | if data is None: 168 | raise Exception('Test time pipline should not get `None` ' 169 | 'data_sample') 170 | return data 171 | 172 | for _ in range(self.max_refetch + 1): 173 | data = self.prepare_data(idx) 174 | # Broken images or random augmentations may cause the returned data 175 | # to be None 176 | if data is None: 177 | idx = self._rand_another() 178 | continue 179 | return data 180 | 181 | def __len__(self) -> int: 182 | """Get the length of filtered dataset and automatically call 183 | ``full_init`` if the dataset has not been fully init. 184 | 185 | Returns: 186 | int: The length of filtered dataset. 187 | """ 188 | 189 | if self.serialize_data: 190 | dataset_len = len(self.data_address) 191 | else: 192 | dataset_len = len(self.data_list) 193 | if not self.test_mode: 194 | return int(self.partition * dataset_len) 195 | else: 196 | return dataset_len 197 | -------------------------------------------------------------------------------- /unidet3d/s3dis_dataset.py: -------------------------------------------------------------------------------- 1 | from mmdet3d.registry import DATASETS 2 | from mmdet3d.datasets.s3dis_dataset import S3DISDataset 3 | import os.path as osp 4 | from mmengine.logging import print_log 5 | import logging 6 | import numpy as np 7 | 8 | @DATASETS.register_module() 9 | class S3DISSegDetDataset(S3DISDataset): 10 | """S3DISSegDetDataset dataset. 11 | 12 | Args: 13 | partition(float): Defaults to 1, the part of 14 | the dataset that will be used. 15 | """ 16 | def __init__(self, 17 | partition: float = 1, 18 | **kwargs) -> None: 19 | self.partition = partition 20 | super().__init__(**kwargs) 21 | 22 | def parse_data_info(self, info: dict) -> dict: 23 | """Process the raw data info. 24 | 25 | Args: 26 | info (dict): Raw info dict. 27 | 28 | Returns: 29 | dict: Has `ann_info` in training stage. And 30 | all path has been converted to absolute path. 31 | """ 32 | info['super_pts_path'] = osp.join( 33 | self.data_prefix.get('sp_pts_mask', ''), info['super_pts_path']) 34 | 35 | info = super().parse_data_info(info) 36 | 37 | return info 38 | 39 | def __getitem__(self, idx: int) -> dict: 40 | """Get the idx-th image and data information of dataset after 41 | ``self.pipeline``, and ``full_init`` will be called if the dataset has 42 | not been fully initialized. 43 | 44 | During training phase, if ``self.pipeline`` get ``None``, 45 | ``self._rand_another`` will be called until a valid image is fetched or 46 | the maximum limit of refetech is reached. 47 | 48 | Args: 49 | idx (int): The index of self.data_list. 50 | 51 | Returns: 52 | dict: The idx-th image and data information of dataset after 53 | ``self.pipeline``. 54 | """ 55 | # Performing full initialization by calling `__getitem__` will consume 56 | # extra memory. If a dataset is not fully initialized by setting 57 | # `lazy_init=True` and then fed into the dataloader. Different workers 58 | # will simultaneously read and parse the annotation. It will cost more 59 | # time and memory, although this may work. 
Therefore, it is recommended 60 | # to manually call `full_init` before dataset fed into dataloader to 61 | # ensure all workers use shared RAM from master process. 62 | 63 | if not self.test_mode: 64 | if self.serialize_data: 65 | dataset_len = len(self.data_address) 66 | else: 67 | dataset_len = len(self.data_list) 68 | idx = np.random.randint(0, dataset_len) 69 | if not self._fully_initialized: 70 | print_log( 71 | 'Please call `full_init()` method manually to accelerate ' 72 | 'the speed.', 73 | logger='current', 74 | level=logging.WARNING) 75 | self.full_init() 76 | 77 | if self.test_mode: 78 | data = self.prepare_data(idx) 79 | if data is None: 80 | raise Exception('Test time pipline should not get `None` ' 81 | 'data_sample') 82 | return data 83 | 84 | for _ in range(self.max_refetch + 1): 85 | data = self.prepare_data(idx) 86 | # Broken images or random augmentations may cause the returned data 87 | # to be None 88 | if data is None: 89 | idx = self._rand_another() 90 | continue 91 | return data 92 | 93 | def __len__(self) -> int: 94 | """Get the length of filtered dataset and automatically call 95 | ``full_init`` if the dataset has not been fully init. 96 | 97 | Returns: 98 | int: The length of filtered dataset. 99 | """ 100 | 101 | if self.serialize_data: 102 | dataset_len = len(self.data_address) 103 | else: 104 | dataset_len = len(self.data_list) 105 | if not self.test_mode: 106 | return int(self.partition * dataset_len) 107 | else: 108 | return dataset_len 109 | -------------------------------------------------------------------------------- /unidet3d/scannet_dataset.py: -------------------------------------------------------------------------------- 1 | from os import path as osp 2 | import numpy as np 3 | import warnings 4 | 5 | from mmdet3d.datasets.scannet_dataset import ScanNetSegDataset 6 | from mmdet3d.structures import DepthInstance3DBoxes 7 | from mmdet3d.registry import DATASETS 8 | 9 | 10 | @DATASETS.register_module() 11 | class ScanNetSegDataset_(ScanNetSegDataset): 12 | """We just add super_pts_path.""" 13 | 14 | def get_scene_idxs(self, *args, **kwargs): 15 | """Compute scene_idxs for data sampling.""" 16 | return np.arange(len(self)).astype(np.int32) 17 | 18 | def parse_data_info(self, info: dict) -> dict: 19 | """Process the raw data info. 20 | 21 | Args: 22 | info (dict): Raw info dict. 23 | 24 | Returns: 25 | dict: Has `ann_info` in training stage. And 26 | all path has been converted to absolute path. 27 | """ 28 | info['super_pts_path'] = osp.join( 29 | self.data_prefix.get('sp_pts_mask', ''), info['super_pts_path']) 30 | 31 | info = super().parse_data_info(info) 32 | 33 | return info 34 | 35 | @DATASETS.register_module() 36 | class ScanNetDetDataset(ScanNetSegDataset_): 37 | """Dataset with loading gt_bboxes_3d, gt_labels_3d and 38 | axis-align matrix for evaluating SPFormer/OneFormer with 39 | IndoorMetric. We just copy some functions from Det3DDataset 40 | and comment some lines in them. 41 | """ 42 | @staticmethod 43 | def _get_axis_align_matrix(info: dict) -> np.ndarray: 44 | """Get axis_align_matrix from info. If not exist, return identity mat. 45 | 46 | Args: 47 | info (dict): Info of a single sample data. 48 | 49 | Returns: 50 | np.ndarray: 4x4 transformation matrix. 
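Falls back to the identity matrix (and emits a warning) when ``axis_align_matrix`` is missing from the info dict.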
51 | """ 52 | if 'axis_align_matrix' in info: 53 | return np.array(info['axis_align_matrix']) 54 | else: 55 | warnings.warn( 56 | 'axis_align_matrix is not found in ScanNet data info, please ' 57 | 'use new pre-process scripts to re-generate ScanNet data') 58 | return np.eye(4).astype(np.float32) 59 | 60 | def parse_data_info(self, info: dict) -> dict: 61 | """Process the raw data info. 62 | 63 | The only difference with it in `Det3DDataset` 64 | is the specific process for `axis_align_matrix'. 65 | 66 | Args: 67 | info (dict): Raw info dict. 68 | 69 | Returns: 70 | dict: Has `ann_info` in training stage. And 71 | all path has been converted to absolute path. 72 | """ 73 | 74 | info['axis_align_matrix'] = self._get_axis_align_matrix(info) 75 | # info['super_pts_path'] = osp.join( 76 | # self.data_prefix.get('sp_pts_mask', ''), info['super_pts_path']) 77 | 78 | info = super().parse_data_info(info) 79 | 80 | if not self.test_mode: 81 | # used in training 82 | info['ann_info'] = self.parse_ann_info(info) 83 | if self.test_mode and self.load_eval_anns: 84 | info['eval_ann_info'] = self.parse_ann_info(info) 85 | 86 | return info 87 | 88 | def _det3d_parse_ann_info(self, info): 89 | """Process the `instances` in data info to `ann_info`. 90 | 91 | In `Custom3DDataset`, we simply concatenate all the field 92 | in `instances` to `np.ndarray`, you can do the specific 93 | process in subclass. You have to convert `gt_bboxes_3d` 94 | to different coordinates according to the task. 95 | 96 | Args: 97 | info (dict): Info dict. 98 | 99 | Returns: 100 | dict or None: Processed `ann_info`. 101 | """ 102 | # add s or gt prefix for most keys after concat 103 | # we only process 3d annotations here, the corresponding 104 | # 2d annotation process is in the `LoadAnnotations3D` 105 | # in `transforms` 106 | name_mapping = { 107 | 'bbox_label_3d': 'gt_labels_3d', 108 | 'bbox_label': 'gt_bboxes_labels', 109 | 'bbox': 'gt_bboxes', 110 | 'bbox_3d': 'gt_bboxes_3d', 111 | 'depth': 'depths', 112 | 'center_2d': 'centers_2d', 113 | 'attr_label': 'attr_labels', 114 | 'velocity': 'velocities', 115 | } 116 | instances = info['instances'] 117 | # empty gt 118 | if len(instances) == 0: 119 | return None 120 | else: 121 | keys = list(instances[0].keys()) 122 | ann_info = dict() 123 | for ann_name in keys: 124 | temp_anns = [item[ann_name] for item in instances] 125 | # map the original dataset label to training label 126 | # if 'label' in ann_name and ann_name != 'attr_label': 127 | # temp_anns = [ 128 | # self.label_mapping[item] for item in temp_anns 129 | # ] 130 | if ann_name in name_mapping: 131 | mapped_ann_name = name_mapping[ann_name] 132 | else: 133 | mapped_ann_name = ann_name 134 | 135 | if 'label' in ann_name: 136 | temp_anns = np.array(temp_anns).astype(np.int64) 137 | elif ann_name in name_mapping: 138 | temp_anns = np.array(temp_anns).astype(np.float32) 139 | else: 140 | temp_anns = np.array(temp_anns) 141 | 142 | ann_info[mapped_ann_name] = temp_anns 143 | ann_info['instances'] = info['instances'] 144 | 145 | # for label in ann_info['gt_labels_3d']: 146 | # if label != -1: 147 | # cat_name = self.metainfo['classes'][label] 148 | # self.num_ins_per_cat[cat_name] += 1 149 | 150 | return ann_info 151 | 152 | def parse_ann_info(self, info: dict) -> dict: 153 | """Process the `instances` in data info to `ann_info`. 154 | 155 | Args: 156 | info (dict): Info dict. 157 | 158 | Returns: 159 | dict: Processed `ann_info`. 
160 | """ 161 | ann_info = self._det3d_parse_ann_info(info) 162 | # empty gt 163 | if ann_info is None: 164 | ann_info = dict() 165 | ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32) 166 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64) 167 | # to target box structure 168 | 169 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes( 170 | ann_info['gt_bboxes_3d'], 171 | box_dim=ann_info['gt_bboxes_3d'].shape[-1], 172 | with_yaw=False, 173 | origin=(0.5, 0.5, 0.5)) # .convert_to(self.box_mode_3d) 174 | 175 | return ann_info -------------------------------------------------------------------------------- /unidet3d/structures.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sized 2 | from mmengine.structures import InstanceData 3 | 4 | 5 | class InstanceData_(InstanceData): 6 | """We only remove a single assert from __setattr__.""" 7 | 8 | def __setattr__(self, name: str, value: Sized): 9 | """setattr is only used to set data. 10 | 11 | The value must have the attribute of `__len__` and have the same length 12 | of `InstanceData`. 13 | """ 14 | if name in ('_metainfo_fields', '_data_fields'): 15 | if not hasattr(self, name): 16 | super(InstanceData, self).__setattr__(name, value) 17 | else: 18 | raise AttributeError(f'{name} has been used as a ' 19 | 'private attribute, which is immutable.') 20 | 21 | else: 22 | assert isinstance(value, 23 | Sized), 'value must contain `__len__` attribute' 24 | 25 | super(InstanceData, self).__setattr__(name, value) 26 | --------------------------------------------------------------------------------