├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── configs
│   ├── unidet3d_1xb8_scannet.py
│   └── unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py
├── data
│   ├── 3rscan
│   │   ├── README.md
│   │   ├── meta_data
│   │   │   ├── 3RScan.json
│   │   │   ├── 3RScan.v2_Semantic-Classes-Mapping.csv
│   │   │   ├── bbox_size.pkl
│   │   │   ├── camera_pose.pkl
│   │   │   ├── nyu40_labels.csv
│   │   │   ├── reference_axis_align_matrix.pkl
│   │   │   ├── scans.txt
│   │   │   ├── split
│   │   │   │   ├── 3rscan_test.txt
│   │   │   │   ├── 3rscan_train.txt
│   │   │   │   ├── 3rscan_val.txt
│   │   │   │   ├── test.txt
│   │   │   │   ├── train.txt
│   │   │   │   └── val.txt
│   │   │   ├── test.txt
│   │   │   ├── train.txt
│   │   │   └── val.txt
│   │   ├── prepare_bins_pkls.py
│   │   ├── preprocess_raw_data.py
│   │   └── utils.py
│   ├── arkitscenes
│   │   ├── README.md
│   │   ├── arkitscenes_data_utils.py
│   │   ├── data_prepare_offline.py
│   │   ├── misc.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── box_utils.py
│   │       ├── pc_utils.py
│   │       ├── rotation.py
│   │       ├── taxonomy.py
│   │       └── tenFpsDataLoader.py
│   ├── multiscan
│   │   ├── README.md
│   │   └── prepare_bins_pkls.py
│   ├── s3dis
│   │   ├── README.md
│   │   └── remap_superpoints.py
│   ├── scannet
│   │   ├── README.md
│   │   ├── batch_load_scannet_data.py
│   │   ├── load_scannet_data.py
│   │   ├── meta_data
│   │   │   ├── scannet_means.npz
│   │   │   ├── scannet_train.txt
│   │   │   ├── scannetv2-labels.combined.tsv
│   │   │   ├── scannetv2_test.txt
│   │   │   ├── scannetv2_train.txt
│   │   │   └── scannetv2_val.txt
│   │   └── scannet_utils.py
│   └── scannetpp
│       ├── README.md
│       ├── prepare_bins_pkls.py
│       └── preprocess_raw_data.py
├── tools
│   ├── create_data.py
│   ├── indoor_converter.py
│   ├── scannet_data_utils.py
│   ├── test.py
│   ├── train.py
│   └── update_infos_to_v2.py
└── unidet3d
    ├── __init__.py
    ├── arkitscenes_dataset.py
    ├── axis_aligned_iou_loss.py
    ├── concat_dataset.py
    ├── criterion.py
    ├── data_preprocessor.py
    ├── encoder.py
    ├── formatting.py
    ├── image_vis.py
    ├── indoor_eval.py
    ├── indoor_metric.py
    ├── loading.py
    ├── multiscan_dataset.py
    ├── rotated_iou_loss.py
    ├── rscan_dataset.py
    ├── s3dis_dataset.py
    ├── scannet_dataset.py
    ├── scannetpp_dataset.py
    ├── show_results.py
    ├── spconv_unet.py
    ├── structures.py
    ├── transforms_3d.py
    └── unidet3d.py
/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 | work_dirs
3 | .vscode
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 | *.ipynb
8 | *ipynb_checkpoints
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel
2 |
3 | # Install base apt packages
4 | RUN apt-get update \
5 | && DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libopenblas-dev
6 |
7 | # Install MinkowskiEngine
8 | RUN TORCH_CUDA_ARCH_LIST="6.1 7.0 8.6 9.0" \
9 | pip install git+https://github.com/daizhirui/MinkowskiEngine.git@ce930eeb403a8e3f99693662ec5ce329a0ab3528 -v --no-deps \
10 | --global-option="--blas=openblas" \
11 | --global-option="--force_cuda"
12 |
13 | # Install OpenMMLab projects
14 | RUN pip install --no-deps \
15 | mmengine==0.9.0 \
16 | mmdet==3.3.0 \
17 | mmsegmentation==1.2.0 \
18 | mmdet3d==1.4.0 \
19 | mmpretrain==1.2.0
20 |
21 | # Install mmcv
22 | RUN git clone https://github.com/open-mmlab/mmcv.git \
23 | && cd mmcv \
24 | && git reset --hard 780ffed9f3736fedadf18b51266ecbf521e64cf6 \
25 | && sed -i "s/'-std=c++14'] if cuda_args else/'-std=c++14', '-arch=sm_90'] if cuda_args else/g" setup.py \
26 | && TORCH_CUDA_ARCH_LIST="6.1 7.0 8.6 9.0" \
27 |        pip install -v -e . --no-deps \
28 | && cd ..
29 |
30 | # Install torch-scatter
31 | RUN pip install torch-scatter==2.1.2 -f https://data.pyg.org/whl/torch-2.1.0+cu121.html --no-deps
32 |
33 | # Install ScanNet superpoint segmentator
34 | RUN git clone https://github.com/Karbo123/segmentator.git \
35 | && cd segmentator/csrc \
36 | && git reset --hard 76efe46d03dd27afa78df972b17d07f2c6cfb696 \
37 | && sed -i "s/set(CMAKE_CXX_STANDARD 14)/set(CMAKE_CXX_STANDARD 17)/g" CMakeLists.txt \
38 | && mkdir build \
39 | && cd build \
40 | && cmake .. \
41 | -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` \
42 | -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \
43 | -DPYTHON_LIBRARY=$(python -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))") \
44 | -DCMAKE_INSTALL_PREFIX=`python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())'` \
45 | && make \
46 | && make install \
47 | && cd ../../..
48 |
49 | # Install remaining python packages
50 | RUN pip install --no-deps \
51 | spconv-cu120==2.3.6 \
52 | addict==2.4.0 \
53 | yapf==0.33.0 \
54 | termcolor==2.3.0 \
55 | packaging==23.1 \
56 | numpy==1.24.1 \
57 | rich==13.3.5 \
58 | opencv-python==4.7.0.72 \
59 | pycocotools==2.0.6 \
60 | Shapely==1.8.5 \
61 | scipy==1.10.1 \
62 | terminaltables==3.1.10 \
63 | numba==0.57.0 \
64 | llvmlite==0.40.0 \
65 | pccm==0.4.7 \
66 | ccimport==0.4.2 \
67 | pybind11==2.10.4 \
68 | ninja==1.11.1 \
69 | lark==1.1.5 \
70 | cumm-cu120==0.5.1 \
71 | pyquaternion==0.9.9 \
72 | lyft-dataset-sdk==0.0.8 \
73 | pandas==2.0.1 \
74 | python-dateutil==2.8.2 \
75 | matplotlib==3.5.2 \
76 | pyparsing==3.0.9 \
77 | cycler==0.11.0 \
78 | kiwisolver==1.4.4 \
79 | scikit-learn==1.2.2 \
80 | joblib==1.2.0 \
81 | threadpoolctl==3.1.0 \
82 | cachetools==5.3.0 \
83 | nuscenes-devkit==1.1.10 \
84 | trimesh==3.21.6 \
85 | open3d==0.17.0 \
86 | plotly==5.18.0 \
87 | dash==2.14.2 \
88 | plyfile==1.0.2 \
89 | flask==3.0.0 \
90 | werkzeug==3.0.1 \
91 | click==8.1.7 \
92 | blinker==1.7.0 \
93 | itsdangerous==2.1.2 \
94 | importlib_metadata==2.1.2 \
95 | zipp==3.17.0 \
96 | natsort==8.4.0 \
97 | timm==0.9.16 \
98 | imageio==2.34.0 \
99 | portalocker==2.8.2 \
100 | ftfy==6.2.0 \
101 | regex==2024.4.16
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## UniDet3D: Multi-dataset Indoor 3D Object Detection
2 |
3 | **News**:
4 | * :fire: December, 2024. UniDet3D is now accepted at AAAI 2025.
5 | * :fire: September, 2024. UniDet3D is state-of-the-art on 6 indoor benchmarks:
6 | [ScanNet](https://paperswithcode.com/sota/3d-object-detection-on-scannetv2?p=unidet3d-multi-dataset-indoor-3d-object),
7 | [ARKitScenes](https://paperswithcode.com/sota/3d-object-detection-on-arkitscenes?p=unidet3d-multi-dataset-indoor-3d-object),
8 | [S3DIS](https://paperswithcode.com/sota/3d-object-detection-on-s3dis?p=unidet3d-multi-dataset-indoor-3d-object),
9 | [MultiScan](https://paperswithcode.com/sota/3d-object-detection-on-multiscan?p=unidet3d-multi-dataset-indoor-3d-object),
10 | [3RScan](https://paperswithcode.com/sota/3d-object-detection-on-3rscan?p=unidet3d-multi-dataset-indoor-3d-object),
11 | [ScanNet++](https://paperswithcode.com/sota/3d-object-detection-on-scannet-1?p=unidet3d-multi-dataset-indoor-3d-object).
12 |
13 | This repository contains an implementation of UniDet3D, a multi-dataset indoor 3D object detection method introduced in our paper:
14 |
15 | > **UniDet3D: Multi-dataset Indoor 3D Object Detection**
16 | > [Maksim Kolodiazhnyi](https://github.com/col14m),
17 | > [Anna Vorontsova](https://github.com/highrut),
18 | > [Matvey Skripkin](https://scholar.google.com/citations?user=hAlwb4wAAAAJ),
19 | > [Danila Rukhovich](https://github.com/filaPro),
20 | > [Anton Konushin](https://scholar.google.com/citations?user=ZT_k-wMAAAAJ)
21 | >
22 | > Artificial Intelligence Research Institute
23 | > https://arxiv.org/abs/2409.04234
24 |
25 | ### Installation
26 |
27 | For convenience, we provide a [Dockerfile](Dockerfile).
28 | This implementation is based on the [mmdetection3d](https://github.com/open-mmlab/mmdetection3d) framework `v1.1.0`. If you are not using Docker, please follow [getting_started.md](https://github.com/open-mmlab/mmdetection3d/blob/22aaa47fdb53ce1870ff92cb7e3f96ae38d17f61/docs/en/get_started.md) for installation instructions.
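
A typical build-and-run sequence looks like this (a minimal sketch; the image name is arbitrary, and the bind mount to `/opt/project` mirrors the layout assumed by some helper scripts in this repo):

```bash
# build the image from the provided Dockerfile and start an interactive container
docker build -t unidet3d .
docker run --gpus all -it --shm-size 8g -v $(pwd):/opt/project unidet3d
```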
29 |
30 |
31 | ### Getting Started
32 |
33 | Please see [train_test.md](https://github.com/open-mmlab/mmdetection3d/blob/22aaa47fdb53ce1870ff92cb7e3f96ae38d17f61/docs/en/user_guides/train_test.md) for some basic usage examples.
34 |
35 | #### Data Preprocessing
36 |
37 | UniDet3D is trained and tested using 6 datasets: [ScanNet](data/scannet), [ARKitScenes](data/arkitscenes), [S3DIS](data/s3dis), [MultiScan](data/multiscan), [3RScan](data/3rscan), and [ScanNet++](data/scannetpp).
38 | Preprocessed data can be found at our [Hugging Face](https://huggingface.co/datasets/maksimko123/UniDet3D). Download each archive, unpack it, and move it into the corresponding directory in [data](data). Please comply with the license agreement before downloading the data.
39 |
40 | Alternatively, you can preprocess the data yourself.
41 | Training data for 3D object detection methods that do not require superpoints, e.g. [TR3D](https://github.com/SamsungLabs/tr3d) or [FCAF3D](https://github.com/SamsungLabs/fcaf3d), can be prepared according to the [instructions](data).
42 |
43 | Superpoints for ScanNet and MultiScan are provided as part of the original annotation. For the remaining datasets, you can either download pre-computed superpoints from our [Hugging Face](https://huggingface.co/datasets/maksimko123/UniDet3D) or compute them using [superpoint_transformer](https://github.com/drprojects/superpoint_transformer).
44 |
45 | #### Training
46 |
47 | Before training, please download the backbone [checkpoint](https://github.com/filapro/oneformer3d/releases/download/v1.0/oneformer3d_1xb4_scannet.pth) and save it under `work_dirs/tmp`.
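For example (a minimal sketch using the link above; `wget` is assumed to be available):

```bash
mkdir -p work_dirs/tmp
wget -P work_dirs/tmp \
    https://github.com/filapro/oneformer3d/releases/download/v1.0/oneformer3d_1xb4_scannet.pth
```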
48 |
49 | To train UniDet3D on 6 datasets jointly, simply run the [training](tools/train.py) script:
50 |
51 | ```bash
52 | python tools/train.py configs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py
53 | ```
54 |
55 | UniDet3D can also be trained on individual datasets; e.g., we provide a [config](configs/unidet3d_1xb8_scannet.py) for training on ScanNet only.
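
Such a single-dataset run uses the same training script:

```bash
python tools/train.py configs/unidet3d_1xb8_scannet.py
```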
56 |
57 |
58 | #### Testing
59 |
60 | To test a trained model, you can run the [testing](tools/test.py) script:
61 |
62 | ```bash
63 | python tools/test.py configs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py \
64 | work_dirs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes/epoch_1024.pth
65 | ```
66 |
67 | UniDet3D can also be tested on individual datasets. To this end, simply remove the unwanted datasets from `val_dataloader.dataset.datasets` in the config file.
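
Schematically, this amounts to keeping only the desired entries in that list (a hedged sketch, not verbatim config code; the variable names below are placeholders for the per-dataset dicts actually defined in the config):

```python
# sketch of the edited config: evaluate on ScanNet only
val_dataloader = dict(
    dataset=dict(
        datasets=[
            scannet_val_dataset,   # placeholder for the ScanNet entry
            # s3dis_val_dataset,   # the remaining dataset entries are
            # ...                  # removed or commented out
        ]))
```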
68 |
69 | #### Visualization
70 |
71 | To visualize ground truth and predicted boxes, run the [testing](tools/test.py) script with additional arguments:
72 |
73 | ```bash
74 | python tools/test.py configs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py \
75 | work_dirs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes/latest.pth --show \
76 | --show-dir work_dirs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes
77 | ```
78 | You can also set `score_thr` in configs to `0.3` for better visualizations.
79 |
80 | ### Trained Model
81 |
82 | Please refer to the UniDet3D [checkpoint](https://github.com/filapro/unidet3d/releases/download/v1.0/unidet3d.pth) and [log file](https://github.com/filapro/unidet3d/releases/download/v1.0/log.txt). The corresponding metrics are given below (they might slightly deviate from the values reported in the paper due to the randomized training/testing procedure).
83 |
84 | | Dataset | mAP25 | mAP50 |
85 | |:-----------:|:-----------------:|:-----------------:|
86 | | ScanNet | 77.0 | 65.9 |
87 | | ARKitScenes | 60.1 | 47.2 |
88 | | S3DIS | 76.7 | 65.3 |
89 | | MultiScan | 62.6 | 52.3 |
90 | | 3RScan | 63.6 | 44.9 |
91 | | ScanNet++ | 24.0 | 16.8 |
92 |
93 | ### Predictions Example
94 |
95 |
96 |
97 |
98 |
99 | ### Citation
100 |
101 | If you find this work useful for your research, please cite our paper:
102 |
103 | ```
104 | @inproceedings{kolodiazhnyi2025unidet3d,
105 | title={Unidet3d: Multi-dataset indoor 3d object detection},
106 | author={Kolodiazhnyi, Maksim and Vorontsova, Anna and Skripkin, Matvey and Rukhovich, Danila and Konushin, Anton},
107 | booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
108 | volume={39},
109 | number={4},
110 | pages={4365--4373},
111 | year={2025}
112 | }
113 | ```
114 |
--------------------------------------------------------------------------------
/data/3rscan/README.md:
--------------------------------------------------------------------------------
1 | ## Prepare 3RScan Data for Indoor 3D Detection
2 |
3 | 1. Download data from the official [3RScan](https://waldjohannau.github.io/RIO/) project page.
4 |
5 | 2. Preprocess raw data by running:
6 |
7 | ```bash
8 | python preprocess_raw_data.py --dataset_root path_to_dataset --output_root path_to_save_preprocessed_raw_data
9 | ```
10 |
11 | 3. Generate the `.bin` and `.pkl` files by running:
12 |
13 | ```bash
14 | python prepare_bins_pkls.py --path_to_data path_to_preprocessed_raw_data --path_to_save_bins path_to_save_bins
15 | ```
16 |
17 | Overall, you should end up with the following file structure in the `bins` directory:
18 | ```
19 | bins
20 | ├── bboxs
21 | │ ├── xxxxx_xx.npy
22 | ├── instance_mask
23 | │ ├── xxxxx_xx.bin
24 | ├── points
25 | │ ├── xxxxx_xx.bin
26 | ├── semantic_mask
27 | │ ├── xxxxx_xx.bin
28 | ├── superpoints
29 | │ ├── xxxxx_xx.bin
30 | ├── 3rscan_infos_train.pkl
31 | ├── 3rscan_infos_val.pkl
32 | ├── 3rscan_infos_test.pkl
33 | ```
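
As a quick sanity check (an illustrative sketch; the `xxxxx_xx` placeholders follow the listing above), points are stored as flat `float32` binaries with 6 values per point (xyz + rgb), while the masks and superpoints are `int64` binaries:

```python
import numpy as np

# substitute a real scene id from bins/points
points = np.fromfile('bins/points/xxxxx_xx.bin', dtype=np.float32).reshape(-1, 6)
semantic = np.fromfile('bins/semantic_mask/xxxxx_xx.bin', dtype=np.int64)
superpoints = np.fromfile('bins/superpoints/xxxxx_xx.bin', dtype=np.int64)
assert len(points) == len(semantic) == len(superpoints)
print(points.shape)
```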
34 |
--------------------------------------------------------------------------------
/data/3rscan/meta_data/bbox_size.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/3rscan/meta_data/bbox_size.pkl
--------------------------------------------------------------------------------
/data/3rscan/meta_data/camera_pose.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/3rscan/meta_data/camera_pose.pkl
--------------------------------------------------------------------------------
/data/3rscan/meta_data/nyu40_labels.csv:
--------------------------------------------------------------------------------
1 | nyu40id,nyu40class,mappedId,mappedIdConsecutive,weight
2 | 1,wall,(ignore),19,0.0
3 | 2,floor,(ignore),19,0.0
4 | 3,cabinet,3,1,3.9644974086960434
5 | 4,bed,4,2,5.459494152836571
6 | 5,chair,5,3,2.241522691584157
7 | 6,sofa,6,4,4.820655512680854
8 | 7,table,7,5,3.565918577548873
9 | 8,door,8,6,3.538498341919445
10 | 9,window,9,7,4.636521236560596
11 | 10,bookshelf,10,8,5.445050937449535
12 | 11,picture,11,9,5.079250281008131
13 | 12,counter,12,10,6.2030429647735845
14 | 13,blinds,(ignore),19,0.0
15 | 14,desk,14,11,4.622662494840168
16 | 15,shelves,(ignore),19,0.0
17 | 16,curtain,16,12,5.956294301248057
18 | 17,dresser,(ignore),19,0.0
19 | 18,pillow,(ignore),19,0.0
20 | 19,mirror,(ignore),19,0.0
21 | 20,floor_mat,(ignore),19,0.0
22 | 21,clothes,(ignore),19,0.0
23 | 22,ceiling,(ignore),19,0.0
24 | 23,books,(ignore),19,0.0
25 | 24,refridgerator,24,13,5.459141107819665
26 | 25,television,(ignore),19,0.0
27 | 26,paper,(ignore),19,0.0
28 | 27,towel,(ignore),19,0.0
29 | 28,shower_curtain,28,14,6.724871661883906
30 | 29,box,(ignore),19,0.0
31 | 30,whiteboard,(ignore),19,0.0
32 | 31,person,(ignore),19,0.0
33 | 32,night_stand,(ignore),19,0.0
34 | 33,toilet,33,15,5.832442848923174
35 | 34,sink,34,16,5.064773947290611
36 | 35,lamp,(ignore),19,0.0
37 | 36,bathtub,36,17,6.738988357113375
38 | 37,bag,(ignore),19,0.0
39 | 38,otherstructure,(ignore),19,0.0
40 | 39,otherfurniture,39,18,3.375217918833916
41 | 40,otherprop,(ignore),19,0.0
--------------------------------------------------------------------------------
/data/3rscan/meta_data/reference_axis_align_matrix.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/3rscan/meta_data/reference_axis_align_matrix.pkl
--------------------------------------------------------------------------------
/data/3rscan/meta_data/split/3rscan_test.txt:
--------------------------------------------------------------------------------
1 | 00d42bed-778d-2ac6-86a7-0e0e5f5f5660
2 | 0988ea78-eb32-2e61-80ee-e4a44170bce9
3 | 0cac7547-8d6f-2d13-8c9c-08cad6632f46
4 | 0cac758d-8d6f-2d13-8d21-9133668b4d7d
5 | 0cac75e4-8d6f-2d13-8f7c-fb5b5747d1ba
6 | 0cac7633-8d6f-2d13-8e91-8b46e2b70f34
7 | 0cac7678-8d6f-2d13-8da3-dba8636cef51
8 | 10b17942-3938-2467-8933-5d40ada6d445
9 | 13af338e-7397-2e54-84fc-fa003f91ac0c
10 | 1d233ff0-e280-2b1a-8c10-2af29c344ee3
11 | 1d23402c-e280-2b1a-8e31-9a88c9c1fb91
12 | 20c993bb-698f-29c5-8569-0db38352364f
13 | 283ccfeb-107c-24d5-8bbf-05519a3c7c47
14 | 352e9c30-69fb-27a7-8b19-c703f0e190da
15 | 38770cb0-86d7-27b8-8466-1782505891fd
16 | 422885b9-192d-25fc-84f4-6f7c1afd29af
17 | 4fbad320-465b-2a5d-85d7-dc83aa09a9fa
18 | 5341b7af-8a66-2cdd-8431-8c73b0c10878
19 | 5341b7e7-8a66-2cdd-87ce-86596253cfa4
20 | 5630cfdc-12bf-2860-87b7-c7eab95718be
21 | 634b2183-f5d0-2fb7-87ef-c536720918dc
22 | 634d11cd-6833-255d-8c5c-2b0cf661494a
23 | 6a360555-fa53-2915-9721-bff473f98125
24 | 6bde608b-9162-246f-8d16-901b429b2563
25 | 6bde60d2-9162-246f-8dbf-dbd911924def
26 | 751a5598-fe61-2c3b-8cf2-1c23632af9b4
27 | 75c259a1-9ca2-2844-973c-adc28f935d5d
28 | 7747a514-9431-24e8-8505-5979f3f20906
29 | 8eabc410-5af7-2f32-844d-34726ad2dc7e
30 | 8eabc463-5af7-2f32-8537-22977a89efdd
31 | a0905fe1-66f7-2272-9d4a-a6f6579f6751
32 | a644cb8e-0ee5-2f66-9c91-1db77b5a8f96
33 | ad408c83-84db-2095-8aa4-924f966af2dc
34 | b05fdda4-fca0-2d4f-8beb-6172b0772f91
35 | b1f2330c-d255-2761-965e-d203c6e253c3
36 | ba6fdaae-a4c1-2dca-8240-6fbe1cb25eb4
37 | bcb0fe0a-4f39-2c70-9d5f-a7263a0180ca
38 | bf9a3da4-45a5-2e80-8082-be634b241693
39 | bf9a3df5-45a5-2e80-82c1-b108dc9a3fbe
40 | c7895f2f-339c-2d13-8388-28e95af958de
41 | c7895f80-339c-2d13-8253-64b490296e49
42 | c9fb7aa1-2a5b-2cf7-9222-6f111cb28b2b
43 | d7d40d54-7a5d-2b36-94d4-7cf59473177f
44 | ddc7379d-765b-241a-9f0b-50b72d6cd829
45 | ebc42041-82a4-2113-8583-cc8c1be818b3
46 | fcf66d8e-622d-291c-86ef-f8f2b3db7f74
--------------------------------------------------------------------------------
/data/3rscan/meta_data/split/3rscan_val.txt:
--------------------------------------------------------------------------------
1 | 095821f7-e2c2-2de1-9568-b9ce59920e29
2 | 2e369567-e133-204c-909a-c5da44bb58df
3 | 095821f9-e2c2-2de1-9707-8f735cd1c148
4 | 095821fb-e2c2-2de1-94df-20f2cb423bcb
5 | 0988ea72-eb32-2e61-8344-99e2283c2728
6 | 9766cbe5-6321-2e2f-8040-4e5b7a5d8ba1
7 | 9766cbf5-6321-2e2f-8131-78c4e204635d
8 | 9766cbf7-6321-2e2f-81e1-2b46533c64dd
9 | 0cac7540-8d6f-2d13-8eee-36ba2a428e3f
10 | 0cac7532-8d6f-2d13-8cea-1e70d5ae4856
11 | 0cac7534-8d6f-2d13-8de7-8a915ed90050
12 | 0cac7584-8d6f-2d13-8df8-c05e4307b418
13 | 0cac7582-8d6f-2d13-8d4b-e4041cb166c4
14 | 0cac75dc-8d6f-2d13-8d08-9c497bd6acdc
15 | 0cac75de-8d6f-2d13-8e1a-b574569c3885
16 | 0cac7676-8d6f-2d13-8f3a-d7bf7f03e721
17 | 7272e17c-a01b-20f6-8b2f-e659331ae41a
18 | 10b17940-3938-2467-8a7a-958300ba83d3
19 | c2d9933f-1947-2fbf-807f-c44bc1aed269
20 | c2d9933d-1947-2fbf-81fa-c8a7f9625eea
21 | 5630cfe7-12bf-2860-8710-52729dc36cc6
22 | c2d99349-1947-2fbf-837e-a0bd5e027c52
23 | c2d99347-1947-2fbf-834b-f95790c125dd
24 | c2d99345-1947-2fbf-818d-90ea82acef29
25 | c2d99343-1947-2fbf-808f-92dbb7d47aa5
26 | c2d99341-1947-2fbf-817a-5aa9b44f724f
27 | 6e67e55f-1209-2cd0-8194-8c6278434c80
28 | 137a8158-1db5-2cc0-8003-31c12610471e
29 | 5630cfd3-12bf-2860-8749-9dacb499fb14
30 | c92fb5b7-f771-2064-87a9-31c819832405
31 | f2c76ff1-2239-29d0-87f5-8a0346584384
32 | f2c76fed-2239-29d0-8598-9ed42cec9dc5
33 | f2c76feb-2239-29d0-8418-72b6051fc144
34 | f2c76fe9-2239-29d0-87ec-f2c7ced812c1
35 | f2c76fe7-2239-29d0-84f5-144c30fd7451
36 | f2c76fe5-2239-29d0-8593-1a2555125595
37 | d04eb40f-1d53-27ea-8a41-47892bde7017
38 | 6e67e550-1209-2cd0-8294-7cc2564cf82c
39 | 10b1792a-3938-2467-8b4e-a93da27a0985
40 | 1d233fe8-e280-2b1a-8fac-c3646a1cd64a
41 | 1d233fe6-e280-2b1a-8caf-eb0d13a59ad6
42 | 1d234022-e280-2b1a-8cec-e9787bb0d7b2
43 | 1d234026-e280-2b1a-8fe1-713e28269f4d
44 | 1d234024-e280-2b1a-8c28-2743fefed020
45 | 20c993b7-698f-29c5-847d-c8cb8a685f5a
46 | 20c993b3-698f-29c5-859c-dca8ddecf220
47 | 280d8ebb-6cc6-2788-9153-98959a2da801
48 | 4731976c-f9f7-2a1a-95cc-31c4d1751d0b
49 | 1d2f850c-d757-207c-8fba-60b90a7d4691
50 | ea318260-0a4c-2749-9389-4c16c782c4b1
51 | 10b17957-3938-2467-88a5-9e9254930dad
52 | 321c8680-a5a8-2a84-85c2-816a26d59516
53 | 321c867e-a5a8-2a84-851a-818df115be05
54 | 38770ca1-86d7-27b8-8619-ab66f67d9adf
55 | 38770ca3-86d7-27b8-85a7-7d840ffdec6a
56 | 38770ca5-86d7-27b8-871c-57fdbfe87905
57 | 4138582f-a238-2435-8332-6902542c2823
58 | 5341b7a5-8a66-2cdd-8751-70b98263cb8d
59 | 8eabc445-5af7-2f32-85ae-90deb8eb1b0b
60 | 422885b3-192d-25fc-84c9-9b80eea1752d
61 | 422885c5-192d-25fc-85e6-12a3d65c8e7b
62 | 4238490c-60a7-271e-9f38-3c651e3b3912
63 | 4238490a-60a7-271e-9c04-3846221dc354
64 | 42384908-60a7-271e-9c46-01e562c8974c
65 | 10b17963-3938-2467-8a48-0d4af350ce92
66 | 43b8cae1-6678-2e38-9865-c19c07c25015
67 | 43b8cadf-6678-2e38-9920-064144c99406
68 | 43b8cae3-6678-2e38-9b67-5905de29f6d7
69 | 4a9a43d2-7736-2874-874d-d0fad0570e19
70 | 4a9a43d4-7736-2874-87a6-0c3089281af8
71 | 4d3d82b0-8cf4-2e04-80a8-c955ea964c2f
72 | 4d3d82ae-8cf4-2e04-80de-20f96c814d9c
73 | 4fbad31e-465b-2a5d-84b7-c0ddea978db4
74 | 4fbad32f-465b-2a5d-8408-146ab1d72808
75 | 4fbad331-465b-2a5d-8488-852fcda9513c
76 | 9af05c68-5794-2e19-8c5a-979f448da545
77 | 5341b79f-8a66-2cdd-84d5-6dba6525dd75
78 | 7272e16c-a01b-20f6-8961-a0927b4a7629
79 | 5341b7e3-8a66-2cdd-8709-66a2159f0017
80 | 4138585b-a238-2435-810b-9728fc989b2f
81 | 5630cfcf-12bf-2860-8784-83d28a611a83
82 | 5630cfd1-12bf-2860-86b2-e7a96bc32c19
83 | bf9a3d9e-45a5-2e80-83c6-4e427c5586a2
84 | 10b1792e-3938-2467-8bb3-172148ae5a67
85 | 10b17944-3938-2467-8bac-5552375e4467
86 | 6bde6081-9162-246f-8c4e-ffaf709d17b1
87 | 6bde607b-9162-246f-8e65-76e3ef265504
88 | 6bde607d-9162-246f-8f84-98cf7ac2374c
89 | 6bde6083-9162-246f-8c9c-e170212059b2
90 | 6bde60cb-9162-246f-8cf5-d04f7426e56f
91 | 6bde60cd-9162-246f-8fad-fca80b4d6ad8
92 | 6bde60cf-9162-246f-8f98-6355d75494c2
93 | 742e8f15-be0a-294e-9ebb-6c72dbcb9662
94 | 68bae75f-3567-2f7c-829d-7422117729f3
95 | 742e8f17-be0a-294e-9dd3-52492d308e2b
96 | 742e8f19-be0a-294e-9eb6-50dc474b110e
97 | 75c25975-9ca2-2844-9769-84677f46d4cf
98 | 8eabc455-5af7-2f32-8606-a0bdbe6c537d
99 | 7747a50c-9431-24e8-877d-e60c3a341cc2
100 | 7747a4ec-9431-24e8-848f-897279a1e9fe
101 | 7747a510-9431-24e8-8705-907ee78be2a2
102 | 8e0f1c28-9e28-2339-8584-ff06ff93c341
103 | 20c993bd-698f-29c5-8494-5556ba7d3fe9
104 | 20c993bf-698f-29c5-8549-a69fd169c1e1
105 | 8eabc45f-5af7-2f32-8528-640861d2a135
106 | 75c25989-9ca2-2844-97b4-31b81f7554b8
107 | 41385849-a238-2435-81d0-ceb0eba4541a
108 | 5341b7b3-8a66-2cdd-856d-9d70e194568b
109 | 8eabc461-5af7-2f32-8663-ce5a10fd97b3
110 | a0905fd9-66f7-2272-9dfb-0483fdcc54c7
111 | a0905fdb-66f7-2272-9fc5-7c0008d5e87b
112 | a0905fdd-66f7-2272-9cdb-89360888ea67
113 | ab835fae-54c6-29a1-995e-b06cfc555786
114 | ab835faa-54c6-29a1-9b55-1a5217fcba19
115 | d7dc987e-a34a-2794-85c8-f75389b27532
116 | b05fdd8a-fca0-2d4f-8ac5-f6ae697787f5
117 | 0cac75e8-8d6f-2d13-8fc4-acdbf00437c8
118 | 0cac75ea-8d6f-2d13-8e50-c5faf0159e32
119 | c7895f63-339c-2d13-81a3-0b07b1eb23b4
120 | b05fdd96-fca0-2d4f-88c3-d9dfda85c00e
121 | 0cac761b-8d6f-2d13-8f16-23a7d73c54fe
122 | 0cac7619-8d6f-2d13-8f36-ac562ec9a4de
123 | b1f23308-d255-2761-94da-981d962c6bf8
124 | 77361fd4-d054-2a22-88c4-b5b404f904ca
125 | ba6fdaaa-a4c1-2dca-8163-a52b18bf6b64
126 | ba6fdaac-a4c1-2dca-8380-f16765679fd7
127 | bcb0fe06-4f39-2c70-9c24-a8dd7496c2f8
128 | bcb0fe04-4f39-2c70-9f03-d0eec592de24
129 | bf9a3da2-45a5-2e80-8219-1f0a216399fe
130 | cdcaf5bd-ddd8-2ed6-97c3-489e105e4dde
131 | 10b1794e-3938-2467-89a7-ebc89e84cf88
132 | bf9a3de9-45a5-2e80-8022-277108d67404
133 | bf9a3ddf-45a5-2e80-8007-8e9e7f323e52
134 | bf9a3ddd-45a5-2e80-80bc-647365c7ca08
135 | bf9a3dd9-45a5-2e80-817c-f918e193231b
136 | c7895f27-339c-2d13-836b-c12dca280261
137 | c7895f2b-339c-2d13-8248-b0507e050314
138 | c7895f29-339c-2d13-83e9-90dbe61fa8be
139 | c7895f7c-339c-2d13-819f-3bb0b26c91f6
140 | c7895f7a-339c-2d13-82ac-09ef1c9001ba
141 | c7895f78-339c-2d13-82bb-cc990cbbc90f
142 | c92fb5b5-f771-2064-8570-dbe16cb33764
143 | 5630cfde-12bf-2860-8563-d68bdd98fab0
144 | 10b1793e-3938-2467-8b92-f56541e7ef9e
145 | d7d40d4e-7a5d-2b36-97e7-34324c52ac42
146 | d7d40d4c-7a5d-2b36-95c1-5f6c9147caf0
147 | d7d40d50-7a5d-2b36-9446-7d636174329f
148 | ddc73797-765b-241a-9e2c-097c5989baf6
149 | 2451c048-fae8-24f6-9043-f1604dbada2c
150 | ddc7379b-765b-241a-9f45-c37e41608726
151 | ddc73799-765b-241a-9c30-f75dcb7627d4
152 | 0cac75b7-8d6f-2d13-8cb2-0b4e06913140
153 | c7895f07-339c-2d13-8176-7418b6e8d7ce
154 | e61b0e04-bada-2f31-82d6-72831a602ba7
155 | e61b0e02-bada-2f31-82d0-80fc5c70bd6f
156 | fcf66d7b-622d-291c-86b8-7db96aebcee3
157 | 787ed58c-9d98-2c97-83b9-b48a609ace15
--------------------------------------------------------------------------------
/data/3rscan/meta_data/split/test.txt:
--------------------------------------------------------------------------------
1 | 00d42bed-778d-2ac6-86a7-0e0e5f5f5660
2 | 0988ea78-eb32-2e61-80ee-e4a44170bce9
3 | 0cac7547-8d6f-2d13-8c9c-08cad6632f46
4 | 0cac758d-8d6f-2d13-8d21-9133668b4d7d
5 | 0cac75e4-8d6f-2d13-8f7c-fb5b5747d1ba
6 | 0cac7633-8d6f-2d13-8e91-8b46e2b70f34
7 | 0cac7678-8d6f-2d13-8da3-dba8636cef51
8 | 10b17942-3938-2467-8933-5d40ada6d445
9 | 13af338e-7397-2e54-84fc-fa003f91ac0c
10 | 1d233ff0-e280-2b1a-8c10-2af29c344ee3
11 | 1d23402c-e280-2b1a-8e31-9a88c9c1fb91
12 | 20c993bb-698f-29c5-8569-0db38352364f
13 | 283ccfeb-107c-24d5-8bbf-05519a3c7c47
14 | 352e9c30-69fb-27a7-8b19-c703f0e190da
15 | 38770cb0-86d7-27b8-8466-1782505891fd
16 | 422885b9-192d-25fc-84f4-6f7c1afd29af
17 | 4fbad320-465b-2a5d-85d7-dc83aa09a9fa
18 | 5341b7af-8a66-2cdd-8431-8c73b0c10878
19 | 5341b7e7-8a66-2cdd-87ce-86596253cfa4
20 | 5630cfdc-12bf-2860-87b7-c7eab95718be
21 | 634b2183-f5d0-2fb7-87ef-c536720918dc
22 | 634d11cd-6833-255d-8c5c-2b0cf661494a
23 | 6a360555-fa53-2915-9721-bff473f98125
24 | 6bde608b-9162-246f-8d16-901b429b2563
25 | 6bde60d2-9162-246f-8dbf-dbd911924def
26 | 751a5598-fe61-2c3b-8cf2-1c23632af9b4
27 | 75c259a1-9ca2-2844-973c-adc28f935d5d
28 | 7747a514-9431-24e8-8505-5979f3f20906
29 | 8eabc410-5af7-2f32-844d-34726ad2dc7e
30 | 8eabc463-5af7-2f32-8537-22977a89efdd
31 | a0905fe1-66f7-2272-9d4a-a6f6579f6751
32 | a644cb8e-0ee5-2f66-9c91-1db77b5a8f96
33 | ad408c83-84db-2095-8aa4-924f966af2dc
34 | b05fdda4-fca0-2d4f-8beb-6172b0772f91
35 | b1f2330c-d255-2761-965e-d203c6e253c3
36 | ba6fdaae-a4c1-2dca-8240-6fbe1cb25eb4
37 | bcb0fe0a-4f39-2c70-9d5f-a7263a0180ca
38 | bf9a3da4-45a5-2e80-8082-be634b241693
39 | bf9a3df5-45a5-2e80-82c1-b108dc9a3fbe
40 | c7895f2f-339c-2d13-8388-28e95af958de
41 | c7895f80-339c-2d13-8253-64b490296e49
42 | c9fb7aa1-2a5b-2cf7-9222-6f111cb28b2b
43 | d7d40d54-7a5d-2b36-94d4-7cf59473177f
44 | ddc7379d-765b-241a-9f0b-50b72d6cd829
45 | ebc42041-82a4-2113-8583-cc8c1be818b3
46 | fcf66d8e-622d-291c-86ef-f8f2b3db7f74
--------------------------------------------------------------------------------
/data/3rscan/meta_data/split/val.txt:
--------------------------------------------------------------------------------
1 | 0988ea72-eb32-2e61-8344-99e2283c2728
2 | 0cac7540-8d6f-2d13-8eee-36ba2a428e3f
3 | 0cac7584-8d6f-2d13-8df8-c05e4307b418
4 | 0cac75dc-8d6f-2d13-8d08-9c497bd6acdc
5 | 0cac7676-8d6f-2d13-8f3a-d7bf7f03e721
6 | 10b17940-3938-2467-8a7a-958300ba83d3
7 | 137a8158-1db5-2cc0-8003-31c12610471e
8 | 1d233fe8-e280-2b1a-8fac-c3646a1cd64a
9 | 1d234022-e280-2b1a-8cec-e9787bb0d7b2
10 | 20c993b7-698f-29c5-847d-c8cb8a685f5a
11 | 280d8ebb-6cc6-2788-9153-98959a2da801
12 | 321c8680-a5a8-2a84-85c2-816a26d59516
13 | 38770ca1-86d7-27b8-8619-ab66f67d9adf
14 | 422885b3-192d-25fc-84c9-9b80eea1752d
15 | 4238490c-60a7-271e-9f38-3c651e3b3912
16 | 4fbad31e-465b-2a5d-84b7-c0ddea978db4
17 | 5341b79f-8a66-2cdd-84d5-6dba6525dd75
18 | 5341b7e3-8a66-2cdd-8709-66a2159f0017
19 | 5630cfcf-12bf-2860-8784-83d28a611a83
20 | 4d3d82b0-8cf4-2e04-80a8-c955ea964c2f
21 | 6bde6081-9162-246f-8c4e-ffaf709d17b1
22 | 6bde60cb-9162-246f-8cf5-d04f7426e56f
23 | 742e8f15-be0a-294e-9ebb-6c72dbcb9662
24 | 75c25975-9ca2-2844-9769-84677f46d4cf
25 | 7747a50c-9431-24e8-877d-e60c3a341cc2
26 | 8e0f1c28-9e28-2339-8584-ff06ff93c341
27 | 8eabc45f-5af7-2f32-8528-640861d2a135
28 | a0905fd9-66f7-2272-9dfb-0483fdcc54c7
29 | ab835fae-54c6-29a1-995e-b06cfc555786
30 | b05fdd96-fca0-2d4f-88c3-d9dfda85c00e
31 | b1f23308-d255-2761-94da-981d962c6bf8
32 | ba6fdaaa-a4c1-2dca-8163-a52b18bf6b64
33 | bcb0fe06-4f39-2c70-9c24-a8dd7496c2f8
34 | bf9a3da2-45a5-2e80-8219-1f0a216399fe
35 | bf9a3de9-45a5-2e80-8022-277108d67404
36 | c7895f27-339c-2d13-836b-c12dca280261
37 | c7895f7c-339c-2d13-819f-3bb0b26c91f6
38 | c92fb5b5-f771-2064-8570-dbe16cb33764
39 | d7d40d4e-7a5d-2b36-97e7-34324c52ac42
40 | ddc73797-765b-241a-9e2c-097c5989baf6
41 | fcf66d7b-622d-291c-86b8-7db96aebcee3
42 | 4138582f-a238-2435-8332-6902542c2823
43 | 43b8cae1-6678-2e38-9865-c19c07c25015
44 | 4a9a43d2-7736-2874-874d-d0fad0570e19
45 | b05fdd8a-fca0-2d4f-8ac5-f6ae697787f5
46 | 095821f7-e2c2-2de1-9568-b9ce59920e29
47 | e61b0e04-bada-2f31-82d6-72831a602ba7
--------------------------------------------------------------------------------
/data/3rscan/meta_data/test.txt:
--------------------------------------------------------------------------------
1 | 00d42bed-778d-2ac6-86a7-0e0e5f5f5660
2 | 0988ea78-eb32-2e61-80ee-e4a44170bce9
3 | 0cac7547-8d6f-2d13-8c9c-08cad6632f46
4 | 0cac758d-8d6f-2d13-8d21-9133668b4d7d
5 | 0cac75e4-8d6f-2d13-8f7c-fb5b5747d1ba
6 | 0cac7633-8d6f-2d13-8e91-8b46e2b70f34
7 | 0cac7678-8d6f-2d13-8da3-dba8636cef51
8 | 10b17942-3938-2467-8933-5d40ada6d445
9 | 13af338e-7397-2e54-84fc-fa003f91ac0c
10 | 1d233ff0-e280-2b1a-8c10-2af29c344ee3
11 | 1d23402c-e280-2b1a-8e31-9a88c9c1fb91
12 | 20c993bb-698f-29c5-8569-0db38352364f
13 | 283ccfeb-107c-24d5-8bbf-05519a3c7c47
14 | 352e9c30-69fb-27a7-8b19-c703f0e190da
15 | 38770cb0-86d7-27b8-8466-1782505891fd
16 | 422885b9-192d-25fc-84f4-6f7c1afd29af
17 | 4fbad320-465b-2a5d-85d7-dc83aa09a9fa
18 | 5341b7af-8a66-2cdd-8431-8c73b0c10878
19 | 5341b7e7-8a66-2cdd-87ce-86596253cfa4
20 | 5630cfdc-12bf-2860-87b7-c7eab95718be
21 | 634b2183-f5d0-2fb7-87ef-c536720918dc
22 | 634d11cd-6833-255d-8c5c-2b0cf661494a
23 | 6a360555-fa53-2915-9721-bff473f98125
24 | 6bde608b-9162-246f-8d16-901b429b2563
25 | 6bde60d2-9162-246f-8dbf-dbd911924def
26 | 751a5598-fe61-2c3b-8cf2-1c23632af9b4
27 | 75c259a1-9ca2-2844-973c-adc28f935d5d
28 | 7747a514-9431-24e8-8505-5979f3f20906
29 | 8eabc410-5af7-2f32-844d-34726ad2dc7e
30 | 8eabc463-5af7-2f32-8537-22977a89efdd
31 | a0905fe1-66f7-2272-9d4a-a6f6579f6751
32 | a644cb8e-0ee5-2f66-9c91-1db77b5a8f96
33 | ad408c83-84db-2095-8aa4-924f966af2dc
34 | b05fdda4-fca0-2d4f-8beb-6172b0772f91
35 | b1f2330c-d255-2761-965e-d203c6e253c3
36 | ba6fdaae-a4c1-2dca-8240-6fbe1cb25eb4
37 | bcb0fe0a-4f39-2c70-9d5f-a7263a0180ca
38 | bf9a3da4-45a5-2e80-8082-be634b241693
39 | bf9a3df5-45a5-2e80-82c1-b108dc9a3fbe
40 | c7895f2f-339c-2d13-8388-28e95af958de
41 | c7895f80-339c-2d13-8253-64b490296e49
42 | c9fb7aa1-2a5b-2cf7-9222-6f111cb28b2b
43 | d7d40d54-7a5d-2b36-94d4-7cf59473177f
44 | ddc7379d-765b-241a-9f0b-50b72d6cd829
45 | ebc42041-82a4-2113-8583-cc8c1be818b3
46 | fcf66d8e-622d-291c-86ef-f8f2b3db7f74
--------------------------------------------------------------------------------
/data/3rscan/meta_data/val.txt:
--------------------------------------------------------------------------------
1 | 0988ea72-eb32-2e61-8344-99e2283c2728
2 | 0cac7540-8d6f-2d13-8eee-36ba2a428e3f
3 | 0cac7584-8d6f-2d13-8df8-c05e4307b418
4 | 0cac75dc-8d6f-2d13-8d08-9c497bd6acdc
5 | 0cac7676-8d6f-2d13-8f3a-d7bf7f03e721
6 | 10b17940-3938-2467-8a7a-958300ba83d3
7 | 137a8158-1db5-2cc0-8003-31c12610471e
8 | 1d233fe8-e280-2b1a-8fac-c3646a1cd64a
9 | 1d234022-e280-2b1a-8cec-e9787bb0d7b2
10 | 20c993b7-698f-29c5-847d-c8cb8a685f5a
11 | 280d8ebb-6cc6-2788-9153-98959a2da801
12 | 321c8680-a5a8-2a84-85c2-816a26d59516
13 | 38770ca1-86d7-27b8-8619-ab66f67d9adf
14 | 422885b3-192d-25fc-84c9-9b80eea1752d
15 | 4238490c-60a7-271e-9f38-3c651e3b3912
16 | 4fbad31e-465b-2a5d-84b7-c0ddea978db4
17 | 5341b79f-8a66-2cdd-84d5-6dba6525dd75
18 | 5341b7e3-8a66-2cdd-8709-66a2159f0017
19 | 5630cfcf-12bf-2860-8784-83d28a611a83
20 | 4d3d82b0-8cf4-2e04-80a8-c955ea964c2f
21 | 6bde6081-9162-246f-8c4e-ffaf709d17b1
22 | 6bde60cb-9162-246f-8cf5-d04f7426e56f
23 | 742e8f15-be0a-294e-9ebb-6c72dbcb9662
24 | 75c25975-9ca2-2844-9769-84677f46d4cf
25 | 7747a50c-9431-24e8-877d-e60c3a341cc2
26 | 8e0f1c28-9e28-2339-8584-ff06ff93c341
27 | 8eabc45f-5af7-2f32-8528-640861d2a135
28 | a0905fd9-66f7-2272-9dfb-0483fdcc54c7
29 | ab835fae-54c6-29a1-995e-b06cfc555786
30 | b05fdd96-fca0-2d4f-88c3-d9dfda85c00e
31 | b1f23308-d255-2761-94da-981d962c6bf8
32 | ba6fdaaa-a4c1-2dca-8163-a52b18bf6b64
33 | bcb0fe06-4f39-2c70-9c24-a8dd7496c2f8
34 | bf9a3da2-45a5-2e80-8219-1f0a216399fe
35 | bf9a3de9-45a5-2e80-8022-277108d67404
36 | c7895f27-339c-2d13-836b-c12dca280261
37 | c7895f7c-339c-2d13-819f-3bb0b26c91f6
38 | c92fb5b5-f771-2064-8570-dbe16cb33764
39 | d7d40d4e-7a5d-2b36-97e7-34324c52ac42
40 | ddc73797-765b-241a-9e2c-097c5989baf6
41 | fcf66d7b-622d-291c-86b8-7db96aebcee3
42 | 4138582f-a238-2435-8332-6902542c2823
43 | 43b8cae1-6678-2e38-9865-c19c07c25015
44 | 4a9a43d2-7736-2874-874d-d0fad0570e19
45 | b05fdd8a-fca0-2d4f-8ac5-f6ae697787f5
46 | 095821f7-e2c2-2de1-9568-b9ce59920e29
47 | e61b0e04-bada-2f31-82d6-72831a602ba7
--------------------------------------------------------------------------------
/data/3rscan/prepare_bins_pkls.py:
--------------------------------------------------------------------------------
1 | import mmengine
2 | import os
3 | from tqdm.auto import tqdm
4 | import numpy as np
5 | import argparse
6 |
7 | COLOR_TO_LABEL = {
8 | (0, 0, 0): 'unknown',
9 | (174, 199, 232): 'wall',
10 | (152, 223, 138): 'floor',
11 | (31, 119, 180): 'cabinet',
12 | (255, 187, 120): 'bed',
13 | (188, 189, 34): 'chair',
14 | (140, 86, 75): 'sofa',
15 | (255, 152, 150): 'table',
16 | (214, 39, 40): 'door',
17 | (197, 176, 213): 'window',
18 | (148, 103, 189): 'bookshelf',
19 | (196, 156, 148): 'picture',
20 | (23, 190, 207): 'counter',
21 | (178, 76, 76): 'blinds',
22 | (247, 182, 210): 'desk',
23 | (66, 188, 102): 'shelves',
24 | (219, 219, 141): 'curtain',
25 | (140, 57, 197): 'dresser',
26 | (202, 185, 52): 'pillow',
27 | (51, 176, 203): 'mirror',
28 | (200, 54, 131): 'floor mat',
29 | (92, 193, 61): 'clothes',
30 | (78, 71, 183): 'ceiling',
31 | (172, 114, 82): 'books',
32 | (255, 127, 14): 'fridge',
33 | (91, 163, 138): 'television',
34 | (153, 98, 156): 'paper',
35 | (140, 153, 101): 'towel',
36 | (158, 218, 229): 'shower curtain',
37 | (100, 125, 154): 'box',
38 | (178, 127, 135): 'whiteboard',
39 | (120, 185, 128): 'person',
40 | (146, 111, 194): 'night stand',
41 | (44, 160, 44): 'toilet',
42 | (112, 128, 144): 'sink',
43 | (96, 207, 209): 'lamp',
44 | (227, 119, 194): 'bathtub',
45 | (213, 92, 176): 'bag',
46 | (94, 106, 211): 'structure',
47 | (82, 84, 163): 'furniture',
48 | (100, 85, 144): 'prop'
49 | }
50 |
51 | OBJ2SEM = {v: idx for idx, (k, v) in enumerate(COLOR_TO_LABEL.items())}
52 | OBJ2SEM['unknown'] = -1
53 | REMAIN_BB_LABELS = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39]
54 |
55 | def create_dir(path):
56 | if not os.path.exists(path):
57 | os.mkdir(path)
58 |
59 |
60 | def _filter_bb(bb):
61 | final = []
62 | for i in bb:
63 | if i[-1] in REMAIN_BB_LABELS:
64 | final.append(i)
65 |
66 | if len(final) == 0:
67 | return np.zeros((0,7))
68 |
69 | return np.stack(final)
70 |
71 | def create_dirs(path):
72 | points = os.path.join(path, 'points')
73 | create_dir(points)
74 |
75 | semantic_mask = os.path.join(path, 'semantic_mask')
76 | create_dir(semantic_mask)
77 |
78 | instance_mask = os.path.join(path, 'instance_mask')
79 | create_dir(instance_mask)
80 |
81 | bboxs = os.path.join(path, 'bboxs')
82 | create_dir(bboxs)
83 |
84 | superpoints = os.path.join(path, 'superpoints')
85 | create_dir(superpoints)
86 | return {
87 | 'points': points,
88 | 'semantic_mask': semantic_mask,
89 | 'instance_mask': instance_mask,
90 | 'bboxs': bboxs,
91 | 'superpoints': superpoints
92 | }
93 |
94 |
95 |
96 | def rearrange_sup(sup):
97 | sup = sup.copy()
98 | unique_super = np.unique(sup)
99 |
100 | for idx, un in enumerate(unique_super):
101 | ind = np.where(sup == un)[0]
102 | sup[ind] = idx
103 |
104 | return sup
105 |
106 |
107 | def create_metainfo():
108 |
109 | return {
110 | 'categories': OBJ2SEM,
111 | 'dataset': '3RScan',
112 | 'info_version': '1.0'
113 | }
114 |
115 | def create_data_list(split, splits, bins_path):
116 | scenes = splits[split]
117 | final_list = []
118 | for scene in tqdm(scenes):
119 |
120 | lidar_points = {
121 | 'num_pts_feats': 6,
122 | 'lidar_path': f'{scene}.bin'
123 | }
124 | raw_bboxs = np.load(os.path.join(bins_path['bboxs'], f'{scene}.npy'))
125 | instances = []
126 | for rb in raw_bboxs:
127 | if len(rb) == 0:
128 | instances = []
129 |
130 | else:
131 | instances.append({
132 | 'bbox_3d': rb[:6].tolist(),
133 | 'bbox_label_3d': int(rb[-1])
134 | })
135 |
136 | final_list.append({
137 | 'lidar_points': lidar_points,
138 | 'instances': instances,
139 | 'pts_semantic_mask_path': f'{scene}.bin',
140 | 'pts_instance_mask_path': f'{scene}.bin',
141 | 'axis_align_matrix': np.eye(4)
142 | })
143 |
144 | return final_list
145 |
146 | def create_pkl_file(path_to_save, split, splits, bins_path, pkl_prefix = '3rscan'):
147 | metainfo = create_metainfo()
148 | data_list = create_data_list(split, splits, bins_path)
149 | anno = {
150 | 'metainfo': metainfo,
151 | 'data_list': data_list
152 | }
153 | filename = os.path.join(path_to_save, f'{pkl_prefix}_infos_{split}.pkl')
154 | mmengine.dump(anno, filename, 'pkl')
155 |
156 |
157 | if __name__ == '__main__':
158 | parser = argparse.ArgumentParser()
159 | parser.add_argument(
160 | '--path_to_data',
161 | required=True,
162 | help='Path to preprocessed raw data',
163 | type=str,
164 | )
165 |
166 | parser.add_argument(
167 | '--path_to_save_bins',
168 | required=True,
169 |         help='Path where the generated bins and pkls will be saved',
170 | type=str,
171 | )
172 |
173 | parser.add_argument(
174 | '--path_to_splits',
175 | default='meta_data/split/',
176 | help='Path to train/val/test splits',
177 | type=str,
178 | )
179 |
180 | args = parser.parse_args()
181 | print(args)
182 |
183 | path_to_splits = args.path_to_splits
184 | path_to_raw_data = args.path_to_data
185 |
186 | path_to_save_data = args.path_to_save_bins
187 | create_dir(path_to_save_data)
188 | bins_path = create_dirs(path_to_save_data)
189 |
190 | with open(path_to_splits + '/train.txt') as train_file:
191 | train_scenes = train_file.read().splitlines()
192 | with open(path_to_splits + '/val.txt') as val_file:
193 | val_scenes = val_file.read().splitlines()
194 | with open(path_to_splits + '/test.txt') as test_file:
195 | test_scenes = test_file.read().splitlines()
196 |
197 | splits = {
198 | 'train': train_scenes,
199 | 'val': val_scenes,
200 | 'test': test_scenes
201 | }
202 |
203 | scene_ids = os.listdir(path_to_raw_data)
204 |
205 | for si in tqdm(scene_ids):
206 | temp_path = os.path.join(path_to_raw_data, si)
207 | point_cloud = np.load(temp_path + f'/{si}_aligned_vert.npy')
208 | sem_label = np.load(temp_path + f'/{si}_sem_label.npy')[:, 0]
209 | ins_label = np.load(temp_path + f'/{si}_ins_label.npy')[:, 0]
210 | bboxs = np.load(temp_path + f'/{si}_aligned_bbox.npy')
211 | superpoints = np.load(temp_path + f'/{si}_superpoints.npy')
212 | superpoints = rearrange_sup(superpoints)
213 | bboxs = _filter_bb(bboxs)
214 |
215 | superpoints = np.load(temp_path + f'/{si}_superpoints.npy')
216 | superpoints = rearrange_sup(superpoints)
217 |
218 | point_cloud.astype(np.float32).tofile(os.path.join(bins_path['points'],
219 | f'{si}.bin'))
220 | sem_label.astype(np.int64).tofile(os.path.join(bins_path['semantic_mask'],
221 | f'{si}.bin'))
222 | ins_label.astype(np.int64).tofile(os.path.join(bins_path['instance_mask'],
223 | f'{si}.bin'))
224 | superpoints.astype(np.int64).tofile(os.path.join(bins_path['superpoints'],
225 | f'{si}.bin'))
226 | np.save(os.path.join(bins_path['bboxs'], f'{si}.npy'), bboxs)
227 |
228 |
229 | create_pkl_file(path_to_save_data, 'train', splits, bins_path)
230 | create_pkl_file(path_to_save_data, 'val', splits, bins_path)
231 | create_pkl_file(path_to_save_data, 'test', splits, bins_path)
232 |
--------------------------------------------------------------------------------
/data/3rscan/preprocess_raw_data.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings("ignore", category=DeprecationWarning)
3 |
4 | import pickle
5 | import sys
6 | import os
7 | import argparse
8 | import glob
9 | import json
10 | import numpy as np
11 | import pandas as pd
12 | from concurrent.futures import ProcessPoolExecutor
13 | from itertools import repeat
14 |
15 | from utils import read_objmesh, point_indices_from_group
16 |
17 | #CLOUD_FILE_PFIX = 'mesh.refined.v2.color'
18 | CLOUD_FILE_PFIX = 'mesh.refined.v2'
19 | AGGREGATIONS_FILE_PFIX = 'semseg.v2.json'
20 | SEGMENTS_FILE_PFIX = 'mesh.refined.0.010000.segs.v2.json'
21 |
22 |
23 | def create_dir(path):
24 | if not os.path.exists(path):
25 | os.mkdir(path)
26 |
27 | def read_transform_matrix(Scan3RJson_PATH):
28 | rescan2ref = {}
29 | with open(Scan3RJson_PATH , "r") as read_file:
30 | data = json.load(read_file)
31 | for scene in data:
32 | for scans in scene["scans"]:
33 | if "transform" in scans:
34 | rescan2ref[scans["reference"]] = \
35 | np.array(scans["transform"]).reshape(4,4).T
36 | return rescan2ref
37 |
38 | def get_reference_dic(Scan3RJson_PATH):
39 | meta_data = json.load(open(Scan3RJson_PATH))
40 | reference_dic = {}
41 | for record in meta_data:
42 | reference = record['reference']
43 | reference_dic[reference] = reference
44 | if 'scans' not in record:
45 | continue
46 | for scan in record['scans']:
47 | reference_dic[scan['reference']] = reference
48 | return reference_dic
49 |
50 | def handle_process(scene_path, output_path, labels_pd,
51 | train_scenes, val_scenes, test_scenes):
52 | scene_id = scene_path.split('/')[-1]
53 | obj_path = os.path.join(scene_path, f'{CLOUD_FILE_PFIX}.obj')
54 | aggregations_file = os.path.join(scene_path, f'{AGGREGATIONS_FILE_PFIX}')
55 | segments_file = os.path.join(scene_path, f'{SEGMENTS_FILE_PFIX}')
56 | # Rotating the mesh to axis aligned
57 | rot_matrix = rescan2ref.get(scene_id, np.identity(4))
58 |
59 | ref_scene_id = reference_dic[scene_id]
60 | ref_rot_matrix = reference_axis_align_matrix_dic[ref_scene_id]
61 |
62 | if scene_id in train_scenes:
63 | split_name = 'train'
64 | elif scene_id in val_scenes:
65 | split_name = 'val'
66 | elif scene_id in test_scenes:
67 | split_name = 'test'
68 | else:
69 | print('*', scene_id,
70 | 'does not exist in [train, val, test] that have seg files')
71 | return
72 |
73 | print('Processing: ', scene_id, 'in', split_name)
74 |
75 | pointcloud, faces_array = read_objmesh(obj_path)
76 | points = pointcloud[:, :3]
77 | colors = pointcloud[:, 3:6]
78 |
79 | # Rotate PC to axis aligned
80 | r_points = pointcloud[:, :3].transpose()
81 | r_points = np.append(r_points, np.ones((1,
82 | r_points.shape[1])), axis=0)
83 | # reference align
84 | r_points = np.dot(rot_matrix, r_points)
85 | # reference axis align
86 | r_points = np.dot(ref_rot_matrix, r_points)
87 | ##### !
88 | aligned_pointcloud = np.append(r_points.transpose()[:, :3],
89 | pointcloud[:, 3:], axis=1)
90 |
91 | # Generate new labels
92 | labelled_pc = np.zeros((pointcloud.shape[0], 1)) - 1 # -1: unannotated
93 | instance_ids = np.zeros((pointcloud.shape[0], 1)) - 1 # -1: unannotated
94 |
95 | if os.path.isfile(aggregations_file):
96 | # Load segments file
97 | with open(segments_file) as f:
98 | segments = json.load(f)
99 | seg_indices = np.array(segments['segIndices'])
100 | # Load Aggregations file
101 | with open(aggregations_file) as f:
102 | aggregation = json.load(f)
103 | seg_groups = np.array(aggregation['segGroups'])
104 |
105 | num_instances = len(seg_groups)
106 | instance_bboxes = np.zeros((num_instances, 7))
107 | aligned_instance_bboxes = np.zeros((num_instances, 7))
108 |
109 | for obj_idx, group in enumerate(seg_groups):
110 | segment_points, aligned_segment_points, p_inds, label_id = \
111 | point_indices_from_group(pointcloud, aligned_pointcloud,
112 | seg_indices, group, labels_pd)
113 | labelled_pc[p_inds] = label_id
114 |
115 | if len(segment_points) == 0: continue
116 |
117 | xmin = np.min(segment_points[:,0])
118 | ymin = np.min(segment_points[:,1])
119 | zmin = np.min(segment_points[:,2])
120 | xmax = np.max(segment_points[:,0])
121 | ymax = np.max(segment_points[:,1])
122 | zmax = np.max(segment_points[:,2])
123 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2,
124 | (zmin+zmax)/2, xmax-xmin,
125 | ymax-ymin, zmax-zmin, label_id]) # also include object id
126 | instance_bboxes[obj_idx,:] = bbox
127 |
128 | if len(aligned_segment_points) == 0: continue
129 |
130 | instance_ids[p_inds] = obj_idx
131 | xmin = np.min(aligned_segment_points[:,0])
132 | ymin = np.min(aligned_segment_points[:,1])
133 | zmin = np.min(aligned_segment_points[:,2])
134 | xmax = np.max(aligned_segment_points[:,0])
135 | ymax = np.max(aligned_segment_points[:,1])
136 | zmax = np.max(aligned_segment_points[:,2])
137 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2,
138 | (zmin+zmax)/2, xmax-xmin, ymax-ymin,
139 | zmax-zmin, label_id]) # also include object id
140 | aligned_instance_bboxes[obj_idx,:] = bbox
141 | else:
142 | # use zero as placeholders for the test scene
143 | #print("use placeholders")
144 | instance_bboxes = np.zeros((1, 7))
145 | aligned_instance_bboxes = np.zeros((1, 7))
146 |
147 | labelled_pc = labelled_pc.astype(int)
148 | instance_ids = instance_ids.astype(int)
149 | assert np.all(instance_ids[np.where(labelled_pc == -1)[0]] == -1)
150 | if -1 in np.unique(instance_ids):
151 | assert len(instance_bboxes) == len(np.unique(instance_ids)[1:])
152 |
153 | else:
154 | assert len(instance_bboxes) == len(np.unique(instance_ids))
155 |
156 | if (np.any(np.isnan(pointcloud)) or not np.all(np.isfinite(pointcloud))):
157 | raise ValueError('nan')
158 |
159 | output_path = os.path.join(output_path, f'{scene_id}')
160 | create_dir(os.path.join(output_path))
161 | output_prefix = os.path.join(output_path, f'{scene_id}')
162 | np.save(output_prefix+'_aligned_vert.npy', aligned_pointcloud[:, :6])
163 | np.save(output_prefix+'_sem_label.npy', labelled_pc)
164 | np.save(output_prefix+'_ins_label.npy', instance_ids)
165 | np.save(output_prefix+'_aligned_bbox.npy', aligned_instance_bboxes)
166 | np.save(output_prefix+'_superpoints.npy', seg_indices)
167 |
168 |
169 | if __name__ == '__main__':
170 | parser = argparse.ArgumentParser()
171 | parser.add_argument('--dataset_root', default='../data/3rscan/',
172 | help='Path to the 3RScan dataset containing scene folders')
173 | parser.add_argument('--output_root', default='preprocessed_raw_data',
174 | help='Output path where processed data will be located')
175 | parser.add_argument('--label_map_file',
176 | default='meta_data/3RScan.v2_Semantic-Classes-Mapping.csv',
177 |                         help='Path to the 3RScan semantic classes mapping csv')
178 | parser.add_argument('--num_workers', default=12,
179 | type=int, help='The number of parallel workers')
180 | parser.add_argument('--splits_path', default='meta_data/split',
181 | help='Where the txt files with the train/val splits live')
182 | config = parser.parse_args()
183 |
184 | # Load label map
185 | labels_pd = pd.read_csv(config.label_map_file, sep=',', header=1)
186 |
187 | # Load train/val splits
188 | with open(config.splits_path + '/train.txt') as train_file:
189 | train_scenes = train_file.read().splitlines()
190 | with open(config.splits_path + '/val.txt') as val_file:
191 | val_scenes = val_file.read().splitlines()
192 | with open(config.splits_path + '/test.txt') as test_file:
193 | test_scenes = test_file.read().splitlines()
194 |
195 | META_FILE = 'meta_data/3RScan.json'
196 | rescan2ref = read_transform_matrix(META_FILE)
197 | reference_dic = get_reference_dic(META_FILE)
198 |
199 | with open('./meta_data/reference_axis_align_matrix.pkl', 'rb') as f:
200 | reference_axis_align_matrix_dic = pickle.load(f)
201 |
202 | os.makedirs(config.output_root, exist_ok=True)
203 |
204 | # Load scene paths
205 | scene_paths = sorted(glob.glob(config.dataset_root + '/*'))
206 |
207 | # Preprocess data.
208 | pool = ProcessPoolExecutor(max_workers=config.num_workers)
209 | print('Processing scenes...')
210 | _ = list(pool.map(handle_process, scene_paths,
211 | repeat(config.output_root), repeat(labels_pd),
212 | repeat(train_scenes), repeat(val_scenes),
213 | repeat(test_scenes)))
214 |
--------------------------------------------------------------------------------
/data/arkitscenes/README.md:
--------------------------------------------------------------------------------
1 | ## Prepare ARKitScenes Data for Indoor 3D Detection
2 |
3 | For now, we only support the offline benchmark with a single reconstructed point cloud for each scene. The online benchmark for single RGB-D frame detection may be supported in the future. The `utils` directory is used unchanged from [ARKitScenes](https://github.com/apple/ARKitScenes/tree/main/threedod/benchmark_scripts/utils), except for fixing a single [issue](https://github.com/apple/ARKitScenes/issues/53).
4 |
5 | 1. Download data from the official [ARKitScenes](https://github.com/apple/ARKitScenes) repository. From their repo you may run:
6 | ```
7 | python download_data.py 3dod --video-id-csv threedod/3dod_train_val_splits.csv
8 | ```
9 |
10 | After this step you have the following file structure here:
11 | ```
12 | 3dod
13 | ├── metadata.csv
14 | ├── Training
15 | │ ├── xxxxxxxx
16 | │ │ ├── xxxxxxxx_3dod_annotation.json
17 | │ │ ├── xxxxxxxx_3dod_mesh.ply
18 | │ │ ├── xxxxxxxx_frames
19 | ├── Validation
20 | │ ├── xxxxxxxx
21 | │ │ ├── xxxxxxxx_3dod_annotation.json
22 | │ │ ├── xxxxxxxx_3dod_mesh.ply
23 | │ │ ├── xxxxxxxx_frames
24 | ```
25 |
26 | 2. Preprocess data for offline benchmark with our adapted script:
27 | ```
28 | python data_prepare_offline.py
29 | ```
30 | After this step you have the following file structure here:
31 | ```
32 | offline_prepared_data
33 | ├── xxxxxxxx_point.npy
34 | ├── xxxxxxxx_bbox.npy
35 | ├── xxxxxxxx_label.npy
36 | ```
37 |
38 | 3. Enter the project root directory and generate training and validation data by running:
39 | ```
40 | python tools/create_data.py arkitscenes --root-path ./data/arkitscenes --out-dir ./data/arkitscenes --extra-tag arkitscenes-offline
41 | ```
42 | Overall, you should end up with the following file structure in the `data` directory:
43 | ```
44 | arkitscenes
45 | ├── offline_prepared_data
46 | │ ├── xxxxxxxx_point.bin
47 | ├── arkitscenes_offline_train_infos.pkl
48 | ├── arkitscenes_offline_val_infos.pkl
49 |
50 | ```
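
To sanity-check the result, you can load one of the generated infos files (an illustrative sketch; the filename follows the listing above, and the keys follow the mmdet3d v2 info format used by the converter):

```python
import mmengine

infos = mmengine.load('data/arkitscenes/arkitscenes_offline_train_infos.pkl')
print(infos['metainfo']['dataset'])            # e.g. 'arkitscenes_offline'
print(len(infos['data_list']))                 # number of processed scenes
print(infos['data_list'][0]['lidar_points'])   # e.g. {'num_pts_feats': 6, 'lidar_path': 'xxxxxxxx_point.bin'}
```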
51 |
--------------------------------------------------------------------------------
/data/arkitscenes/arkitscenes_data_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | from concurrent import futures as futures
3 | from os import path as osp
4 | import mmengine
5 | import numpy as np
6 | from typing import List, Optional
7 |
8 |
9 | class ARKitScenesOfflineData:
10 | """ARKitScenesOfflineData
11 | Generate arkitscenes infos (offline benchmark) for indoor_converter.
12 |
13 | Args:
14 | root_path (str): Root path of the raw data.
15 |         split (str): Split type, 'train' or 'val'.
16 | """
17 | def __init__(self, root_path: str, split: str):
18 | self.split = split
19 | raw_path = os.path.join(root_path, '3dod')
20 | self.data_path = os.path.join(root_path, 'offline_prepared_data')
21 | assert split in ['train', 'val']
22 | class_names = [
23 | 'cabinet', 'refrigerator', 'shelf', 'stove', 'bed',
24 | 'sink', 'washer', 'toilet', 'bathtub', 'oven',
25 | 'dishwasher', 'fireplace', 'stool', 'chair', 'table',
26 | 'tv_monitor', 'sofa'
27 | ]
28 | self.name2class = {
29 | name: i
30 | for i, name in enumerate(class_names)
31 | }
32 | all_id_list = set(
33 | map(lambda x: x.split('_')[0],
34 | os.listdir(self.data_path)))
35 | split_dir = 'Training' if split == 'train' else 'Validation'
36 | split_id_list = set(os.listdir(osp.join(raw_path, split_dir)))
37 | self.sample_id_list = all_id_list & split_id_list
38 | print(f'{split}, raw ids: {len(split_id_list)}, '
39 | f'processed ids: {len(self.sample_id_list)}')
40 |
41 | def __len__(self) -> int:
42 | """Length of the dataset."""
43 | return len(self.sample_id_list)
44 |
45 | def get_infos(self,
46 | num_workers: int = 4,
47 | has_label: bool = True,
48 | sample_id_list: Optional[List[str]] = None) -> dict:
49 | """Get data infos.
50 | This method gets information from the raw data.
51 |
52 | Args:
53 | num_workers (int, optional): Number of threads to be used.
54 | Default: 4.
55 | has_label (bool, optional): Whether the data has label.
56 | Default: True.
57 | sample_id_list (list[str], optional): Index list of the sample.
58 | Default: None.
59 |
60 | Returns:
61 | dict: Information of the raw data.
62 | """
63 | def process_single_scene(sample_idx):
64 | print(f'{self.split} sample_idx: {sample_idx}', end='\r')
65 | info = {
66 | 'lidar_points': {
67 | 'num_pts_feats': 6,
68 | 'lidar_path': f'{sample_idx}_point.bin'
69 | }
70 | }
71 | boxes = np.load(
72 | osp.join(self.data_path, f'{sample_idx}_bbox.npy'))
73 | labels = np.load(
74 | osp.join(self.data_path, f'{sample_idx}_label.npy'))
75 | instances = []
76 | for box, label in zip(boxes, labels):
77 | # follow heading angle of DepthInstance3DBoxes
78 | box[-1] = -box[-1]
79 | instances.append({
80 | 'bbox_3d': box.tolist(),
81 | 'bbox_label_3d': self.name2class[label]
82 | })
83 | info['instances'] = instances
84 | return info
85 |
86 | sample_id_list = sample_id_list if sample_id_list is not None \
87 | else self.sample_id_list
88 | with futures.ThreadPoolExecutor(num_workers) as executor:
89 | infos = executor.map(process_single_scene, list(sample_id_list))
90 |
91 | infos = {
92 | 'metainfo': {
93 | 'categories': self.name2class,
94 | 'dataset': 'arkitscenes_offline',
95 | 'info_version': '1.0'
96 | },
97 | 'data_list': list(infos)
98 | }
99 | return infos
100 |
101 |
102 | # do not want to add create_annotations.py to projects
103 | if __name__ == '__main__':
104 | root_path = '/opt/project/data/arkitscenes'
105 | out_path = '/opt/project/work_dirs/tmp'
106 | infos_train = ARKitScenesOfflineData(
107 | root_path=root_path, split='train').get_infos()
108 | train_filename = osp.join(out_path, 'arkitscenes_offline_infos_train.pkl')
109 | mmengine.dump(infos_train, train_filename, 'pkl')
110 | infos_val = ARKitScenesOfflineData(
111 | root_path=root_path, split='val').get_infos()
112 | val_filename = osp.join(out_path, 'arkitscenes_offline_infos_val.pkl')
113 | mmengine.dump(infos_val, val_filename, 'pkl')
114 |
--------------------------------------------------------------------------------
/data/arkitscenes/data_prepare_offline.py:
--------------------------------------------------------------------------------
1 | # adapted from https://github.com/apple/ARKitScenes/blob/main/threedod/benchmark_scripts/data_prepare_offline.py
2 | import argparse
3 | import numpy as np
4 | import os
5 | import pandas as pd
6 | from functools import partial
7 | from tqdm.contrib.concurrent import process_map
8 |
9 | import utils.box_utils as box_utils
10 | import utils.pc_utils as pc_utils
11 | import utils.taxonomy as taxonomy
12 | from utils.tenFpsDataLoader import TenFpsDataLoader, extract_gt
13 |
14 | # we keep this rough grid_size=0.05 from the original benchmark,
15 | # however accuracy might be better with smaller grid_size
16 | def accumulate_wrapper(loader, grid_size=0.05):
17 | """
18 |     Args:
19 |         loader: TenFpsDataLoader
20 |         grid_size: float
21 |             keep only one point in each (grid_size, grid_size, grid_size) cell
22 |     Returns:
23 |         world_pc: (N, 3)
24 |             xyz in world coordinate system
25 |         world_rgb: (N, 3)
26 |             rgb color for each point
27 | """
28 | world_pc, world_rgb, poses = np.zeros((0, 3)), np.zeros((0, 3)), []
29 | for i in range(len(loader)):
30 | frame = loader[i]
31 | image_path = frame["image_path"]
32 | pcd = frame["pcd"] # in world coordinate
33 | pose = frame["pose"]
34 | rgb = frame["color"]
35 |
36 | world_pc = np.concatenate((world_pc, pcd), axis=0)
37 | world_rgb = np.concatenate((world_rgb, rgb), axis=0)
38 |
39 | choices = pc_utils.down_sample(world_pc, grid_size)
40 | world_pc = world_pc[choices]
41 | world_rgb = world_rgb[choices]
42 |
43 | return world_pc, world_rgb, poses
44 |
45 |
46 | def main(scene_id, split, data_root, output_dir):
47 | # step 0.0: output folder, make dir
48 | os.makedirs(output_dir, exist_ok=True)
49 | point_output_path = os.path.join(output_dir, f"{scene_id}_point.npy")
50 | bbox_output_path = os.path.join(output_dir, f"{scene_id}_bbox.npy")
51 | label_output_path = os.path.join(output_dir, f"{scene_id}_label.npy")
52 | # skip already processed scenes
53 | if os.path.exists(point_output_path) \
54 | and os.path.exists(bbox_output_path) \
55 | and os.path.exists(label_output_path):
56 | return
57 |
58 | # step 0.1: get annotation first,
59 | # if skipped or no gt boxes, we will not bother calling further steps
60 | gt_path = os.path.join(data_root, split, scene_id, f"{scene_id}_3dod_annotation.json")
61 | skipped, boxes_corners, _, _, labels, _ = extract_gt(gt_path)
62 | if skipped or boxes_corners.shape[0] == 0:
63 | return
64 |
65 | # step 0.2: data
66 | data_path = os.path.join(data_root, split, scene_id, f"{scene_id}_frames")
67 | loader = TenFpsDataLoader(
68 | dataset_cfg=None,
69 | class_names=taxonomy.class_names,
70 | root_path=data_path)
71 |
72 | # step 1: accumulate points and save points
73 | world_pc, world_rgb, _ = accumulate_wrapper(loader)
74 |     # although the original benchmark script ignores rgb here, we save it
75 |     # to allow the user to use or skip it for training / testing / visualization
76 | points = np.concatenate((world_pc, world_rgb), axis=1).astype(np.float32)
77 | points.tofile(point_output_path)
78 |
79 | # step 2: save labels and boxes
80 |     # not sure if we need uids, but keep them following the original benchmark
81 | boxes = box_utils.corners_to_boxes(boxes_corners)
82 | np.save(bbox_output_path, boxes)
83 | np.save(label_output_path, labels)
84 |
85 |
86 | if __name__ == "__main__":
87 | parser = argparse.ArgumentParser()
88 | parser.add_argument(
89 | "--data-root",
90 | default="./3dod",
91 |         help="input folder with ./Training/{scene_id}, ./Validation/{scene_id} "
92 | "and metadata.json"
93 | )
94 | parser.add_argument(
95 | "--output-dir",
96 | default="./offline_prepared_data",
97 |         help="directory to save the data and annotation"
98 | )
99 | parser.add_argument(
100 | "--max-workers",
101 | default=1,
102 | type=int,
103 | help="number of parallel processes"
104 | )
105 |
106 | args = parser.parse_args()
107 | df = pd.read_csv(os.path.join(args.data_root, "metadata.csv"))
108 | scene_ids = list(map(str, df["video_id"].to_list()))
109 | splits = list(map(str, df["fold"].to_list()))
110 | process_map(
111 | partial(main, data_root=args.data_root, output_dir=args.output_dir),
112 | scene_ids, splits, max_workers=args.max_workers)
113 |
--------------------------------------------------------------------------------
/data/arkitscenes/misc.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pandas as pd
4 | from collections import defaultdict
5 | from plyfile import PlyData
6 | from tqdm import tqdm
7 |
8 | import mmengine
9 | from mmdet3d.structures import DepthInstance3DBoxes
10 | from mmdet3d.apis import inference_detector, init_model
11 | from projects.TR3D.tr3d.local_visualizer import TR3DLocalVisualizer
12 | from utils.box_utils import boxes_to_corners_3d
13 | from utils.pc_utils import down_sample
14 |
15 |
16 | def verify_corners():
17 | a = np.random.rand(100, 7)
18 | mmdet3d_corners = DepthInstance3DBoxes(a, origin=(.5, .5, .5)).corners.numpy()
19 | a[:, -1] = -a[:, -1]
20 |     arkitscenes_corners = boxes_to_corners_3d(a)[:, [2, 6, 7, 3, 1, 5, 4, 0]]
21 |     assert np.abs(arkitscenes_corners - mmdet3d_corners).max() < 1e-5
22 |
23 |
24 | def print_object_statistics(path):
25 | print(path)
26 | infos = mmengine.load(path)
27 | categories = infos['metainfo']['categories']
28 | inverse_categories = {v: k for k, v in categories.items()}
29 | data = {c: defaultdict(list) for c in categories}
30 | for d in infos['data_list']:
31 | for instance in d['instances']:
32 | category_data = data[inverse_categories[instance['bbox_label_3d']]]
33 | box = instance['bbox_3d']
34 | category_data['xy_min'].append(min(box[3], box[4]))
35 | category_data['xy_max'].append(max(box[3], box[4]))
36 | category_data['z'].append(box[5])
37 |
38 | quantiles = (0, .75, 1)
39 | columns = ['category', 'N']
40 | df_data = []
41 | for key in category_data.keys():
42 | for q in quantiles:
43 | columns.append(f'{key}.{q}')
44 | for category, category_data in data.items():
45 | table_row = [category, len(category_data['z'])]
46 | for key in category_data.keys():
47 | for q in quantiles:
48 | value = np.quantile(category_data[key], q)
49 | table_row.append(value) # f'{value:.4f}'
50 | df_data.append(table_row)
51 | df = pd.DataFrame(data=df_data, columns=columns)
52 | pd.set_option('display.precision', 3)
53 | target = df[['xy_max.0.75', 'z.0.75']].to_numpy().max(axis=1)
54 | target = target > np.median(target)
55 | df['target'] = target
56 | print(df)
57 | print('target:', target.astype(int).tolist())
58 |
59 |
60 | def aggregate_multiple_ply(path, grid_size=0.05):
61 | world_pc, world_rgb = np.zeros((0, 3)), np.zeros((0, 3))
62 | for file_name in tqdm(os.listdir(path)):
63 | data = PlyData.read(os.path.join(path, file_name))
64 | pc = np.stack((
65 | data['vertex']['x'],
66 | data['vertex']['y'],
67 | data['vertex']['z']), axis=1)
68 | rgb = np.stack((
69 | data['vertex']['red'],
70 | data['vertex']['green'],
71 | data['vertex']['blue']), axis=1)
72 | world_pc = np.concatenate((world_pc, pc))
73 | world_rgb = np.concatenate((world_rgb, rgb))
74 | choices = down_sample(world_pc, grid_size)
75 | world_pc = world_pc[choices]
76 | world_rgb = world_rgb[choices]
77 | points = np.concatenate((world_pc, world_rgb), axis=1).astype(np.float32)
78 | file_name = f'{os.path.basename(os.path.dirname(path))}.bin'
79 | points.tofile(os.path.join('work_dirs/tmp/tmp', file_name))
80 |
81 |
82 | def predict(pcd_path, config_path, checkpoint_path):
83 | model = init_model(config_path, checkpoint_path, device='cuda:0',
84 | cfg_options=dict(test_dataloader=dict(dataset=dict(box_type_3d='depth'))))
85 | points = np.fromfile(pcd_path, dtype=np.float32).reshape(-1, 6)
86 | points = np.concatenate((points[:, :3], points[:, 3:] / 255), axis=1)
87 | result = inference_detector(model, points)
88 | TR3DLocalVisualizer().add_datasample(
89 | name='',
90 | data_input=dict(points=points),
91 | data_sample=result[0],
92 | draw_gt=False,
93 | out_file=pcd_path,
94 | vis_task='lidar_det')
95 |
96 | if __name__ == '__main__':
97 | # verify_corners()
98 | # print_object_statistics('/opt/project/data/arkitscenes/arkitscenes_offline_infos_train.pkl')
99 | # print_object_statistics('/opt/project/data/arkitscenes/arkitscenes_offline_infos_val.pkl')
100 | aggregate_multiple_ply('data/tmp/230621_sr_room_samples/Jun18at10-18PM-poly/pcds')
101 | predict(
102 | 'work_dirs/tmp/tmp/Jun18at10-18PM-poly.bin',
103 | 'projects/arkitscenes/configs/tr3d_1xb16_arkitscenes-offline-3d-4class.py',
104 | 'work_dirs/tmp/tr3d_arkitscenes_epoch10.pth')
105 |
--------------------------------------------------------------------------------
/data/arkitscenes/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/arkitscenes/utils/__init__.py
--------------------------------------------------------------------------------
/data/arkitscenes/utils/box_utils.py:
--------------------------------------------------------------------------------
1 | # TODO: Explain 8 corners logic at the top and use it consistently
2 | # Add comments of explanation
3 |
4 | import numpy as np
5 | import scipy.spatial
6 |
7 | from .rotation import rotate_points_along_z
8 |
9 |
10 | def get_size(box):
11 | """
12 | Args:
13 | box: 8x3
14 | Returns:
15 | size: [dx, dy, dz]
16 | """
17 | distance = scipy.spatial.distance.cdist(box[0:1, :], box[1:5, :])
18 | l = distance[0, 2]
19 | w = distance[0, 0]
20 | h = distance[0, 3]
21 | return [l, w, h]
22 |
23 |
24 | def get_heading_angle(box):
25 | """
26 | Args:
27 | box: (8, 3)
28 | Returns:
29 | heading_angle: float
30 | """
31 | a = box[0, 0] - box[1, 0]
32 | b = box[0, 1] - box[1, 1]
33 |
34 | heading_angle = np.arctan2(a, b)
35 | return heading_angle
36 |
37 |
38 | def compute_box_3d(size, center, rotmat):
39 | """Compute corners of a single box from rotation matrix
40 | Args:
41 | size: list of float [dx, dy, dz]
42 | center: np.array [x, y, z]
43 | rotmat: np.array (3, 3)
44 | Returns:
45 | corners: (8, 3)
46 | """
47 | l, h, w = [i / 2 for i in size]
48 | center = np.reshape(center, (-1, 3))
49 | center = center.reshape(3)
50 | x_corners = [l, l, -l, -l, l, l, -l, -l]
51 | y_corners = [h, -h, -h, h, h, -h, -h, h]
52 | z_corners = [w, w, w, w, -w, -w, -w, -w]
53 | corners_3d = np.dot(
54 | np.transpose(rotmat), np.vstack([x_corners, y_corners, z_corners])
55 | )
56 | corners_3d[0, :] += center[0]
57 | corners_3d[1, :] += center[1]
58 | corners_3d[2, :] += center[2]
59 | return np.transpose(corners_3d)
60 |
61 |
62 | def corners_to_boxes(corners3d):
63 | """
64 | 7 -------- 4
65 | /| /|
66 | 6 -------- 5 .
67 | | | | |
68 | . 3 -------- 0
69 | |/ |/
70 | 2 -------- 1
71 | Args:
72 | corners: (N, 8, 3), vertex order shown in figure above
73 |
74 | Returns:
75 | boxes3d: (N, 7) [x, y, z, dx, dy, dz, heading]
76 | with (x, y, z) is the box center
77 | (dx, dy, dz) as the box size
78 | and heading as the clockwise rotation angle
79 | """
80 |
81 | boxes3d = np.zeros((corners3d.shape[0], 7))
82 | for i in range(corners3d.shape[0]):
83 | boxes3d[i, :3] = np.mean(corners3d[i, :, :], axis=0)
84 | boxes3d[i, 3:6] = get_size(corners3d[i, :, :])
85 | boxes3d[i, 6] = get_heading_angle(corners3d[i, :, :])
86 |
87 | return boxes3d
88 |
89 |
90 | def boxes_to_corners_3d(boxes3d):
91 | """
92 | 7 -------- 4
93 | /| /|
94 | 6 -------- 5 .
95 | | | | |
96 | . 3 -------- 0
97 | |/ |/
98 | 2 -------- 1
99 | Args:
100 | boxes3d: (N, 7) [x, y, z, dx, dy, dz, heading],
101 | (x, y, z) is the box center
102 |
103 | Returns:
104 | corners: (N, 8, 3)
105 | """
106 | template = np.array([[1, 1, -1],
107 | [1, -1, -1],
108 | [-1, -1, -1],
109 | [-1, 1, -1],
110 | [1, 1, 1],
111 | [1, -1, 1],
112 | [-1, -1, 1],
113 | [-1, 1, 1]]
114 | ) / 2.
115 |
116 |     # corners3d: of shape (N, 8, 3)
117 | corners3d = np.tile(boxes3d[:, None, 3:6], (1, 8, 1)) * template[None, :, :]
118 |
119 | corners3d = rotate_points_along_z(corners3d.reshape(-1, 8, 3), boxes3d[:, 6]).reshape(
120 | -1, 8, 3
121 | )
122 | corners3d += boxes3d[:, None, 0:3]
123 |
124 | return corners3d
125 |
126 |
127 | def points_in_boxes(points, boxes):
128 | """
129 | Args:
130 |         points: np.array (n, 3+d)
131 | boxes: np.array (m, 8, 3)
132 | Returns:
133 | mask: np.array (n, m) of type bool
134 | """
135 | if len(boxes) == 0:
136 |         return np.zeros([points.shape[0], 1], dtype=bool)
137 | points = points[:, :3] # get xyz
138 | # u = p6 - p5
139 | u = boxes[:, 6, :] - boxes[:, 5, :] # (m, 3)
140 | # v = p6 - p7
141 | v = boxes[:, 6, :] - boxes[:, 7, :] # (m, 3)
142 | # w = p6 - p2
143 | w = boxes[:, 6, :] - boxes[:, 2, :] # (m, 3)
144 |
145 | # ux, vx, wx
146 | ux = np.matmul(points, u.T) # (n, m)
147 | vx = np.matmul(points, v.T)
148 | wx = np.matmul(points, w.T)
149 |
150 | # up6, up5, vp6, vp7, wp6, wp2
151 | up6 = np.sum(u * boxes[:, 6, :], axis=1)
152 | up5 = np.sum(u * boxes[:, 5, :], axis=1)
153 | vp6 = np.sum(v * boxes[:, 6, :], axis=1)
154 | vp7 = np.sum(v * boxes[:, 7, :], axis=1)
155 | wp6 = np.sum(w * boxes[:, 6, :], axis=1)
156 | wp2 = np.sum(w * boxes[:, 2, :], axis=1)
157 |
158 |     mask_u = np.logical_and(ux <= up6, ux >= up5)  # (n, m)
159 | mask_v = np.logical_and(vx <= vp6, vx >= vp7)
160 | mask_w = np.logical_and(wx <= wp6, wx >= wp2)
161 |
162 |     mask = mask_u & mask_v & mask_w  # (n, m)
163 |
164 | return mask
165 |
166 |
167 | def poly_area(x,y):
168 | """ Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates """
169 | return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)))
170 |
171 |
172 | def polygon_clip(subjectPolygon, clipPolygon):
173 | """ Clip a polygon with another polygon.
174 | Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python
175 | Args:
176 | subjectPolygon: a list of (x,y) 2d points, any polygon.
177 | clipPolygon: a list of (x,y) 2d points, has to be *convex*
178 | Note:
179 | **points have to be counter-clockwise ordered**
180 | Return:
181 | a list of (x,y) vertex point for the intersection polygon.
182 | """
183 |
184 | def inside(p):
185 | return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0])
186 |
187 | def computeIntersection():
188 | dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]]
189 | dp = [s[0] - e[0], s[1] - e[1]]
190 | n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
191 | n2 = s[0] * e[1] - s[1] * e[0]
192 | n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
193 | return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3]
194 |
195 | outputList = subjectPolygon
196 | cp1 = clipPolygon[-1]
197 |
198 | for clipVertex in clipPolygon:
199 | cp2 = clipVertex
200 | inputList = outputList
201 | outputList = []
202 | s = inputList[-1]
203 |
204 | for subjectVertex in inputList:
205 | e = subjectVertex
206 | if inside(e):
207 | if not inside(s):
208 | outputList.append(computeIntersection())
209 | outputList.append(e)
210 | elif inside(s):
211 | outputList.append(computeIntersection())
212 | s = e
213 | cp1 = cp2
214 | if len(outputList) == 0:
215 | return None
216 | return (outputList)
217 |
218 |
219 | def convex_hull_intersection(p1, p2):
220 | """ Compute area of two convex hull's intersection area.
221 | p1,p2 are a list of (x,y) tuples of hull vertices.
222 | return a list of (x,y) for the intersection and its volume
223 | """
224 | inter_p = polygon_clip(p1,p2)
225 | if inter_p is not None:
226 | hull_inter = scipy.spatial.ConvexHull(inter_p)
227 | return inter_p, hull_inter.volume
228 | else:
229 | return None, 0.0
230 |
231 |
232 | def box3d_vol(corners):
233 | ''' corners: (8,3) no assumption on axis direction '''
234 | a = np.sqrt(np.sum((corners[0,:] - corners[1,:])**2))
235 | b = np.sqrt(np.sum((corners[1,:] - corners[2,:])**2))
236 | c = np.sqrt(np.sum((corners[0,:] - corners[4,:])**2))
237 | return a*b*c
238 |
239 |
240 | def box3d_iou(corners1, corners2):
241 | ''' Compute 3D bounding box IoU.
242 |
243 | Input:
244 |         corners1: numpy array (8,3), up direction is +z
245 |         corners2: numpy array (8,3), up direction is +z
246 |     Output:
247 |         iou: 3D bounding box IoU
248 |         (the bird's eye view 2D IoU is computed internally but not returned)
249 |
250 | '''
251 | # corner points are in counter clockwise order
252 | rect1 = [(corners1[i,0], corners1[i,1]) for i in range(3,-1,-1)]
253 | rect2 = [(corners2[i,0], corners2[i,1]) for i in range(3,-1,-1)]
254 | area1 = poly_area(np.array(rect1)[:,0], np.array(rect1)[:,1])
255 | area2 = poly_area(np.array(rect2)[:,0], np.array(rect2)[:,1])
256 | inter, inter_area = convex_hull_intersection(rect1, rect2)
257 | iou_2d = inter_area/(area1+area2-inter_area)
258 | ymax = min(corners1[:,2].max(), corners2[:,2].max())
259 | ymin = max(corners1[:,2].min(), corners2[:,2].min())
260 | inter_vol = inter_area * max(0.0, ymax-ymin)
261 | vol1 = box3d_vol(corners1)
262 | vol2 = box3d_vol(corners2)
263 | iou = inter_vol / (vol1 + vol2 - inter_vol)
264 | return iou
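265 | 
266 | 
267 | if __name__ == '__main__':
268 |     # minimal round-trip sanity check, not part of the original ARKitScenes
269 |     # benchmark code: boxes -> corners -> boxes should recover the same
270 |     # center, size and heading (the box values below are arbitrary)
271 |     box = np.array([[1.0, 2.0, 0.5, 0.8, 1.2, 0.6, 0.3]])
272 |     recovered = corners_to_boxes(boxes_to_corners_3d(box))
273 |     assert np.abs(recovered - box).max() < 1e-6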
--------------------------------------------------------------------------------
/data/arkitscenes/utils/pc_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def down_sample(point_cloud, voxel_sz):
5 | """Quantize point cloud by voxel_size
6 | Returns kept indices
7 |
8 | Args:
9 | all_points: np.array (n, 3) float
10 | voxel_sz: float
11 | Returns:
12 | indices: (m, ) int
13 | """
14 | coordinates = np.round(point_cloud / voxel_sz).astype(np.int32)
15 | _, indices = np.unique(coordinates, axis=0, return_index=True)
16 | return indices
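17 | 
18 | 
19 | if __name__ == '__main__':
20 |     # minimal sanity check, not part of the original file: the first two
21 |     # points fall into the same 5 cm voxel, so only one of them is kept
22 |     pts = np.array([[0.0, 0.0, 0.0], [0.01, 0.0, 0.0], [1.0, 1.0, 1.0]])
23 |     print(down_sample(pts, 0.05))  # two kept indices, e.g. [0 2]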
--------------------------------------------------------------------------------
/data/arkitscenes/utils/rotation.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import math
3 | import numpy as np
4 |
5 |
6 | def eulerAnglesToRotationMatrix(theta):
7 |     """Euler rotation matrix with clockwise logic,
8 |     composed as Rz * Ry * Rx.
9 |
10 | Args:
11 | theta: list of float
12 | [theta_x, theta_y, theta_z]
13 | Returns:
14 | R: np.array (3, 3)
15 | rotation matrix of Rz*Ry*Rx
16 | """
17 | R_x = np.array(
18 | [
19 | [1, 0, 0],
20 | [0, math.cos(theta[0]), -math.sin(theta[0])],
21 | [0, math.sin(theta[0]), math.cos(theta[0])],
22 | ]
23 | )
24 |
25 | R_y = np.array(
26 | [
27 | [math.cos(theta[1]), 0, math.sin(theta[1])],
28 | [0, 1, 0],
29 | [-math.sin(theta[1]), 0, math.cos(theta[1])],
30 | ]
31 | )
32 |
33 | R_z = np.array(
34 | [
35 | [math.cos(theta[2]), -math.sin(theta[2]), 0],
36 | [math.sin(theta[2]), math.cos(theta[2]), 0],
37 | [0, 0, 1],
38 | ]
39 | )
40 |
41 | R = np.dot(R_z, np.dot(R_y, R_x))
42 | return R
43 |
44 |
45 | def upright_camera_relative_transform(pose):
46 | """Generate pose matrix with z-dim as height
47 |
48 | Args:
49 | pose: np.array (4, 4)
50 | Returns:
51 | urc: (4, 4)
52 | urc_inv: (4, 4)
53 | """
54 |
55 |     # take viewing direction in camera local coordinates (which is simply unit vector along +z)
56 | view_dir_camera = np.asarray([0, 0, 1])
57 | R = pose[0:3, 0:3]
58 | t = pose[0:3, 3]
59 |
60 | # convert to world coordinates
61 | view_dir_world = np.dot(R, view_dir_camera)
62 |
63 | # compute heading
64 | view_dir_xy = view_dir_world[0:2]
65 | heading = math.atan2(view_dir_xy[1], view_dir_xy[0])
66 |
67 | # compute rotation around Z to align heading with +Y
68 | zRot = -heading + math.pi / 2
69 |
70 | # translation first, back to camera point
71 | urc_t = np.identity(4)
72 | urc_t[0:2, 3] = -1 * t[0:2]
73 |
74 | # compute rotation matrix
75 | urc_r = np.identity(4)
76 | urc_r[0:3, 0:3] = eulerAnglesToRotationMatrix([0, 0, zRot])
77 |
78 | urc = np.dot(urc_r, urc_t)
79 | urc_inv = np.linalg.inv(urc)
80 |
81 | return urc, urc_inv
82 |
83 |
84 | def rotate_pc(pc, rotmat):
85 |     """Rotate points w.r.t. rotmat
86 | Args:
87 | pc: np.array (n, 3)
88 | rotmat: np.array (4, 4)
89 | Returns:
90 | pc: (n, 3)
91 | """
92 | pc_4 = np.ones([pc.shape[0], 4])
93 | pc_4[:, 0:3] = pc
94 | pc_4 = np.dot(pc_4, np.transpose(rotmat))
95 |
96 | return pc_4[:, 0:3]
97 |
98 |
99 | def rotate_points_along_z(points, angle):
100 |     """Rotate points clockwise around the z-axis.
101 |     Args:
102 |         points: np.array (B, N, 3 + C) or
103 |             (N, 3 + C) for a single batch
104 |         angle: np.array (B, )
105 |             or a scalar for a single batch,
106 |             rotation angle around the z-axis
107 | Returns:
108 | points_rot: (B, N, 3 + C) or (N, 3 + C)
109 |
110 | """
111 | single_batch = len(points.shape) == 2
112 | if single_batch:
113 | points = np.expand_dims(points, axis=0)
114 | angle = np.expand_dims(angle, axis=0)
115 | cosa = np.expand_dims(np.cos(angle), axis=1)
116 | sina = np.expand_dims(np.sin(angle), axis=1)
117 | zeros = np.zeros_like(cosa) # angle.new_zeros(points.shape[0])
118 | ones = np.ones_like(sina) # angle.new_ones(points.shape[0])
119 |
120 | rot_matrix = (
121 | np.concatenate((cosa, -sina, zeros, sina, cosa, zeros, zeros, zeros, ones), axis=1)
122 | .reshape(-1, 3, 3)
123 | )
124 |
125 | # print(rot_matrix.view(3, 3))
126 | points_rot = np.matmul(points[:, :, :3], rot_matrix)
127 | points_rot = np.concatenate((points_rot, points[:, :, 3:]), axis=-1)
128 |
129 | if single_batch:
130 | points_rot = points_rot.squeeze(0)
131 |
132 | return points_rot
133 |
134 |
135 | def convert_angle_axis_to_matrix3(angle_axis):
136 | """Return a Matrix3 for the angle axis.
137 | Arguments:
138 | angle_axis {Point3} -- a rotation in angle axis form.
139 | """
140 | matrix, jacobian = cv2.Rodrigues(angle_axis)
141 | return matrix
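142 | 
143 | 
144 | if __name__ == '__main__':
145 |     # minimal sanity check, not part of the original file: with the clockwise
146 |     # convention of rotate_points_along_z, rotating the +x unit vector by
147 |     # +pi/2 lands approximately on -y
148 |     point = np.array([[1.0, 0.0, 0.0]])
149 |     print(rotate_points_along_z(point, np.array(np.pi / 2)))  # ~[[0., -1., 0.]]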
--------------------------------------------------------------------------------
/data/arkitscenes/utils/taxonomy.py:
--------------------------------------------------------------------------------
1 | #TODO: no original categories
2 | # shortened version only
3 |
4 | import copy
5 | import numpy as np
6 |
7 |
8 | # After merging, our label-id to class (string);
9 | class_names = [
10 | "cabinet", "refrigerator", "shelf", "stove", "bed", # 0..5
11 | "sink", "washer", "toilet", "bathtub", "oven", # 5..10
12 | "dishwasher", "fireplace", "stool", "chair", "table", # 10..15
13 | "tv_monitor", "sofa", # 15..17
14 | ]
15 |
16 | # 3D Anchor-sizes of merged categories (dx, dy, dz)
17 | '''
18 | Anchor box sizes are computed based on box corner order below:
19 | 6 -------- 7
20 | /| /|
21 | 5 -------- 4 .
22 | | | | |
23 | . 2 -------- 3
24 | |/ |/
25 | 1 -------- 0
26 | '''
27 |
28 |
29 | class ARKitDatasetConfig(object):
30 | def __init__(self):
31 | """
32 | init will set values for:
33 | self.class_names
34 | self.cls2label (after mapping)
35 | self.label2cls (after mapping)
36 | self.num_class
37 |
38 | Args:
39 | """
40 | # final training/val categories
41 | self.class_names = class_names
42 | self.label2cls = {}
43 | self.cls2label = {}
44 | for i, cls_ in enumerate(class_names):
45 | self.label2cls[i] = cls_
46 | self.cls2label[cls_] = i
47 |
48 | self.num_class = len(self.class_names)
49 |
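50 | 
51 | if __name__ == '__main__':
52 |     # minimal usage sketch, not part of the original file: the config simply
53 |     # exposes the merged class list and the name <-> label mappings
54 |     cfg = ARKitDatasetConfig()
55 |     print(cfg.num_class, cfg.cls2label['chair'], cfg.label2cls[13])  # 17 13 chair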
--------------------------------------------------------------------------------
/data/multiscan/README.md:
--------------------------------------------------------------------------------
1 | ## Prepare MultiScan Data for Indoor 3D Detection
2 |
3 | 1. Download and unzip the data from the official [MultiScan](https://github.com/smartscenes/multiscan?tab=readme-ov-file) repository.
4 |
5 | 2. Generate bins and pkls data by running:
6 |
7 | ```bash
8 | python prepare_bins_pkls.py --path_to_pths path_to_unzipped_folder --path_to_save_bins path_to_save_bins
9 | ```
10 |
11 | Overall, you should obtain the following file structure in the `bins` directory:
12 | ```
13 | bins
14 | ├── bboxs
15 | │ ├── xxxxx_xx.npy
16 | ├── instance_mask
17 | │ ├── xxxxx_xx.bin
18 | ├── points
19 | │ ├── xxxxx_xx.bin
20 | ├── semantic_mask
21 | │ ├── xxxxx_xx.bin
22 | ├── super_points
23 | │ ├── xxxxx_xx.bin
24 | ├── multiscan_infos_train.pkl
25 | ├── multiscan_infos_val.pkl
26 | ├── multiscan_infos_test.pkl
27 | ```
28 |
--------------------------------------------------------------------------------
/data/s3dis/README.md:
--------------------------------------------------------------------------------
1 | ### Preparation of S3DIS Data for Indoor Detection
2 |
3 | Please follow the original mmdetection3d [instructions](https://github.com/open-mmlab/mmdetection3d/tree/22aaa47fdb53ce1870ff92cb7e3f96ae38d17f61/data/s3dis). However, to match the order of points in each scene with our superpoints, you need to run the `remap_superpoints.py` script, for example as shown below.
4 |
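5 | A minimal example invocation (the two paths are placeholders: `--src` should point to the data that already contains our `points` and `super_points` bins, and `--dst` to the S3DIS data prepared with mmdetection3d):
6 | 
7 | ```bash
8 | python remap_superpoints.py --src path_to_source_data --dst path_to_destination_data
9 | ```
10 | 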
--------------------------------------------------------------------------------
/data/s3dis/remap_superpoints.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import numpy as np
3 | from sklearn.neighbors import KDTree
4 | import argparse
5 | from tqdm import tqdm
6 |
7 | def parse_args():
8 | parser = argparse.ArgumentParser(description='Remap superpoints from source to destination point cloud data')
9 | parser.add_argument('--src', type=str, required=True, help='Path to source data')
10 | parser.add_argument('--dst', type=str, required=True, help='Path to destination data')
11 | return parser.parse_args()
12 |
13 | args = parse_args()
14 |
15 | src_folder = Path(args.src)
16 | dst_folder = Path(args.dst)
17 |
18 | for src_file in tqdm(src_folder.glob('points/*.bin'), desc="Processing files"):
19 | pcds_src = np.fromfile(src_file, dtype=np.float32).reshape(-1, 6)[:, :3]
20 | sp_src = np.fromfile(src_file.parent.parent / 'super_points' / src_file.name, dtype=np.int64)
21 |
22 | dst_file = dst_folder / 'points' / src_file.name
23 | if dst_file.exists():
24 | pcds_dst = np.fromfile(dst_file, dtype=np.float32).reshape(-1, 6)[:, :3]
25 |
26 | tree = KDTree(pcds_src)
27 | _, indices = tree.query(pcds_dst, k=1)
28 | sp_dst = sp_src[indices.flatten()]
29 |
30 | dst_sp_file = dst_file.parent.parent / 'super_points' / dst_file.name
31 | dst_sp_file.parent.mkdir(parents=True, exist_ok=True)
32 | sp_dst.astype(np.int64).tofile(dst_sp_file)
33 | else:
34 | print(f"Corresponding file not found in destination folder: {dst_file}")
--------------------------------------------------------------------------------
/data/scannet/README.md:
--------------------------------------------------------------------------------
1 | ### Prepare ScanNet Data for Indoor Detection or Segmentation Task
2 |
3 | We follow the procedure in [votenet](https://github.com/facebookresearch/votenet/).
4 |
5 | 1. Download ScanNet v2 data [HERE](https://github.com/ScanNet/ScanNet). Link or move the 'scans' folder to this level of directory. If you are performing segmentation tasks and want to upload the results to its official [benchmark](http://kaldir.vc.in.tum.de/scannet_benchmark/), please also link or move the 'scans_test' folder to this directory.
6 |
7 | 2. In this directory, extract point clouds and annotations by running `python batch_load_scannet_data.py`. Add the `--scannet200` flag if you want to obtain annotations for the ScanNet200 dataset.
8 |
9 | 3. Enter the project root directory, generate training data by running
10 |
11 | ```bash
12 | python tools/create_data.py scannet --root-path ./data/scannet --out-dir ./data/scannet --extra-tag scannet
13 | ```
14 | or for ScanNet200:
15 |
16 | ```bash
17 | mkdir data/scannet200
18 | python tools/create_data.py scannet200 --root-path ./data/scannet --out-dir ./data/scannet200 --extra-tag scannet200
19 | ```
20 |
21 | The overall process for ScanNet can be achieved with the following script:
22 |
23 | ```bash
24 | python batch_load_scannet_data.py
25 | cd ../..
26 | python tools/create_data.py scannet --root-path ./data/scannet --out-dir ./data/scannet --extra-tag scannet
27 | ```
28 |
29 | Or for ScanNet200:
30 |
31 | ```bash
32 | python batch_load_scannet_data.py --scannet200
33 | cd ../..
34 | mkdir data/scannet200
35 | python tools/create_data.py scannet200 --root-path ./data/scannet --out-dir ./data/scannet200 --extra-tag scannet200
36 | ```
37 |
38 | The directory structure after pre-processing should be as below
39 |
40 | ```
41 | scannet
42 | ├── meta_data
43 | ├── batch_load_scannet_data.py
44 | ├── load_scannet_data.py
45 | ├── scannet_utils.py
46 | ├── README.md
47 | ├── scans
48 | ├── scans_test
49 | ├── scannet_instance_data
50 | ├── points
51 | │ ├── xxxxx.bin
52 | ├── instance_mask
53 | │ ├── xxxxx.bin
54 | ├── semantic_mask
55 | │ ├── xxxxx.bin
56 | ├── super_points
57 | │ ├── xxxxx.bin
58 | ├── seg_info
59 | │ ├── train_label_weight.npy
60 | │ ├── train_resampled_scene_idxs.npy
61 | │ ├── val_label_weight.npy
62 | │ ├── val_resampled_scene_idxs.npy
63 | ├── scannet_infos_train.pkl
64 | ├── scannet_infos_val.pkl
65 | ├── scannet_infos_test.pkl
66 |
67 | ```
68 |
--------------------------------------------------------------------------------
/data/scannet/batch_load_scannet_data.py:
--------------------------------------------------------------------------------
1 | # Modified from
2 | # https://github.com/facebookresearch/votenet/blob/master/scannet/batch_load_scannet_data.py
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 | """Batch mode in loading Scannet scenes with vertices and ground truth labels
8 | for semantic and instance segmentations.
9 |
10 | Usage example: python ./batch_load_scannet_data.py
11 | """
12 | import argparse
13 | import datetime
14 | import os
15 | from os import path as osp
16 |
17 | import torch
18 | import segmentator
19 | import open3d as o3d
20 | import numpy as np
21 | from load_scannet_data import export
22 |
23 | DONOTCARE_CLASS_IDS = np.array([])
24 |
25 | SCANNET_OBJ_CLASS_IDS = np.array(
26 | [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39])
27 |
28 | SCANNET200_OBJ_CLASS_IDS = np.array([2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
29 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 84, 86, 87, 88, 89, 90, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 110, 112, 115, 116, 118, 120, 121, 122, 125, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 145, 148, 154,
30 | 155, 156, 157, 159, 161, 163, 165, 166, 168, 169, 170, 177, 180, 185, 188, 191, 193, 195, 202, 208, 213, 214, 221, 229, 230, 232, 233, 242, 250, 261, 264, 276, 283, 286, 300, 304, 312, 323, 325, 331, 342, 356, 370, 392, 395, 399, 408, 417,
31 | 488, 540, 562, 570, 572, 581, 609, 748, 776, 1156, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191])
32 |
33 |
34 |
35 | def export_one_scan(scan_name,
36 | output_filename_prefix,
37 | max_num_point,
38 | label_map_file,
39 | scannet_dir,
40 | test_mode=False,
41 | scannet200=False):
42 | mesh_file = osp.join(scannet_dir, scan_name, scan_name + '_vh_clean_2.ply')
43 | agg_file = osp.join(scannet_dir, scan_name,
44 | scan_name + '.aggregation.json')
45 | seg_file = osp.join(scannet_dir, scan_name,
46 | scan_name + '_vh_clean_2.0.010000.segs.json')
47 | # includes axisAlignment info for the train set scans.
48 | meta_file = osp.join(scannet_dir, scan_name, f'{scan_name}.txt')
49 | mesh_vertices, semantic_labels, instance_labels, unaligned_bboxes, \
50 | aligned_bboxes, instance2semantic, axis_align_matrix = export(
51 | mesh_file, agg_file, seg_file, meta_file, label_map_file, None,
52 | test_mode, scannet200)
53 |
54 | if not test_mode:
55 | mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS))
56 | mesh_vertices = mesh_vertices[mask, :]
57 | semantic_labels = semantic_labels[mask]
58 | instance_labels = instance_labels[mask]
59 |
60 | num_instances = len(np.unique(instance_labels))
61 | print(f'Num of instances: {num_instances}')
62 | if scannet200:
63 | OBJ_CLASS_IDS = SCANNET200_OBJ_CLASS_IDS
64 | else:
65 | OBJ_CLASS_IDS = SCANNET_OBJ_CLASS_IDS
66 |
67 | bbox_mask = np.in1d(unaligned_bboxes[:, -1], OBJ_CLASS_IDS)
68 | unaligned_bboxes = unaligned_bboxes[bbox_mask, :]
69 | bbox_mask = np.in1d(aligned_bboxes[:, -1], OBJ_CLASS_IDS)
70 | aligned_bboxes = aligned_bboxes[bbox_mask, :]
71 | assert unaligned_bboxes.shape[0] == aligned_bboxes.shape[0]
72 | print(f'Num of care instances: {unaligned_bboxes.shape[0]}')
73 |
74 | if max_num_point is not None:
75 | max_num_point = int(max_num_point)
76 | N = mesh_vertices.shape[0]
77 | if N > max_num_point:
78 | choices = np.random.choice(N, max_num_point, replace=False)
79 | mesh_vertices = mesh_vertices[choices, :]
80 | if not test_mode:
81 | semantic_labels = semantic_labels[choices]
82 | instance_labels = instance_labels[choices]
83 |
84 | mesh = o3d.io.read_triangle_mesh(mesh_file)
85 | vertices = torch.from_numpy(np.array(mesh.vertices).astype(np.float32))
86 | faces = torch.from_numpy(np.array(mesh.triangles).astype(np.int64))
87 | superpoints = segmentator.segment_mesh(vertices, faces).numpy()
88 |
89 | np.save(f'{output_filename_prefix}_sp_label.npy', superpoints)
90 | np.save(f'{output_filename_prefix}_vert.npy', mesh_vertices)
91 |
92 | if not test_mode:
93 | assert superpoints.shape == semantic_labels.shape
94 | np.save(f'{output_filename_prefix}_sem_label.npy', semantic_labels)
95 | np.save(f'{output_filename_prefix}_ins_label.npy', instance_labels)
96 | np.save(f'{output_filename_prefix}_unaligned_bbox.npy',
97 | unaligned_bboxes)
98 | np.save(f'{output_filename_prefix}_aligned_bbox.npy', aligned_bboxes)
99 | np.save(f'{output_filename_prefix}_axis_align_matrix.npy',
100 | axis_align_matrix)
101 |
102 |
103 | def batch_export(max_num_point,
104 | output_folder,
105 | scan_names_file,
106 | label_map_file,
107 | scannet_dir,
108 | test_mode=False,
109 | scannet200=False):
110 | if test_mode and not os.path.exists(scannet_dir):
111 | # test data preparation is optional
112 | return
113 | if not os.path.exists(output_folder):
114 | print(f'Creating new data folder: {output_folder}')
115 | os.mkdir(output_folder)
116 |
117 | scan_names = [line.rstrip() for line in open(scan_names_file)]
118 | for scan_name in scan_names:
119 | print('-' * 20 + 'begin')
120 | print(datetime.datetime.now())
121 | print(scan_name)
122 | output_filename_prefix = osp.join(output_folder, scan_name)
123 | if osp.isfile(f'{output_filename_prefix}_vert.npy'):
124 | print('File already exists. skipping.')
125 | print('-' * 20 + 'done')
126 | continue
127 | try:
128 | export_one_scan(scan_name, output_filename_prefix, max_num_point,
129 | label_map_file, scannet_dir, test_mode, scannet200)
130 | except Exception:
131 |             print(f'Failed to export scan: {scan_name}')
132 | print('-' * 20 + 'done')
133 |
134 |
135 | def main():
136 | parser = argparse.ArgumentParser()
137 | parser.add_argument(
138 | '--max_num_point',
139 | default=None,
140 | help='The maximum number of the points.')
141 | parser.add_argument(
142 | '--output_folder',
143 | default='./scannet_instance_data',
144 | help='output folder of the result.')
145 | parser.add_argument(
146 | '--train_scannet_dir', default='scans', help='scannet data directory.')
147 | parser.add_argument(
148 | '--test_scannet_dir',
149 | default='scans_test',
150 | help='scannet data directory.')
151 | parser.add_argument(
152 | '--label_map_file',
153 | default='meta_data/scannetv2-labels.combined.tsv',
154 | help='The path of label map file.')
155 | parser.add_argument(
156 | '--train_scan_names_file',
157 | default='meta_data/scannet_train.txt',
158 | help='The path of the file that stores the scan names.')
159 | parser.add_argument(
160 | '--test_scan_names_file',
161 | default='meta_data/scannetv2_test.txt',
162 | help='The path of the file that stores the scan names.')
163 | parser.add_argument(
164 | '--scannet200',
165 | action='store_true',
166 | help='Use it for scannet200 mapping')
167 | args = parser.parse_args()
168 | batch_export(
169 | args.max_num_point,
170 | args.output_folder,
171 | args.train_scan_names_file,
172 | args.label_map_file,
173 | args.train_scannet_dir,
174 | test_mode=False,
175 | scannet200=args.scannet200)
176 | batch_export(
177 | args.max_num_point,
178 | args.output_folder,
179 | args.test_scan_names_file,
180 | args.label_map_file,
181 | args.test_scannet_dir,
182 | test_mode=True,
183 | scannet200=args.scannet200)
184 |
185 |
186 | if __name__ == '__main__':
187 | main()
188 |
--------------------------------------------------------------------------------
/data/scannet/load_scannet_data.py:
--------------------------------------------------------------------------------
1 | # Modified from
2 | # https://github.com/facebookresearch/votenet/blob/master/scannet/load_scannet_data.py
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 | """Load Scannet scenes with vertices and ground truth labels for semantic and
8 | instance segmentations."""
9 | import argparse
10 | import inspect
11 | import json
12 | import os
13 |
14 | import numpy as np
15 | import scannet_utils
16 |
17 | currentdir = os.path.dirname(
18 | os.path.abspath(inspect.getfile(inspect.currentframe())))
19 |
20 |
21 | def read_aggregation(filename):
22 | assert os.path.isfile(filename)
23 | object_id_to_segs = {}
24 | label_to_segs = {}
25 | with open(filename) as f:
26 | data = json.load(f)
27 | num_objects = len(data['segGroups'])
28 | for i in range(num_objects):
29 | object_id = data['segGroups'][i][
30 | 'objectId'] + 1 # instance ids should be 1-indexed
31 | label = data['segGroups'][i]['label']
32 | segs = data['segGroups'][i]['segments']
33 | object_id_to_segs[object_id] = segs
34 | if label in label_to_segs:
35 | label_to_segs[label].extend(segs)
36 | else:
37 | label_to_segs[label] = segs
38 | return object_id_to_segs, label_to_segs
39 |
40 |
41 | def read_segmentation(filename):
42 | assert os.path.isfile(filename)
43 | seg_to_verts = {}
44 | with open(filename) as f:
45 | data = json.load(f)
46 | num_verts = len(data['segIndices'])
47 | for i in range(num_verts):
48 | seg_id = data['segIndices'][i]
49 | if seg_id in seg_to_verts:
50 | seg_to_verts[seg_id].append(i)
51 | else:
52 | seg_to_verts[seg_id] = [i]
53 | return seg_to_verts, num_verts
54 |
55 |
56 | def extract_bbox(mesh_vertices, object_id_to_segs, object_id_to_label_id,
57 | instance_ids):
58 | num_instances = len(np.unique(list(object_id_to_segs.keys())))
59 | instance_bboxes = np.zeros((num_instances, 7))
60 | for obj_id in object_id_to_segs:
61 | label_id = object_id_to_label_id[obj_id]
62 | obj_pc = mesh_vertices[instance_ids == obj_id, 0:3]
63 | if len(obj_pc) == 0:
64 | continue
65 | xyz_min = np.min(obj_pc, axis=0)
66 | xyz_max = np.max(obj_pc, axis=0)
67 | bbox = np.concatenate([(xyz_min + xyz_max) / 2.0, xyz_max - xyz_min,
68 | np.array([label_id])])
69 | # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES
70 | instance_bboxes[obj_id - 1, :] = bbox
71 | return instance_bboxes
72 |
73 |
74 | def export(mesh_file,
75 | agg_file,
76 | seg_file,
77 | meta_file,
78 | label_map_file,
79 | output_file=None,
80 | test_mode=False,
81 | scannet200=False):
82 | """Export original files to vert, ins_label, sem_label and bbox file.
83 |
84 | Args:
85 | mesh_file (str): Path of the mesh_file.
86 | agg_file (str): Path of the agg_file.
87 | seg_file (str): Path of the seg_file.
88 | meta_file (str): Path of the meta_file.
89 | label_map_file (str): Path of the label_map_file.
90 | output_file (str): Path of the output folder.
91 | Default: None.
92 | test_mode (bool): Whether is generating test data without labels.
93 | Default: False.
94 |
95 |     It returns a tuple, which contains the following things:
96 | np.ndarray: Vertices of points data.
97 | np.ndarray: Indexes of label.
98 | np.ndarray: Indexes of instance.
99 | np.ndarray: Instance bboxes.
100 | dict: Map from object_id to label_id.
101 | """
102 | if scannet200:
103 | label_map = scannet_utils.read_label_mapping(
104 | label_map_file, label_from='raw_category', label_to='id')
105 | else:
106 | label_map = scannet_utils.read_label_mapping(
107 | label_map_file, label_from='raw_category', label_to='nyu40id')
108 |
109 | mesh_vertices = scannet_utils.read_mesh_vertices_rgb(mesh_file)
110 |
111 | # Load scene axis alignment matrix
112 | lines = open(meta_file).readlines()
113 | # test set data doesn't have align_matrix
114 | axis_align_matrix = np.eye(4)
115 | for line in lines:
116 | if 'axisAlignment' in line:
117 | axis_align_matrix = [
118 | float(x)
119 | for x in line.rstrip().strip('axisAlignment = ').split(' ')
120 | ]
121 | break
122 | axis_align_matrix = np.array(axis_align_matrix).reshape((4, 4))
123 |
124 | # perform global alignment of mesh vertices
125 | pts = np.ones((mesh_vertices.shape[0], 4))
126 | pts[:, 0:3] = mesh_vertices[:, 0:3]
127 | pts = np.dot(pts, axis_align_matrix.transpose()) # Nx4
128 | aligned_mesh_vertices = np.concatenate([pts[:, 0:3], mesh_vertices[:, 3:]],
129 | axis=1)
130 |
131 | # Load semantic and instance labels
132 | if not test_mode:
133 | object_id_to_segs, label_to_segs = read_aggregation(agg_file)
134 | seg_to_verts, num_verts = read_segmentation(seg_file)
135 | label_ids = np.zeros(shape=(num_verts), dtype=np.uint32)
136 | object_id_to_label_id = {}
137 | for label, segs in label_to_segs.items():
138 | label_id = label_map[label]
139 | for seg in segs:
140 | verts = seg_to_verts[seg]
141 | label_ids[verts] = label_id
142 | instance_ids = np.zeros(
143 | shape=(num_verts), dtype=np.uint32) # 0: unannotated
144 | for object_id, segs in object_id_to_segs.items():
145 | for seg in segs:
146 | verts = seg_to_verts[seg]
147 | instance_ids[verts] = object_id
148 | if object_id not in object_id_to_label_id:
149 | object_id_to_label_id[object_id] = label_ids[verts][0]
150 | unaligned_bboxes = extract_bbox(mesh_vertices, object_id_to_segs,
151 | object_id_to_label_id, instance_ids)
152 | aligned_bboxes = extract_bbox(aligned_mesh_vertices, object_id_to_segs,
153 | object_id_to_label_id, instance_ids)
154 | else:
155 | label_ids = None
156 | instance_ids = None
157 | unaligned_bboxes = None
158 | aligned_bboxes = None
159 | object_id_to_label_id = None
160 |
161 | if output_file is not None:
162 | np.save(output_file + '_vert.npy', mesh_vertices)
163 | if not test_mode:
164 | np.save(output_file + '_sem_label.npy', label_ids)
165 | np.save(output_file + '_ins_label.npy', instance_ids)
166 | np.save(output_file + '_unaligned_bbox.npy', unaligned_bboxes)
167 | np.save(output_file + '_aligned_bbox.npy', aligned_bboxes)
168 | np.save(output_file + '_axis_align_matrix.npy', axis_align_matrix)
169 |
170 | return mesh_vertices, label_ids, instance_ids, unaligned_bboxes, \
171 | aligned_bboxes, object_id_to_label_id, axis_align_matrix
172 |
173 |
174 | def main():
175 | parser = argparse.ArgumentParser()
176 | parser.add_argument(
177 | '--scan_path',
178 | required=True,
179 |         help='path to scannet scene (e.g., data/ScanNet/v2/scene0000_00)')
180 | parser.add_argument('--output_file', required=True, help='output file')
181 | parser.add_argument(
182 | '--label_map_file',
183 | required=True,
184 | help='path to scannetv2-labels.combined.tsv')
185 | parser.add_argument(
186 | '--scannet200',
187 | action='store_true',
188 | help='Use it for scannet200 mapping')
189 |
190 | opt = parser.parse_args()
191 |
192 | scan_name = os.path.split(opt.scan_path)[-1]
193 | mesh_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.ply')
194 | agg_file = os.path.join(opt.scan_path, scan_name + '.aggregation.json')
195 | seg_file = os.path.join(opt.scan_path,
196 | scan_name + '_vh_clean_2.0.010000.segs.json')
197 | meta_file = os.path.join(
198 | opt.scan_path, scan_name +
199 | '.txt') # includes axisAlignment info for the train set scans.
200 | export(mesh_file, agg_file, seg_file, meta_file, opt.label_map_file,
201 | opt.output_file, scannet200=opt.scannet200)
202 |
203 |
204 | if __name__ == '__main__':
205 | main()
206 |
--------------------------------------------------------------------------------
/data/scannet/meta_data/scannet_means.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/scannet/meta_data/scannet_means.npz
--------------------------------------------------------------------------------
/data/scannet/meta_data/scannetv2_test.txt:
--------------------------------------------------------------------------------
1 | scene0707_00
2 | scene0708_00
3 | scene0709_00
4 | scene0710_00
5 | scene0711_00
6 | scene0712_00
7 | scene0713_00
8 | scene0714_00
9 | scene0715_00
10 | scene0716_00
11 | scene0717_00
12 | scene0718_00
13 | scene0719_00
14 | scene0720_00
15 | scene0721_00
16 | scene0722_00
17 | scene0723_00
18 | scene0724_00
19 | scene0725_00
20 | scene0726_00
21 | scene0727_00
22 | scene0728_00
23 | scene0729_00
24 | scene0730_00
25 | scene0731_00
26 | scene0732_00
27 | scene0733_00
28 | scene0734_00
29 | scene0735_00
30 | scene0736_00
31 | scene0737_00
32 | scene0738_00
33 | scene0739_00
34 | scene0740_00
35 | scene0741_00
36 | scene0742_00
37 | scene0743_00
38 | scene0744_00
39 | scene0745_00
40 | scene0746_00
41 | scene0747_00
42 | scene0748_00
43 | scene0749_00
44 | scene0750_00
45 | scene0751_00
46 | scene0752_00
47 | scene0753_00
48 | scene0754_00
49 | scene0755_00
50 | scene0756_00
51 | scene0757_00
52 | scene0758_00
53 | scene0759_00
54 | scene0760_00
55 | scene0761_00
56 | scene0762_00
57 | scene0763_00
58 | scene0764_00
59 | scene0765_00
60 | scene0766_00
61 | scene0767_00
62 | scene0768_00
63 | scene0769_00
64 | scene0770_00
65 | scene0771_00
66 | scene0772_00
67 | scene0773_00
68 | scene0774_00
69 | scene0775_00
70 | scene0776_00
71 | scene0777_00
72 | scene0778_00
73 | scene0779_00
74 | scene0780_00
75 | scene0781_00
76 | scene0782_00
77 | scene0783_00
78 | scene0784_00
79 | scene0785_00
80 | scene0786_00
81 | scene0787_00
82 | scene0788_00
83 | scene0789_00
84 | scene0790_00
85 | scene0791_00
86 | scene0792_00
87 | scene0793_00
88 | scene0794_00
89 | scene0795_00
90 | scene0796_00
91 | scene0797_00
92 | scene0798_00
93 | scene0799_00
94 | scene0800_00
95 | scene0801_00
96 | scene0802_00
97 | scene0803_00
98 | scene0804_00
99 | scene0805_00
100 | scene0806_00
101 |
--------------------------------------------------------------------------------
/data/scannet/meta_data/scannetv2_val.txt:
--------------------------------------------------------------------------------
1 | scene0568_00
2 | scene0568_01
3 | scene0568_02
4 | scene0304_00
5 | scene0488_00
6 | scene0488_01
7 | scene0412_00
8 | scene0412_01
9 | scene0217_00
10 | scene0019_00
11 | scene0019_01
12 | scene0414_00
13 | scene0575_00
14 | scene0575_01
15 | scene0575_02
16 | scene0426_00
17 | scene0426_01
18 | scene0426_02
19 | scene0426_03
20 | scene0549_00
21 | scene0549_01
22 | scene0578_00
23 | scene0578_01
24 | scene0578_02
25 | scene0665_00
26 | scene0665_01
27 | scene0050_00
28 | scene0050_01
29 | scene0050_02
30 | scene0257_00
31 | scene0025_00
32 | scene0025_01
33 | scene0025_02
34 | scene0583_00
35 | scene0583_01
36 | scene0583_02
37 | scene0701_00
38 | scene0701_01
39 | scene0701_02
40 | scene0580_00
41 | scene0580_01
42 | scene0565_00
43 | scene0169_00
44 | scene0169_01
45 | scene0655_00
46 | scene0655_01
47 | scene0655_02
48 | scene0063_00
49 | scene0221_00
50 | scene0221_01
51 | scene0591_00
52 | scene0591_01
53 | scene0591_02
54 | scene0678_00
55 | scene0678_01
56 | scene0678_02
57 | scene0462_00
58 | scene0427_00
59 | scene0595_00
60 | scene0193_00
61 | scene0193_01
62 | scene0164_00
63 | scene0164_01
64 | scene0164_02
65 | scene0164_03
66 | scene0598_00
67 | scene0598_01
68 | scene0598_02
69 | scene0599_00
70 | scene0599_01
71 | scene0599_02
72 | scene0328_00
73 | scene0300_00
74 | scene0300_01
75 | scene0354_00
76 | scene0458_00
77 | scene0458_01
78 | scene0423_00
79 | scene0423_01
80 | scene0423_02
81 | scene0307_00
82 | scene0307_01
83 | scene0307_02
84 | scene0606_00
85 | scene0606_01
86 | scene0606_02
87 | scene0432_00
88 | scene0432_01
89 | scene0608_00
90 | scene0608_01
91 | scene0608_02
92 | scene0651_00
93 | scene0651_01
94 | scene0651_02
95 | scene0430_00
96 | scene0430_01
97 | scene0689_00
98 | scene0357_00
99 | scene0357_01
100 | scene0574_00
101 | scene0574_01
102 | scene0574_02
103 | scene0329_00
104 | scene0329_01
105 | scene0329_02
106 | scene0153_00
107 | scene0153_01
108 | scene0616_00
109 | scene0616_01
110 | scene0671_00
111 | scene0671_01
112 | scene0618_00
113 | scene0382_00
114 | scene0382_01
115 | scene0490_00
116 | scene0621_00
117 | scene0607_00
118 | scene0607_01
119 | scene0149_00
120 | scene0695_00
121 | scene0695_01
122 | scene0695_02
123 | scene0695_03
124 | scene0389_00
125 | scene0377_00
126 | scene0377_01
127 | scene0377_02
128 | scene0342_00
129 | scene0139_00
130 | scene0629_00
131 | scene0629_01
132 | scene0629_02
133 | scene0496_00
134 | scene0633_00
135 | scene0633_01
136 | scene0518_00
137 | scene0652_00
138 | scene0406_00
139 | scene0406_01
140 | scene0406_02
141 | scene0144_00
142 | scene0144_01
143 | scene0494_00
144 | scene0278_00
145 | scene0278_01
146 | scene0316_00
147 | scene0609_00
148 | scene0609_01
149 | scene0609_02
150 | scene0609_03
151 | scene0084_00
152 | scene0084_01
153 | scene0084_02
154 | scene0696_00
155 | scene0696_01
156 | scene0696_02
157 | scene0351_00
158 | scene0351_01
159 | scene0643_00
160 | scene0644_00
161 | scene0645_00
162 | scene0645_01
163 | scene0645_02
164 | scene0081_00
165 | scene0081_01
166 | scene0081_02
167 | scene0647_00
168 | scene0647_01
169 | scene0535_00
170 | scene0353_00
171 | scene0353_01
172 | scene0353_02
173 | scene0559_00
174 | scene0559_01
175 | scene0559_02
176 | scene0593_00
177 | scene0593_01
178 | scene0246_00
179 | scene0653_00
180 | scene0653_01
181 | scene0064_00
182 | scene0064_01
183 | scene0356_00
184 | scene0356_01
185 | scene0356_02
186 | scene0030_00
187 | scene0030_01
188 | scene0030_02
189 | scene0222_00
190 | scene0222_01
191 | scene0338_00
192 | scene0338_01
193 | scene0338_02
194 | scene0378_00
195 | scene0378_01
196 | scene0378_02
197 | scene0660_00
198 | scene0553_00
199 | scene0553_01
200 | scene0553_02
201 | scene0527_00
202 | scene0663_00
203 | scene0663_01
204 | scene0663_02
205 | scene0664_00
206 | scene0664_01
207 | scene0664_02
208 | scene0334_00
209 | scene0334_01
210 | scene0334_02
211 | scene0046_00
212 | scene0046_01
213 | scene0046_02
214 | scene0203_00
215 | scene0203_01
216 | scene0203_02
217 | scene0088_00
218 | scene0088_01
219 | scene0088_02
220 | scene0088_03
221 | scene0086_00
222 | scene0086_01
223 | scene0086_02
224 | scene0670_00
225 | scene0670_01
226 | scene0256_00
227 | scene0256_01
228 | scene0256_02
229 | scene0249_00
230 | scene0441_00
231 | scene0658_00
232 | scene0704_00
233 | scene0704_01
234 | scene0187_00
235 | scene0187_01
236 | scene0131_00
237 | scene0131_01
238 | scene0131_02
239 | scene0207_00
240 | scene0207_01
241 | scene0207_02
242 | scene0461_00
243 | scene0011_00
244 | scene0011_01
245 | scene0343_00
246 | scene0251_00
247 | scene0077_00
248 | scene0077_01
249 | scene0684_00
250 | scene0684_01
251 | scene0550_00
252 | scene0686_00
253 | scene0686_01
254 | scene0686_02
255 | scene0208_00
256 | scene0500_00
257 | scene0500_01
258 | scene0552_00
259 | scene0552_01
260 | scene0648_00
261 | scene0648_01
262 | scene0435_00
263 | scene0435_01
264 | scene0435_02
265 | scene0435_03
266 | scene0690_00
267 | scene0690_01
268 | scene0693_00
269 | scene0693_01
270 | scene0693_02
271 | scene0700_00
272 | scene0700_01
273 | scene0700_02
274 | scene0699_00
275 | scene0231_00
276 | scene0231_01
277 | scene0231_02
278 | scene0697_00
279 | scene0697_01
280 | scene0697_02
281 | scene0697_03
282 | scene0474_00
283 | scene0474_01
284 | scene0474_02
285 | scene0474_03
286 | scene0474_04
287 | scene0474_05
288 | scene0355_00
289 | scene0355_01
290 | scene0146_00
291 | scene0146_01
292 | scene0146_02
293 | scene0196_00
294 | scene0702_00
295 | scene0702_01
296 | scene0702_02
297 | scene0314_00
298 | scene0277_00
299 | scene0277_01
300 | scene0277_02
301 | scene0095_00
302 | scene0095_01
303 | scene0015_00
304 | scene0100_00
305 | scene0100_01
306 | scene0100_02
307 | scene0558_00
308 | scene0558_01
309 | scene0558_02
310 | scene0685_00
311 | scene0685_01
312 | scene0685_02
313 |
--------------------------------------------------------------------------------
/data/scannet/scannet_utils.py:
--------------------------------------------------------------------------------
1 | # Modified from
2 | # https://github.com/facebookresearch/votenet/blob/master/scannet/scannet_utils.py
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 | """Ref: https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts
8 | """
9 |
10 | import csv
11 | import os
12 |
13 | import numpy as np
14 | from plyfile import PlyData
15 |
16 |
17 | def represents_int(s):
18 | """Judge whether string s represents an int.
19 |
20 | Args:
21 | s(str): The input string to be judged.
22 |
23 | Returns:
24 | bool: Whether s represents int or not.
25 | """
26 | try:
27 | int(s)
28 | return True
29 | except ValueError:
30 | return False
31 |
32 |
33 | def read_label_mapping(filename,
34 | label_from='raw_category',
35 | label_to='nyu40id'):
36 | assert os.path.isfile(filename)
37 | mapping = dict()
38 | with open(filename) as csvfile:
39 | reader = csv.DictReader(csvfile, delimiter='\t')
40 | for row in reader:
41 | mapping[row[label_from]] = int(row[label_to])
42 | if represents_int(list(mapping.keys())[0]):
43 | mapping = {int(k): v for k, v in mapping.items()}
44 | return mapping
45 |
46 |
47 | def read_mesh_vertices(filename):
48 | """Read XYZ for each vertex.
49 |
50 | Args:
51 | filename(str): The name of the mesh vertices file.
52 |
53 | Returns:
54 | ndarray: Vertices.
55 | """
56 | assert os.path.isfile(filename)
57 | with open(filename, 'rb') as f:
58 | plydata = PlyData.read(f)
59 | num_verts = plydata['vertex'].count
60 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32)
61 | vertices[:, 0] = plydata['vertex'].data['x']
62 | vertices[:, 1] = plydata['vertex'].data['y']
63 | vertices[:, 2] = plydata['vertex'].data['z']
64 | return vertices
65 |
66 |
67 | def read_mesh_vertices_rgb(filename):
68 | """Read XYZ and RGB for each vertex.
69 |
70 | Args:
71 | filename(str): The name of the mesh vertices file.
72 |
73 | Returns:
74 | Vertices. Note that RGB values are in 0-255.
75 | """
76 | assert os.path.isfile(filename)
77 | with open(filename, 'rb') as f:
78 | plydata = PlyData.read(f)
79 | num_verts = plydata['vertex'].count
80 | vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32)
81 | vertices[:, 0] = plydata['vertex'].data['x']
82 | vertices[:, 1] = plydata['vertex'].data['y']
83 | vertices[:, 2] = plydata['vertex'].data['z']
84 | vertices[:, 3] = plydata['vertex'].data['red']
85 | vertices[:, 4] = plydata['vertex'].data['green']
86 | vertices[:, 5] = plydata['vertex'].data['blue']
87 | return vertices
88 |
--------------------------------------------------------------------------------
/data/scannetpp/README.md:
--------------------------------------------------------------------------------
1 | ## Prepare ScanNet++ Data for Indoor 3D Detection
2 |
3 | 1. Download the data from the official [ScanNet++](https://github.com/scannetpp/scannetpp) repository.
4 |
5 | 2. Preprocess raw data by running:
6 |
7 | ```bash
8 | python preprocess_raw_data.py --path_to_data path_to_dataset --output_dir path_to_save_preprocessed_raw_data
9 | ```
10 |
11 | 3. Generate bins and pkls data by running:
12 |
13 | ```bash
14 | python prepare_bins_pkls.py --path_to_data path_to_preprocessed_raw_data --path_to_save_bins path_to_save_bins
15 | ```
16 |
17 | Overall, you should obtain the following file structure in the `bins` directory:
18 | ```
19 | bins
20 | ├── bboxs
21 | │ ├── xxxxx_xx.npy
22 | ├── instance_mask
23 | │ ├── xxxxx_xx.bin
24 | ├── points
25 | │ ├── xxxxx_xx.bin
26 | ├── semantic_mask
27 | │ ├── xxxxx_xx.bin
28 | ├── superpoints
29 | │ ├── xxxxx_xx.bin
30 | ├── scannetpp_infos_train.pkl
31 | ├── scannetpp_infos_val.pkl
32 | ├── scannetpp_infos_test.pkl
33 | ```
34 |
--------------------------------------------------------------------------------
/data/scannetpp/prepare_bins_pkls.py:
--------------------------------------------------------------------------------
1 | import mmengine
2 | import os
3 | from tqdm.auto import tqdm
4 | import numpy as np
5 | import argparse
6 |
7 | OBJ2SEM = {'wall': 0,
8 | 'ceiling': 1,
9 | 'floor': 2,
10 | 'table': 3,
11 | 'door': 4,
12 | 'ceiling lamp': 5,
13 | 'cabinet': 6,
14 | 'blinds': 7,
15 | 'curtain': 8,
16 | 'chair': 9,
17 | 'storage cabinet': 10,
18 | 'office chair': 11,
19 | 'bookshelf': 12,
20 | 'whiteboard': 13,
21 | 'window': 14,
22 | 'box': 15,
23 | 'window frame': 16,
24 | 'monitor': 17,
25 | 'shelf': 18,
26 | 'doorframe': 19,
27 | 'pipe': 20,
28 | 'heater': 21,
29 | 'kitchen cabinet': 22,
30 | 'sofa': 23,
31 | 'windowsill': 24,
32 | 'bed': 25,
33 | 'shower wall': 26,
34 | 'trash can': 27,
35 | 'book': 28,
36 | 'plant': 29,
37 | 'blanket': 30,
38 | 'tv': 31,
39 | 'computer tower': 32,
40 | 'kitchen counter': 33,
41 | 'refrigerator': 34,
42 | 'jacket': 35,
43 | 'electrical duct': 36,
44 | 'sink': 37,
45 | 'bag': 38,
46 | 'picture': 39,
47 | 'pillow': 40,
48 | 'towel': 41,
49 | 'suitcase': 42,
50 | 'backpack': 43,
51 | 'crate': 44,
52 | 'keyboard': 45,
53 | 'rack': 46,
54 | 'toilet': 47,
55 | 'paper': 48,
56 | 'printer': 49,
57 | 'poster': 50,
58 | 'painting': 51,
59 | 'microwave': 52,
60 | 'board': 53,
61 | 'shoes': 54,
62 | 'socket': 55,
63 | 'bottle': 56,
64 | 'bucket': 57,
65 | 'cushion': 58,
66 | 'basket': 59,
67 | 'shoe rack': 60,
68 | 'telephone': 61,
69 | 'file folder': 62,
70 | 'cloth': 63,
71 | 'blind rail': 64,
72 | 'laptop': 65,
73 | 'plant pot': 66,
74 | 'exhaust fan': 67,
75 | 'cup': 68,
76 | 'coat hanger': 69,
77 | 'light switch': 70,
78 | 'speaker': 71,
79 | 'table lamp': 72,
80 | 'air vent': 73,
81 | 'clothes hanger': 74,
82 | 'kettle': 75,
83 | 'smoke detector': 76,
84 | 'container': 77,
85 | 'power strip': 78,
86 | 'slippers': 79,
87 | 'paper bag': 80,
88 | 'mouse': 81,
89 | 'cutting board': 82,
90 | 'toilet paper': 83,
91 | 'paper towel': 84,
92 | 'pot': 85,
93 | 'clock': 86,
94 | 'pan': 87,
95 | 'tap': 88,
96 | 'jar': 89,
97 | 'soap dispenser': 90,
98 | 'binder': 91,
99 | 'bowl': 92,
100 | 'tissue box': 93,
101 | 'whiteboard eraser': 94,
102 | 'toilet brush': 95,
103 | 'spray bottle': 96,
104 | 'headphones': 97,
105 | 'stapler': 98,
106 | 'marker': 99}
107 |
108 | def create_dir(path):
109 | if not os.path.exists(path):
110 | os.mkdir(path)
111 |
112 | def load_txt(path):
113 | res = []
114 |
115 | with open(path) as f:
116 | for line in tqdm(f):
117 | res.append(line.strip())
118 |
119 | return res
120 |
121 | def create_dirs(path):
122 | points = os.path.join(path, 'points')
123 | create_dir(points)
124 |
125 | semantic_mask = os.path.join(path, 'semantic_mask')
126 | create_dir(semantic_mask)
127 |
128 | instance_mask = os.path.join(path, 'instance_mask')
129 | create_dir(instance_mask)
130 |
131 | bboxs = os.path.join(path, 'bboxs')
132 | create_dir(bboxs)
133 |
134 | superpoints = os.path.join(path, 'superpoints')
135 | create_dir(superpoints)
136 | return {
137 | 'points': points,
138 | 'semantic_mask': semantic_mask,
139 | 'instance_mask': instance_mask,
140 | 'bboxs': bboxs,
141 | 'superpoints': superpoints
142 | }
143 |
144 | def create_metainfo():
145 |
146 | return {
147 | 'categories': OBJ2SEM,
148 | 'dataset': 'scannetpp',
149 | 'info_version': '1.0'
150 | }
151 |
152 | def create_data_list(split, splits, bins_path):
153 |
154 | scenes = splits[split]
155 | final_list = []
156 | for scene in tqdm(scenes):
157 | lidar_points = {
158 | 'num_pts_feats': 6,
159 | 'lidar_path': f'{scene}.bin'
160 | }
161 | raw_bboxs = np.load(os.path.join(bins_path['bboxs'], f'{scene}.npy'))
162 | instances = []
163 | for rb in raw_bboxs:
164 | # skip empty rows instead of resetting the already collected instances
165 | if len(rb) == 0:
166 | continue
167 | instances.append({
168 | 'bbox_3d': rb[:6].tolist(),
169 | 'bbox_label_3d': int(rb[-1])
170 | })
171 | final_list.append({
172 | 'lidar_points': lidar_points,
173 | 'instances': instances,
174 | 'pts_semantic_mask_path': f'{scene}.bin',
175 | 'pts_instance_mask_path': f'{scene}.bin',
176 | 'axis_align_matrix': np.eye(4)
177 | })
178 |
179 | return final_list
180 |
181 | def create_pkl_file(path_to_save, split, splits,
182 | bins_path, pkl_prefix = 'scannetpp'):
183 | metainfo = create_metainfo()
184 | data_list = create_data_list(split, splits, bins_path)
185 | anno = {
186 | 'metainfo': metainfo,
187 | 'data_list': data_list
188 | }
189 | filename = os.path.join(path_to_save, f'{pkl_prefix}_infos_{split}.pkl')
190 | mmengine.dump(anno, filename, 'pkl')
191 |
192 | if __name__ == '__main__':
193 | parser = argparse.ArgumentParser()
194 | parser.add_argument(
195 | '--path_to_data',
196 | required=True,
197 | help='Path to preprocessed raw data',
198 | type=str,
199 | )
200 |
201 | parser.add_argument(
202 | '--path_to_save_bins',
203 | required=True,
204 | help='Enter here the path where to save bins and pkls',
205 | type=str,
206 | )
207 |
208 | args = parser.parse_args()
209 | print(args)
210 |
211 | path_to_raw_data = args.path_to_data
212 | path_to_save_data = args.path_to_save_bins
213 | create_dir(path_to_save_data)
214 | bins_path = create_dirs(path_to_save_data)
215 |
216 | path_to_train_ids = os.path.join(path_to_raw_data, 'nvs_sem_train.txt')
217 | train_scenes = load_txt(path_to_train_ids)
218 | path_to_val_ids = os.path.join(path_to_raw_data, 'nvs_sem_val.txt')
219 | val_scenes = load_txt(path_to_val_ids)
220 | path_to_sem_test_ids = os.path.join(path_to_raw_data, 'sem_test.txt')
221 | test_scenes = load_txt(path_to_sem_test_ids)
222 |
223 | splits = {
224 | 'train': train_scenes,
225 | 'val': val_scenes,
226 | 'test': test_scenes
227 | }
228 |
229 | path_to_raw_data = os.path.join(path_to_raw_data, 'data')
230 | scene_ids = os.listdir(path_to_raw_data)
231 |
232 | for si in tqdm(scene_ids):
233 | temp_path = os.path.join(path_to_raw_data, si)
234 | point_cloud = np.load(temp_path + f'/{si}_point_cloud.npy')
235 | sem_label = np.load(temp_path + f'/{si}_semantic.npy')
236 | ins_label = np.load(temp_path + f'/{si}_instance.npy')
237 | bboxs = np.load(temp_path + f'/{si}_bboxs.npy')
238 | superpoints = np.load(temp_path + f'/{si}_superpoints.npy')
239 |
240 | point_cloud.astype(np.float32).tofile(
241 | os.path.join(bins_path['points'], f'{si}.bin'))
242 | sem_label.astype(np.int64).tofile(
243 | os.path.join(bins_path['semantic_mask'], f'{si}.bin'))
244 | ins_label.astype(np.int64).tofile(
245 | os.path.join(bins_path['instance_mask'], f'{si}.bin'))
246 | superpoints.astype(np.int64).tofile(
247 | os.path.join(bins_path['superpoints'], f'{si}.bin'))
248 | np.save(os.path.join(bins_path['bboxs'], f'{si}.npy'), bboxs)
249 |
250 | create_pkl_file(path_to_save_data, 'train', splits, bins_path)
251 | create_pkl_file(path_to_save_data, 'val', splits, bins_path)
252 | create_pkl_file(path_to_save_data, 'test', splits, bins_path)
253 |
--------------------------------------------------------------------------------
/data/scannetpp/preprocess_raw_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import json
4 | import numpy as np
5 | from plyfile import PlyData
6 | import pandas as pd
7 | from tqdm.auto import tqdm
8 | from concurrent.futures import ProcessPoolExecutor
9 | import shutil
10 | import segmentator
11 | import torch
12 | import trimesh
13 |
14 | POINT_CLOUD_PFX = "mesh_aligned_0.05.ply"
15 | SEGMENTS_ANNO_PFX = "segments_anno.json"
16 |
17 | def _handle_id(scene_id):
18 | print(f'Processing: {scene_id}')
19 | if not os.path.isdir(os.path.join(PATH_TO_IDS, scene_id, 'scans')):
20 | return
21 |
22 | point_cloud, _ = read_plymesh(os.path.join(PATH_TO_IDS, scene_id,
23 | 'scans', POINT_CLOUD_PFX))
24 |
25 | mesh = trimesh.load_mesh(os.path.join(PATH_TO_IDS, scene_id,
26 | 'scans', POINT_CLOUD_PFX))
27 | vertices = mesh.vertices
28 | faces = mesh.faces
29 |
30 | vertices = torch.from_numpy(vertices.astype(np.float32))
31 | faces = torch.from_numpy(faces.astype(np.int64))
32 | super_points = segmentator.segment_mesh(vertices, faces).numpy()
33 |
34 | mapping_superpoints = {tuple(i.tolist()):
35 | super_points[idx] for idx, i in enumerate(vertices)}
36 | super_points = np.array([mapping_superpoints[tuple(i.tolist())]
37 | for i in point_cloud[:, :3]])
38 |
39 | assert point_cloud.shape[1] == 6
40 | assert point_cloud.shape[0] == super_points.shape[0]
41 |
42 | semantic = np.zeros((point_cloud.shape[0], 1)) - 1 # -1: unannotated
43 | instance = np.zeros((point_cloud.shape[0], 1)) - 1 # -1: unannotated
44 | if scene_id in TRAIN_IDS or scene_id in VAL_IDS:
45 | seg_anno = load_json(os.path.join(PATH_TO_IDS, scene_id,
46 | 'scans', SEGMENTS_ANNO_PFX))
47 | seg_groups = seg_anno['segGroups']
48 | obj_idx = 0
49 | bboxs = []
50 | for idx, group in enumerate(seg_groups):
51 | label = group['label']
52 | segments = np.array(group['segments'])
53 |
54 | if label in TOP100SEM2ID:
55 | new_label = label
56 |
57 | elif label in SEMANTIC_MAP_TO and label not in TOP100SEM2ID:
58 | if SEMANTIC_MAP_TO[label] in TOP100SEM2ID:
59 | new_label = SEMANTIC_MAP_TO[label]
60 | else:
61 | continue
62 | else:
63 | continue
64 |
65 | label_id = TOP100SEM2ID[new_label]
66 |
67 | point_segments = point_cloud[segments]
68 | instance[segments] = obj_idx
69 | semantic[segments] = label_id
70 | xmin = np.min(point_segments[:,0])
71 | ymin = np.min(point_segments[:,1])
72 | zmin = np.min(point_segments[:,2])
73 | xmax = np.max(point_segments[:,0])
74 | ymax = np.max(point_segments[:,1])
75 | zmax = np.max(point_segments[:,2])
76 |
77 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2,
78 | xmax-xmin, ymax-ymin, zmax-zmin, label_id])
79 |
80 | bboxs.append(bbox)
81 | obj_idx += 1
82 |
83 | bboxs = np.stack(bboxs) if len(bboxs) > 0 else np.zeros((0, 7))
84 | data = {
85 | 'point_cloud': point_cloud,
86 | 'semantic': semantic[:, 0].astype(int),
87 | 'instance': instance[:, 0].astype(int),
88 | 'bboxs': bboxs,
89 | 'super_points': super_points
90 | }
91 |
92 | elif scene_id in SEM_TEST_IDS:
93 |
94 | data = {
95 | 'point_cloud': point_cloud,
96 | 'semantic': semantic[:, 0].astype(int),
97 | 'instance': instance[:, 0].astype(int),
98 | 'bboxs': np.zeros((0,7)),
99 | 'super_points': super_points
100 | }
101 |
102 | output_path = os.path.join(OUTPUT_DIR_DATA, f'{scene_id}')
103 | create_dir(os.path.join(output_path))
104 | output_prefix = os.path.join(output_path, f'{scene_id}')
105 | np.save(output_prefix+'_point_cloud.npy', data['point_cloud'])
106 | np.save(output_prefix+'_semantic.npy', data['semantic'])
107 | np.save(output_prefix+'_instance.npy', data['instance'])
108 | np.save(output_prefix+'_bboxs.npy', data['bboxs'])
109 | np.save(output_prefix+'_superpoints.npy', data['super_points'])
110 |
111 | def create_dir(path):
112 | if not os.path.exists(path):
113 | os.mkdir(path)
114 |
115 | def load_json(path):
116 | with open(path) as jd:
117 | return json.load(jd)
118 |
119 | def load_txt(path):
120 | res = []
121 |
122 | with open(path) as f:
123 | for line in tqdm(f):
124 | res.append(line.strip())
125 |
126 | return res
127 |
128 | def read_plymesh(filepath):
129 | """Read ply file and return it as numpy array. Returns None if emtpy."""
130 | with open(filepath, 'rb') as f:
131 | plydata = PlyData.read(f)
132 | if plydata.elements:
133 | vertices = pd.DataFrame(plydata['vertex'].data).values
134 | faces = np.array([f[0] for f in plydata["face"].data])
135 | return vertices, faces
136 |
137 |
138 | if __name__ == '__main__':
139 | parser = argparse.ArgumentParser()
140 | parser.add_argument(
141 | '--path_to_data',
142 | required=True,
143 | help='Path to raw data',
144 | type=str,
145 | )
146 |
147 | parser.add_argument(
148 | '--output_dir',
149 | required=True,
150 | help='Path to save preprocessed raw data',
151 | type=str,
152 | )
153 |
154 | parser.add_argument('--num_workers', default=20, type=int,
155 | help='The number of parallel workers')
156 |
157 | args = parser.parse_args()
158 | print(args)
159 | PATH_TO_DATA = args.path_to_data
160 | PATH_TO_IDS = os.path.join(PATH_TO_DATA, 'data')
161 | OUTPUT_DIR = args.output_dir
162 | create_dir(OUTPUT_DIR)
163 |
164 | OUTPUT_DIR_DATA = os.path.join(OUTPUT_DIR, 'data')
165 | create_dir(OUTPUT_DIR_DATA)
166 |
167 | TOP100SEM2ID = {}
168 | with open(os.path.join(PATH_TO_DATA,
169 | 'metadata/semantic_benchmark/top100.txt')) as f:
170 |
171 | for idx, line in enumerate(f):
172 | line = line.strip()
173 | TOP100SEM2ID[line] = idx
174 |
175 | TOPINST2ID = {}
176 | with open(os.path.join(PATH_TO_DATA,
177 | 'metadata/semantic_benchmark/top100_instance.txt')) as f:
178 | for idx, line in enumerate(f):
179 | line = line.strip()
180 | TOPINST2ID[line] = TOP100SEM2ID[line]
181 |
182 | MAPPING_BENCH = pd.read_csv(os.path.join(PATH_TO_DATA,
183 | 'metadata/semantic_benchmark/map_benchmark.csv'))
184 | SEMANTIC_MAP_TO = MAPPING_BENCH[~MAPPING_BENCH['semantic_map_to'].isna()]
185 | INSTANCE_MAP_TO = MAPPING_BENCH[~MAPPING_BENCH['instance_map_to'].isna()]
186 |
187 | SEMANTIC_MAP_TO = SEMANTIC_MAP_TO[['class','semantic_map_to']].values
188 | SEMANTIC_MAP_TO = dict(zip(SEMANTIC_MAP_TO[:, 0], SEMANTIC_MAP_TO[:, 1]))
189 | print(len(SEMANTIC_MAP_TO))
190 |
191 | INSTANCE_MAP_TO = INSTANCE_MAP_TO[['class','instance_map_to']].values
192 | INSTANCE_MAP_TO = dict(zip(INSTANCE_MAP_TO[:, 0], INSTANCE_MAP_TO[:, 1]))
193 | print(len(INSTANCE_MAP_TO))
194 |
195 | SCENE_IDS = os.listdir(os.path.join(PATH_TO_DATA, 'data'))
196 | SCENE_IDS = [s for s in SCENE_IDS if s != '.ipynb_checkpoints']
197 |
198 | assert len(SCENE_IDS) == 380
199 |
200 | path_to_train_ids = os.path.join(PATH_TO_DATA, 'splits', 'nvs_sem_train.txt')
201 | TRAIN_IDS = load_txt(path_to_train_ids)
202 | path_to_val_ids = os.path.join(PATH_TO_DATA, 'splits', 'nvs_sem_val.txt')
203 | VAL_IDS = load_txt(path_to_val_ids)
204 | path_to_sem_test_ids = os.path.join(PATH_TO_DATA, 'splits', 'sem_test.txt')
205 | SEM_TEST_IDS = load_txt(path_to_sem_test_ids)
206 |
207 | shutil.copytree(os.path.join(PATH_TO_DATA, 'splits'),
208 | OUTPUT_DIR, dirs_exist_ok=True)
209 |
210 | pool = ProcessPoolExecutor(max_workers=args.num_workers)
211 | print('Processing scenes...')
212 | _ = list(pool.map(_handle_id, SCENE_IDS))
213 |
--------------------------------------------------------------------------------
/tools/create_data.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | from os import path as osp
4 |
5 | from indoor_converter import create_indoor_info_file
6 | from update_infos_to_v2 import update_pkl_infos
7 |
8 |
9 | def scannet_data_prep(root_path, info_prefix, out_dir, workers):
10 | """Prepare the info file for scannet dataset.
11 |
12 | Args:
13 | root_path (str): Path of dataset root.
14 | info_prefix (str): The prefix of info filenames.
15 | out_dir (str): Output directory of the generated info file.
16 | workers (int): Number of threads to be used.
17 | """
18 | create_indoor_info_file(
19 | root_path, info_prefix, out_dir, workers=workers)
20 | info_train_path = osp.join(out_dir, f'{info_prefix}_infos_train.pkl')
21 | info_val_path = osp.join(out_dir, f'{info_prefix}_infos_val.pkl')
22 | info_test_path = osp.join(out_dir, f'{info_prefix}_infos_test.pkl')
23 | update_pkl_infos(info_prefix, out_dir=out_dir, pkl_path=info_train_path)
24 | update_pkl_infos(info_prefix, out_dir=out_dir, pkl_path=info_val_path)
25 | update_pkl_infos(info_prefix, out_dir=out_dir, pkl_path=info_test_path)
26 |
27 |
28 | parser = argparse.ArgumentParser(description='Data converter arg parser')
29 | parser.add_argument('dataset', metavar='kitti', help='name of the dataset')
30 | parser.add_argument(
31 | '--root-path',
32 | type=str,
33 | default='./data/kitti',
34 | help='specify the root path of dataset')
35 | parser.add_argument(
36 | '--out-dir',
37 | type=str,
38 | default='./data/kitti',
39 | required=False,
40 | help='output directory of the generated info files')
41 | parser.add_argument('--extra-tag', type=str, default='kitti')
42 | parser.add_argument(
43 | '--workers', type=int, default=4, help='number of threads to be used')
44 | args = parser.parse_args()
45 |
46 | if __name__ == '__main__':
47 | from mmdet3d.utils import register_all_modules
48 | register_all_modules()
49 |
50 | if args.dataset in ('scannet', 'scannet200'):
51 | scannet_data_prep(
52 | root_path=args.root_path,
53 | info_prefix=args.extra_tag,
54 | out_dir=args.out_dir,
55 | workers=args.workers)
56 | else:
57 | raise NotImplementedError(f'Unsupported dataset: {args.dataset}.')
58 |
--------------------------------------------------------------------------------
/tools/indoor_converter.py:
--------------------------------------------------------------------------------
1 | # Modified from mmdetection3d/tools/dataset_converters/indoor_converter.py
2 | # Only ScanNet and ScanNet200 are supported.
3 | import os
4 |
5 | import mmengine
6 |
7 | from scannet_data_utils import ScanNetData
8 |
9 |
10 | def create_indoor_info_file(data_path,
11 | pkl_prefix='sunrgbd',
12 | save_path=None,
13 | use_v1=False,
14 | workers=4):
15 | """Create indoor information file.
16 |
17 | Get information of the raw data and save it to the pkl file.
18 |
19 | Args:
20 | data_path (str): Path of the data.
21 | pkl_prefix (str, optional): Prefix of the pkl to be saved.
22 | Default: 'sunrgbd'.
23 | save_path (str, optional): Path of the pkl to be saved. Default: None.
24 | use_v1 (bool, optional): Whether to use v1. Default: False.
25 | workers (int, optional): Number of threads to be used. Default: 4.
26 | """
27 | assert os.path.exists(data_path)
28 | assert pkl_prefix in ['scannet', 'scannet200'], \
29 | f'unsupported indoor dataset {pkl_prefix}'
30 | save_path = data_path if save_path is None else save_path
31 | assert os.path.exists(save_path)
32 |
33 | # generate infos for both detection and segmentation task
34 | train_filename = os.path.join(
35 | save_path, f'{pkl_prefix}_infos_train.pkl')
36 | val_filename = os.path.join(
37 | save_path, f'{pkl_prefix}_infos_val.pkl')
38 | test_filename = os.path.join(
39 | save_path, f'{pkl_prefix}_infos_test.pkl')
40 | if pkl_prefix == 'scannet':
41 | # ScanNet has a train-val-test split
42 | train_dataset = ScanNetData(root_path=data_path, split='train')
43 | val_dataset = ScanNetData(root_path=data_path, split='val')
44 | test_dataset = ScanNetData(root_path=data_path, split='test')
45 | else: # ScanNet200
46 | # ScanNet200 uses the same train-val-test split
47 | train_dataset = ScanNetData(root_path=data_path, split='train',
48 | scannet200=True, save_path=save_path)
49 | val_dataset = ScanNetData(root_path=data_path, split='val',
50 | scannet200=True, save_path=save_path)
51 | test_dataset = ScanNetData(root_path=data_path, split='test',
52 | scannet200=True, save_path=save_path)
53 |
54 | infos_train = train_dataset.get_infos(
55 | num_workers=workers, has_label=True)
56 | mmengine.dump(infos_train, train_filename, 'pkl')
57 | print(f'{pkl_prefix} info train file is saved to {train_filename}')
58 |
59 | infos_val = val_dataset.get_infos(
60 | num_workers=workers, has_label=True)
61 | mmengine.dump(infos_val, val_filename, 'pkl')
62 | print(f'{pkl_prefix} info val file is saved to {val_filename}')
63 |
64 | infos_test = test_dataset.get_infos(
65 | num_workers=workers, has_label=False)
66 | mmengine.dump(infos_test, test_filename, 'pkl')
67 | print(f'{pkl_prefix} info test file is saved to {test_filename}')
68 |
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | # Adapted with minor changes from tools/test.py of open-mmlab/mmdetection3d.
2 | import argparse
3 | import os
4 | import os.path as osp
5 |
6 | from mmengine.config import Config, ConfigDict, DictAction
7 | from mmengine.registry import RUNNERS
8 | from mmengine.runner import Runner
9 |
10 | from mmdet3d.utils import replace_ceph_backend
11 |
12 |
13 | # TODO: support fuse_conv_bn and format_only
14 | def parse_args():
15 | parser = argparse.ArgumentParser(
16 | description='MMDet3D test (and eval) a model')
17 | parser.add_argument('config', help='test config file path')
18 | parser.add_argument('checkpoint', help='checkpoint file')
19 | parser.add_argument(
20 | '--work-dir',
21 | help='the directory to save the file containing evaluation metrics')
22 | parser.add_argument(
23 | '--ceph', action='store_true', help='Use ceph as data storage backend')
24 | parser.add_argument(
25 | '--show', action='store_true', help='show prediction results')
26 | parser.add_argument(
27 | '--show-dir',
28 | help='directory where painted images will be saved. '
29 | 'If specified, it will be automatically saved '
30 | 'to the work_dir/timestamp/show_dir')
31 | parser.add_argument(
32 | '--score-thr', type=float, default=0.1, help='bbox score threshold')
33 | parser.add_argument(
34 | '--task',
35 | type=str,
36 | choices=[
37 | 'mono_det', 'multi-view_det', 'lidar_det', 'lidar_seg',
38 | 'multi-modality_det'
39 | ],
40 | help='Determine the visualization method depending on the task.')
41 | parser.add_argument(
42 | '--wait-time', type=float, default=2, help='the interval of show (s)')
43 | parser.add_argument(
44 | '--cfg-options',
45 | nargs='+',
46 | action=DictAction,
47 | help='override some settings in the used config, the key-value pair '
48 | 'in xxx=yyy format will be merged into config file. If the value to '
49 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
50 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
51 | 'Note that the quotation marks are necessary and that no white space '
52 | 'is allowed.')
53 | parser.add_argument(
54 | '--launcher',
55 | choices=['none', 'pytorch', 'slurm', 'mpi'],
56 | default='none',
57 | help='job launcher')
58 | parser.add_argument(
59 | '--tta', action='store_true', help='Test time augmentation')
60 | # When using PyTorch version >= 2.0.0, the `torch.distributed.launch`
61 | # will pass the `--local-rank` parameter to `tools/test.py` instead
62 | # of `--local_rank`.
63 | parser.add_argument('--local_rank', '--local-rank', type=int, default=0)
64 | args = parser.parse_args()
65 | if 'LOCAL_RANK' not in os.environ:
66 | os.environ['LOCAL_RANK'] = str(args.local_rank)
67 | return args
68 |
69 |
70 | def trigger_visualization_hook(cfg, args):
71 | default_hooks = cfg.default_hooks
72 | if 'visualization' in default_hooks:
73 | visualization_hook = default_hooks['visualization']
74 | # Turn on visualization
75 | visualization_hook['draw'] = True
76 | if args.show:
77 | visualization_hook['show'] = True
78 | visualization_hook['wait_time'] = args.wait_time
79 | if args.show_dir:
80 | visualization_hook['test_out_dir'] = args.show_dir
81 | all_task_choices = [
82 | 'mono_det', 'multi-view_det', 'lidar_det', 'lidar_seg',
83 | 'multi-modality_det'
84 | ]
85 | assert args.task in all_task_choices, 'You must set '\
86 | f"'--task' in {all_task_choices} in the command " \
87 | 'if you want to use visualization hook'
88 | visualization_hook['vis_task'] = args.task
89 | visualization_hook['score_thr'] = args.score_thr
90 | else:
91 | raise RuntimeError(
92 | 'VisualizationHook must be included in default_hooks. '
93 | 'Refer to usage '
94 | '"visualization=dict(type=\'VisualizationHook\')"')
95 |
96 | return cfg
97 |
98 |
99 | def main():
100 | args = parse_args()
101 |
102 | # load config
103 | cfg = Config.fromfile(args.config)
104 |
105 | # TODO: We will unify the ceph support approach with other OpenMMLab repos
106 | if args.ceph:
107 | cfg = replace_ceph_backend(cfg)
108 |
109 | cfg.launcher = args.launcher
110 | if args.cfg_options is not None:
111 | cfg.merge_from_dict(args.cfg_options)
112 |
113 | # work_dir is determined in this priority: CLI > segment in file > filename
114 | if args.work_dir is not None:
115 | # update configs according to CLI args if args.work_dir is not None
116 | cfg.work_dir = args.work_dir
117 | elif cfg.get('work_dir', None) is None:
118 | # use config filename as default work_dir if cfg.work_dir is None
119 | cfg.work_dir = osp.join('./work_dirs',
120 | osp.splitext(osp.basename(args.config))[0])
121 |
122 | cfg.load_from = args.checkpoint
123 |
124 | if args.show or args.show_dir:
125 | # cfg = trigger_visualization_hook(cfg, args)
126 | cfg.test_evaluator['vis_dir'] = args.show_dir
127 |
128 | if args.tta:
129 | # Currently, we only support tta for 3D segmentation
130 | # TODO: Support tta for 3D detection
131 | assert 'tta_model' in cfg, 'Cannot find ``tta_model`` in config.'
132 | assert 'tta_pipeline' in cfg, 'Cannot find ``tta_pipeline`` in config.'
133 | cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline
134 | cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model)
135 |
136 | # build the runner from config
137 | if 'runner_type' not in cfg:
138 | # build the default runner
139 | runner = Runner.from_cfg(cfg)
140 | else:
141 | # build customized runner from the registry
142 | # if 'runner_type' is set in the cfg
143 | runner = RUNNERS.build(cfg)
144 |
145 | # start testing
146 | runner.test()
147 |
148 |
149 | if __name__ == '__main__':
150 | main()
151 |
--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
1 | # This is an exact copy of tools/train.py from open-mmlab/mmdetection3d.
2 | import argparse
3 | import logging
4 | import os
5 | import os.path as osp
6 |
7 | from mmengine.config import Config, DictAction
8 | from mmengine.logging import print_log
9 | from mmengine.registry import RUNNERS
10 | from mmengine.runner import Runner
11 |
12 | from mmdet3d.utils import replace_ceph_backend
13 |
14 |
15 | def parse_args():
16 | parser = argparse.ArgumentParser(description='Train a 3D detector')
17 | parser.add_argument('config', help='train config file path')
18 | parser.add_argument('--work-dir', help='the dir to save logs and models')
19 | parser.add_argument(
20 | '--amp',
21 | action='store_true',
22 | default=False,
23 | help='enable automatic-mixed-precision training')
24 | parser.add_argument(
25 | '--auto-scale-lr',
26 | action='store_true',
27 | help='enable automatically scaling LR.')
28 | parser.add_argument(
29 | '--resume',
30 | nargs='?',
31 | type=str,
32 | const='auto',
33 | help='If specify checkpoint path, resume from it, while if not '
34 | 'specify, try to auto resume from the latest checkpoint '
35 | 'in the work directory.')
36 | parser.add_argument(
37 | '--ceph', action='store_true', help='Use ceph as data storage backend')
38 | parser.add_argument(
39 | '--cfg-options',
40 | nargs='+',
41 | action=DictAction,
42 | help='override some settings in the used config, the key-value pair '
43 | 'in xxx=yyy format will be merged into config file. If the value to '
44 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
45 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
46 | 'Note that the quotation marks are necessary and that no white space '
47 | 'is allowed.')
48 | parser.add_argument(
49 | '--launcher',
50 | choices=['none', 'pytorch', 'slurm', 'mpi'],
51 | default='none',
52 | help='job launcher')
53 | # When using PyTorch version >= 2.0.0, the `torch.distributed.launch`
54 | # will pass the `--local-rank` parameter to `tools/train.py` instead
55 | # of `--local_rank`.
56 | parser.add_argument('--local_rank', '--local-rank', type=int, default=0)
57 | args = parser.parse_args()
58 | if 'LOCAL_RANK' not in os.environ:
59 | os.environ['LOCAL_RANK'] = str(args.local_rank)
60 | return args
61 |
62 |
63 | def main():
64 | args = parse_args()
65 |
66 | # load config
67 | cfg = Config.fromfile(args.config)
68 |
69 | # TODO: We will unify the ceph support approach with other OpenMMLab repos
70 | if args.ceph:
71 | cfg = replace_ceph_backend(cfg)
72 |
73 | cfg.launcher = args.launcher
74 | if args.cfg_options is not None:
75 | cfg.merge_from_dict(args.cfg_options)
76 |
77 | # work_dir is determined in this priority: CLI > segment in file > filename
78 | if args.work_dir is not None:
79 | # update configs according to CLI args if args.work_dir is not None
80 | cfg.work_dir = args.work_dir
81 | elif cfg.get('work_dir', None) is None:
82 | # use config filename as default work_dir if cfg.work_dir is None
83 | cfg.work_dir = osp.join('./work_dirs',
84 | osp.splitext(osp.basename(args.config))[0])
85 |
86 | # enable automatic-mixed-precision training
87 | if args.amp is True:
88 | optim_wrapper = cfg.optim_wrapper.type
89 | if optim_wrapper == 'AmpOptimWrapper':
90 | print_log(
91 | 'AMP training is already enabled in your config.',
92 | logger='current',
93 | level=logging.WARNING)
94 | else:
95 | assert optim_wrapper == 'OptimWrapper', (
96 | '`--amp` is only supported when the optimizer wrapper type is '
97 | f'`OptimWrapper` but got {optim_wrapper}.')
98 | cfg.optim_wrapper.type = 'AmpOptimWrapper'
99 | cfg.optim_wrapper.loss_scale = 'dynamic'
100 |
101 | # enable automatically scaling LR
102 | if args.auto_scale_lr:
103 | if 'auto_scale_lr' in cfg and \
104 | 'enable' in cfg.auto_scale_lr and \
105 | 'base_batch_size' in cfg.auto_scale_lr:
106 | cfg.auto_scale_lr.enable = True
107 | else:
108 | raise RuntimeError('Can not find "auto_scale_lr" or '
109 | '"auto_scale_lr.enable" or '
110 | '"auto_scale_lr.base_batch_size" in your'
111 | ' configuration file.')
112 |
113 | # resume is determined in this priority: resume from > auto_resume
114 | if args.resume == 'auto':
115 | cfg.resume = True
116 | cfg.load_from = None
117 | elif args.resume is not None:
118 | cfg.resume = True
119 | cfg.load_from = args.resume
120 |
121 | # build the runner from config
122 | if 'runner_type' not in cfg:
123 | # build the default runner
124 | runner = Runner.from_cfg(cfg)
125 | else:
126 | # build customized runner from the registry
127 | # if 'runner_type' is set in the cfg
128 | runner = RUNNERS.build(cfg)
129 |
130 | # start training
131 | runner.train()
132 |
133 |
134 | if __name__ == '__main__':
135 | main()
136 |
--------------------------------------------------------------------------------
/unidet3d/__init__.py:
--------------------------------------------------------------------------------
1 | from .unidet3d import UniDet3D
2 | from .spconv_unet import SpConvUNet
3 | from .encoder import UniDet3DEncoder
4 | from .criterion import UniDet3DCriterion
5 | from .loading import LoadAnnotations3D_, NormalizePointsColor_, DenormalizePointsColor
6 | from .formatting import Pack3DDetInputs_
7 | from .transforms_3d import PointDetClassMappingScanNet
8 | from .data_preprocessor import Det3DDataPreprocessor_
9 | from .scannet_dataset import ScanNetSegDataset_, ScanNetDetDataset
10 | from .s3dis_dataset import S3DISSegDetDataset
11 | from .arkitscenes_dataset import ARKitScenesOfflineDataset
12 | from .multiscan_dataset import MultiScan_
13 | from .rscan_dataset import ThreeRScan_
14 | from .scannetpp_dataset import Scannetpp_
15 | from .structures import InstanceData_
16 | from .axis_aligned_iou_loss import UniDet3DAxisAlignedIoULoss
17 | from .rotated_iou_loss import UniDet3DRotatedIoU3DLoss
18 | from .indoor_metric import IndoorMetric_
19 | from .concat_dataset import ConcatDataset_
--------------------------------------------------------------------------------
/unidet3d/arkitscenes_dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os.path as osp
3 |
4 | from mmdet3d.registry import DATASETS
5 | from mmdet3d.datasets import Det3DDataset
6 | from mmdet3d.structures import DepthInstance3DBoxes
7 | from mmengine.logging import print_log
8 | import logging
9 |
10 |
11 | @DATASETS.register_module()
12 | class ARKitScenesOfflineDataset(Det3DDataset):
13 | r"""ARKitScenes dataset (offline benchmark).
14 |
15 | Args:
16 | partition (float): Fraction of the dataset to use during
17 | training. Defaults to 1.
18 | data_prefix (dict): Prefix for data. Defaults to
19 | dict(pts='offline_prepared_data').
20 | box_type_3d (str): Type of 3D box of this dataset.
21 | Based on the `box_type_3d`, the dataset will encapsulate the box
22 | in its original format and then convert it to `box_type_3d`.
23 | Defaults to 'Depth'.
24 | """
25 | METAINFO = {
26 | 'classes': ('cabinet', 'refrigerator', 'shelf', 'stove', 'bed',
27 | 'sink', 'washer', 'toilet', 'bathtub', 'oven',
28 | 'dishwasher', 'fireplace', 'stool', 'chair', 'table',
29 | 'tv_monitor', 'sofa')
30 | }
31 |
32 | def __init__(self,
33 | partition: float = 1,
34 | data_prefix: dict = dict(pts='offline_prepared_data'),
35 | box_type_3d: str = 'Depth',
36 | **kwargs) -> None:
37 | self.partition = partition
38 | super().__init__(
39 | data_prefix=data_prefix,
40 | box_type_3d=box_type_3d,
41 | **kwargs)
42 |
43 | def parse_ann_info(self, info: dict) -> dict:
44 | """Process the `instances` in data info to `ann_info`.
45 |
46 | Args:
47 | info (dict): Info dict.
48 |
49 | Returns:
50 | dict: Processed `ann_info`
51 | """
52 | ann_info = super().parse_ann_info(info)
53 | # empty gt
54 | if ann_info is None:
55 | ann_info = dict()
56 | ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
57 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
58 |
59 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
60 | ann_info['gt_bboxes_3d'],
61 | origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
62 |
63 | return ann_info
64 |
65 | def parse_data_info(self, info: dict) -> dict:
66 | """Process the raw data info.
67 |
68 | Args:
69 | info (dict): Raw info dict.
70 |
71 | Returns:
72 | dict: Has `ann_info` in training stage. And
73 | all path has been converted to absolute path.
74 | """
75 | info['super_pts_path'] = osp.join(
76 | self.data_prefix.get('sp_pts_mask', ''),
77 | info['lidar_points']['lidar_path']) #info['super_pts_path']
78 |
79 | info = super().parse_data_info(info)
80 |
81 | return info
82 |
83 | def __getitem__(self, idx: int) -> dict:
84 | """Get the idx-th image and data information of dataset after
85 | ``self.pipeline``, and ``full_init`` will be called if the dataset has
86 | not been fully initialized.
87 |
88 | During the training phase, if ``self.pipeline`` returns ``None``,
89 | ``self._rand_another`` will be called until a valid image is fetched or
90 | the maximum number of refetches is reached.
91 |
92 | Args:
93 | idx (int): The index of self.data_list.
94 |
95 | Returns:
96 | dict: The idx-th image and data information of dataset after
97 | ``self.pipeline``.
98 | """
99 | # Performing full initialization by calling `__getitem__` will consume
100 | # extra memory. If a dataset is not fully initialized (e.g. created with
101 | # `lazy_init=True`) and then fed into the dataloader, different workers
102 | # will simultaneously read and parse the annotations, which costs more
103 | # time and memory, although it may work. Therefore, it is recommended
104 | # to manually call `full_init` before the dataset is fed into the
105 | # dataloader to ensure all workers share RAM with the master process.
106 |
107 | if not self.test_mode:
108 | if self.serialize_data:
109 | dataset_len = len(self.data_address)
110 | else:
111 | dataset_len = len(self.data_list)
112 | idx = np.random.randint(0, dataset_len)
113 | if not self._fully_initialized:
114 | print_log(
115 | 'Please call `full_init()` method manually to accelerate '
116 | 'the speed.',
117 | logger='current',
118 | level=logging.WARNING)
119 | self.full_init()
120 |
121 | if self.test_mode:
122 | data = self.prepare_data(idx)
123 | if data is None:
124 | raise Exception('Test time pipeline should not get `None` '
125 | 'data_sample')
126 | return data
127 |
128 | for _ in range(self.max_refetch + 1):
129 | data = self.prepare_data(idx)
130 | # Broken images or random augmentations may cause the returned data
131 | # to be None
132 | if data is None:
133 | idx = self._rand_another()
134 | continue
135 | return data
136 |
137 | def __len__(self) -> int:
138 | """Get the length of filtered dataset and automatically call
139 | ``full_init`` if the dataset has not been fully init.
140 |
141 | Returns:
142 | int: The length of filtered dataset.
143 | """
144 |
145 | if self.serialize_data:
146 | dataset_len = len(self.data_address)
147 | else:
148 | dataset_len = len(self.data_list)
149 | if not self.test_mode:
150 | return int(self.partition * dataset_len)
151 | else:
152 | return dataset_len
--------------------------------------------------------------------------------
/unidet3d/axis_aligned_iou_loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from typing import Optional
3 |
4 | import torch
5 | from mmdet.models.losses.utils import weighted_loss
6 | from torch import Tensor
7 | from torch import nn as nn
8 |
9 | from mmdet3d.models import axis_aligned_iou_loss
10 | from mmdet3d.registry import MODELS
11 | from mmdet3d.structures import AxisAlignedBboxOverlaps3D
12 |
13 |
14 | @weighted_loss
15 | def axis_aligned_diou_loss(pred: Tensor, target: Tensor) -> Tensor:
16 | """Calculate the DIoU loss (1-DIoU) of two sets of axis aligned bounding
17 | boxes. Note that predictions and targets are one-to-one corresponded.
18 |
19 | Args:
20 | pred (torch.Tensor): Bbox predictions with shape [..., 6]
21 | (x1, y1, z1, x2, y2, z2).
22 | target (torch.Tensor): Bbox targets (gt) with shape [..., 6]
23 | (x1, y1, z1, x2, y2, z2).
24 |
25 | Returns:
26 | torch.Tensor: DIoU loss between predictions and targets.
27 | """
28 | axis_aligned_iou = AxisAlignedBboxOverlaps3D()(
29 | pred, target, is_aligned=True)
30 | iou_loss = 1 - axis_aligned_iou
31 |
32 | xp1, yp1, zp1, xp2, yp2, zp2 = pred.split(1, dim=-1)
33 | xt1, yt1, zt1, xt2, yt2, zt2 = target.split(1, dim=-1)
34 |
35 | xpc = (xp1 + xp2) / 2
36 | ypc = (yp1 + yp2) / 2
37 | zpc = (zp1 + zp2) / 2
38 | xtc = (xt1 + xt2) / 2
39 | ytc = (yt1 + yt2) / 2
40 | ztc = (zt1 + zt2) / 2
41 | r2 = (xpc - xtc)**2 + (ypc - ytc)**2 + (zpc - ztc)**2
42 |
43 | x_min = torch.minimum(xp1, xt1)
44 | x_max = torch.maximum(xp2, xt2)
45 | y_min = torch.minimum(yp1, yt1)
46 | y_max = torch.maximum(yp2, yt2)
47 | z_min = torch.minimum(zp1, zt1)
48 | z_max = torch.maximum(zp2, zt2)
49 | c2 = (x_min - x_max)**2 + (y_min - y_max)**2 + (z_min - z_max)**2
50 |
51 | diou_loss = iou_loss + (r2 / c2)[:, 0]
52 |
53 | return diou_loss
54 |
55 |
56 | @MODELS.register_module()
57 | class UniDet3DAxisAlignedIoULoss(nn.Module):
58 | """Calculate the IoU loss (1-IoU) of axis aligned bounding boxes. The only
59 | difference with original AxisAlignedIoULoss is the addition of DIoU mode.
60 | These classes should be merged in the future.
61 |
62 | Args:
63 | mode (str): 'iou' for intersection over union or 'diou' for
64 | distance-iou loss. Defaults to 'iou'.
65 | reduction (str): Method to reduce losses.
66 | The valid reduction method are 'none', 'sum' or 'mean'.
67 | Defaults to 'mean'.
68 | loss_weight (float): Weight of loss. Defaults to 1.0.
69 | """
70 |
71 | def __init__(self,
72 | mode: str = 'iou',
73 | reduction: str = 'mean',
74 | loss_weight: float = 1.0) -> None:
75 | super(UniDet3DAxisAlignedIoULoss, self).__init__()
76 | assert mode in ['iou', 'diou']
77 | self.loss = axis_aligned_iou_loss if mode == 'iou' \
78 | else axis_aligned_diou_loss
79 | assert reduction in ['none', 'sum', 'mean']
80 | self.reduction = reduction
81 | self.loss_weight = loss_weight
82 |
83 | def forward(self,
84 | pred: Tensor,
85 | target: Tensor,
86 | weight: Optional[Tensor] = None,
87 | avg_factor: Optional[float] = None,
88 | reduction_override: Optional[str] = None,
89 | **kwargs) -> Tensor:
90 | """Forward function of loss calculation.
91 |
92 | Args:
93 | pred (Tensor): Bbox predictions with shape [..., 6].
94 | target (Tensor): Bbox targets (gt) with shape [..., 6].
95 | weight (Tensor, optional): Weight of loss.
96 | Defaults to None.
97 | avg_factor (float, optional): Average factor that is used to
98 | average the loss. Defaults to None.
99 | reduction_override (str, optional): Method to reduce losses.
100 | The valid reduction method are 'none', 'sum' or 'mean'.
101 | Defaults to None.
102 |
103 | Returns:
104 | Tensor: IoU loss between predictions and targets.
105 | """
106 | assert reduction_override in (None, 'none', 'mean', 'sum')
107 | reduction = (
108 | reduction_override if reduction_override else self.reduction)
109 | if (weight is not None) and (not torch.any(weight > 0)) and (
110 | reduction != 'none'):
111 | return (pred * weight).sum()
112 | return self.loss(
113 | pred,
114 | target,
115 | weight=weight,
116 | avg_factor=avg_factor,
117 | reduction=reduction) * self.loss_weight
--------------------------------------------------------------------------------
/unidet3d/concat_dataset.py:
--------------------------------------------------------------------------------
1 | from mmengine.dataset.dataset_wrapper import ConcatDataset
2 | from mmengine.dataset.base_dataset import BaseDataset
3 | from mmdet3d.registry import DATASETS
4 |
5 |
6 | @DATASETS.register_module()
7 | class ConcatDataset_(ConcatDataset):
8 | """A wrapper of concatenated dataset.
9 |
10 | Args:
11 | datasets (Sequence[BaseDataset] or Sequence[dict]): A list of datasets
12 | which will be concatenated.
13 | lazy_init (bool, optional): Whether to load annotation during
14 | instantiation. Defaults to False.
15 | ignore_keys (List[str] or str): Ignore the keys that can be
16 | unequal in `dataset.metainfo`. Defaults to None.
17 | `New in version 0.3.0.`
18 | """
19 |
20 | def __init__(self,
21 | datasets,
22 | lazy_init=False,
23 | ignore_keys=None):
24 | self.datasets = []
25 | for i, dataset in enumerate(datasets):
26 | if isinstance(dataset, dict):
27 | self.datasets.append(DATASETS.build(dataset))
28 | elif isinstance(dataset, BaseDataset):
29 | self.datasets.append(dataset)
30 | else:
31 | raise TypeError(
32 | 'elements in datasets sequence should be config or '
33 | f'`BaseDataset` instance, but got {type(dataset)}')
34 | if ignore_keys is None:
35 | self.ignore_keys = []
36 | elif isinstance(ignore_keys, str):
37 | self.ignore_keys = [ignore_keys]
38 | elif isinstance(ignore_keys, list):
39 | self.ignore_keys = ignore_keys
40 | else:
41 | raise TypeError('ignore_keys should be a list or str, '
42 | f'but got {type(ignore_keys)}')
43 |
44 | meta_keys: set = set()
45 | for dataset in self.datasets:
46 | meta_keys |= dataset.metainfo.keys()
47 | # Only use metainfo of first dataset.
48 | self._metainfo = self.datasets[0].metainfo
49 |
50 | self._fully_initialized = False
51 | if not lazy_init:
52 | self.full_init()
53 |
--------------------------------------------------------------------------------
/unidet3d/data_preprocessor.py:
--------------------------------------------------------------------------------
1 | # Copied from mmdet3d/models/data_preprocessors/data_preprocessor.py
2 | from mmdet3d.models.data_preprocessors.data_preprocessor import \
3 | Det3DDataPreprocessor
4 | from mmdet3d.registry import MODELS
5 |
6 |
7 | @MODELS.register_module()
8 | class Det3DDataPreprocessor_(Det3DDataPreprocessor):
9 | """
10 | We only add these 2 lines:
11 | if 'elastic_coords' in inputs:
12 | batch_inputs['elastic_coords'] = inputs['elastic_coords']
13 | """
14 | def simple_process(self, data, training=False):
15 | """Perform normalization, padding and bgr2rgb conversion for img data
16 | based on ``BaseDataPreprocessor``, and voxelize point cloud if `voxel`
17 | is set to be True.
18 |
19 | Args:
20 | data (dict): Data sampled from dataloader.
21 | training (bool): Whether to enable training time augmentation.
22 | Defaults to False.
23 |
24 | Returns:
25 | dict: Data in the same format as the model input.
26 | """
27 | if 'img' in data['inputs']:
28 | batch_pad_shape = self._get_pad_shape(data)
29 |
30 | data = self.collate_data(data)
31 | inputs, data_samples = data['inputs'], data['data_samples']
32 | batch_inputs = dict()
33 |
34 | if 'points' in inputs:
35 | batch_inputs['points'] = inputs['points']
36 |
37 | if self.voxel:
38 | voxel_dict = self.voxelize(inputs['points'], data_samples)
39 | batch_inputs['voxels'] = voxel_dict
40 |
41 | if 'elastic_coords' in inputs:
42 | batch_inputs['elastic_coords'] = inputs['elastic_coords']
43 |
44 | if 'imgs' in inputs:
45 | imgs = inputs['imgs']
46 |
47 | if data_samples is not None:
48 | # NOTE the batched image size information may be useful, e.g.
49 | # in DETR, this is needed for the construction of masks, which
50 | # is then used for the transformer_head.
51 | batch_input_shape = tuple(imgs[0].size()[-2:])
52 | for data_sample, pad_shape in zip(data_samples,
53 | batch_pad_shape):
54 | data_sample.set_metainfo({
55 | 'batch_input_shape': batch_input_shape,
56 | 'pad_shape': pad_shape
57 | })
58 |
59 | if hasattr(self, 'boxtype2tensor') and self.boxtype2tensor:
60 | from mmdet.models.utils.misc import \
61 | samplelist_boxtype2tensor
62 | samplelist_boxtype2tensor(data_samples)
63 | elif hasattr(self, 'boxlist2tensor') and self.boxlist2tensor:
64 | from mmdet.models.utils.misc import \
65 | samplelist_boxlist2tensor
66 | samplelist_boxlist2tensor(data_samples)
67 | if self.pad_mask:
68 | self.pad_gt_masks(data_samples)
69 |
70 | if self.pad_seg:
71 | self.pad_gt_sem_seg(data_samples)
72 |
73 | if training and self.batch_augments is not None:
74 | for batch_aug in self.batch_augments:
75 | imgs, data_samples = batch_aug(imgs, data_samples)
76 | batch_inputs['imgs'] = imgs
77 |
78 | return {'inputs': batch_inputs, 'data_samples': data_samples}
79 |
--------------------------------------------------------------------------------
/unidet3d/formatting.py:
--------------------------------------------------------------------------------
1 | # Adapted from mmdet3d/datasets/transforms/formating.py
2 | import numpy as np
3 | from .structures import InstanceData_
4 | from mmdet3d.datasets.transforms import Pack3DDetInputs
5 | from mmdet3d.datasets.transforms.formating import to_tensor
6 | from mmdet3d.registry import TRANSFORMS
7 | from mmdet3d.structures import BaseInstance3DBoxes, Det3DDataSample, PointData
8 | from mmdet3d.structures.points import BasePoints
9 |
10 |
11 | @TRANSFORMS.register_module()
12 | class Pack3DDetInputs_(Pack3DDetInputs):
13 | """Just add elastic_coords, sp_pts_mask, and gt_sp_masks.
14 | """
15 | INPUTS_KEYS = ['points', 'img', 'elastic_coords']
16 | SEG_KEYS = [
17 | 'gt_seg_map',
18 | 'pts_instance_mask',
19 | 'pts_semantic_mask',
20 | 'gt_semantic_seg',
21 | 'sp_pts_mask',
22 | ]
23 | INSTANCEDATA_3D_KEYS = [
24 | 'gt_bboxes_3d', 'gt_labels_3d', 'attr_labels', 'depths', 'centers_2d',
25 | 'gt_sp_masks'
26 | ]
27 |
28 | def pack_single_results(self, results: dict) -> dict:
29 | """Method to pack the single input data. when the value in this dict is
30 | a list, it usually is in Augmentations Testing.
31 |
32 | Args:
33 | results (dict): Result dict from the data pipeline.
34 |
35 | Returns:
36 | dict: A dict contains
37 |
38 | - 'inputs' (dict): The forward data of models. It usually contains
39 | following keys:
40 |
41 | - points
42 | - img
43 |
44 | - 'data_samples' (:obj:`Det3DDataSample`): The annotation info
45 | of the sample.
46 | """
47 | # Format 3D data
48 | if 'points' in results:
49 | if isinstance(results['points'], BasePoints):
50 | results['points'] = results['points'].tensor
51 |
52 | if 'img' in results:
53 | if isinstance(results['img'], list):
54 | # process multiple imgs in single frame
55 | imgs = np.stack(results['img'], axis=0)
56 | if imgs.flags.c_contiguous:
57 | imgs = to_tensor(imgs).permute(0, 3, 1, 2).contiguous()
58 | else:
59 | imgs = to_tensor(
60 | np.ascontiguousarray(imgs.transpose(0, 3, 1, 2)))
61 | results['img'] = imgs
62 | else:
63 | img = results['img']
64 | if len(img.shape) < 3:
65 | img = np.expand_dims(img, -1)
66 | # To improve the computational speed by 3-5 times, apply:
67 | # `torch.permute()` rather than `np.transpose()`.
68 | # Refer to https://github.com/open-mmlab/mmdetection/pull/9533
69 | # for more details
70 | if img.flags.c_contiguous:
71 | img = to_tensor(img).permute(2, 0, 1).contiguous()
72 | else:
73 | img = to_tensor(
74 | np.ascontiguousarray(img.transpose(2, 0, 1)))
75 | results['img'] = img
76 |
77 | for key in [
78 | 'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
79 | 'gt_bboxes_labels', 'attr_labels', 'pts_instance_mask',
80 | 'pts_semantic_mask', 'sp_pts_mask', 'gt_sp_masks',
81 | 'elastic_coords', 'centers_2d', 'depths', 'gt_labels_3d'
82 | ]:
83 | if key not in results:
84 | continue
85 | if isinstance(results[key], list):
86 | results[key] = [to_tensor(res) for res in results[key]]
87 | else:
88 | results[key] = to_tensor(results[key])
89 | if 'gt_bboxes_3d' in results:
90 | if not isinstance(results['gt_bboxes_3d'], BaseInstance3DBoxes):
91 | results['gt_bboxes_3d'] = to_tensor(results['gt_bboxes_3d'])
92 |
93 | if 'gt_semantic_seg' in results:
94 | results['gt_semantic_seg'] = to_tensor(
95 | results['gt_semantic_seg'][None])
96 | if 'gt_seg_map' in results:
97 | results['gt_seg_map'] = results['gt_seg_map'][None, ...]
98 |
99 | data_sample = Det3DDataSample()
100 | gt_instances_3d = InstanceData_()
101 | gt_instances = InstanceData_()
102 | gt_pts_seg = PointData()
103 |
104 | img_metas = {}
105 | for key in self.meta_keys:
106 | if key in results:
107 | img_metas[key] = results[key]
108 | data_sample.set_metainfo(img_metas)
109 |
110 | inputs = {}
111 | for key in self.keys:
112 | if key in results:
113 | if key in self.INPUTS_KEYS:
114 | inputs[key] = results[key]
115 | elif key in self.INSTANCEDATA_3D_KEYS:
116 | gt_instances_3d[self._remove_prefix(key)] = results[key]
117 | elif key in self.INSTANCEDATA_2D_KEYS:
118 | if key == 'gt_bboxes_labels':
119 | gt_instances['labels'] = results[key]
120 | else:
121 | gt_instances[self._remove_prefix(key)] = results[key]
122 | elif key in self.SEG_KEYS:
123 | gt_pts_seg[self._remove_prefix(key)] = results[key]
124 | else:
125 | raise NotImplementedError(f'Please modify '
126 | f'`Pack3DDetInputs` '
127 | f'to put {key} into the '
128 | f'corresponding field')
129 |
130 | data_sample.gt_instances_3d = gt_instances_3d
131 | data_sample.gt_instances = gt_instances
132 | data_sample.gt_pts_seg = gt_pts_seg
133 | if 'eval_ann_info' in results:
134 | data_sample.eval_ann_info = results['eval_ann_info']
135 | else:
136 | data_sample.eval_ann_info = None
137 |
138 | packed_results = dict()
139 | packed_results['data_samples'] = data_sample
140 | packed_results['inputs'] = inputs
141 |
142 | return packed_results
143 |
--------------------------------------------------------------------------------
/unidet3d/image_vis.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import copy
3 |
4 | import cv2
5 | import numpy as np
6 | import torch
7 | from matplotlib import pyplot as plt
8 |
9 |
10 | def project_pts_on_img(points,
11 | raw_img,
12 | lidar2img_rt,
13 | max_distance=70,
14 | thickness=-1):
15 | """Project the 3D points cloud on 2D image.
16 |
17 | Args:
18 | points (numpy.array): 3D points cloud (x, y, z) to visualize.
19 | raw_img (numpy.array): The numpy array of image.
20 | lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
21 | according to the camera intrinsic parameters.
22 | max_distance (float, optional): the max distance of the points cloud.
23 | Default: 70.
24 | thickness (int, optional): The thickness of 2D points. Default: -1.
25 | """
26 | img = raw_img.copy()
27 | num_points = points.shape[0]
28 | pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1)
29 | pts_2d = pts_4d @ lidar2img_rt.T
30 |
31 | # cam_points is Tensor of Nx4 whose last column is 1
32 | # transform camera coordinate to image coordinate
33 | pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999)
34 | pts_2d[:, 0] /= pts_2d[:, 2]
35 | pts_2d[:, 1] /= pts_2d[:, 2]
36 |
37 | fov_inds = ((pts_2d[:, 0] < img.shape[1])
38 | & (pts_2d[:, 0] >= 0)
39 | & (pts_2d[:, 1] < img.shape[0])
40 | & (pts_2d[:, 1] >= 0))
41 |
42 | imgfov_pts_2d = pts_2d[fov_inds, :3] # u, v, d
43 |
44 | cmap = plt.cm.get_cmap('hsv', 256)
45 | cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255
46 | for i in range(imgfov_pts_2d.shape[0]):
47 | depth = imgfov_pts_2d[i, 2]
48 | color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :]
49 | cv2.circle(
50 | img,
51 | center=(int(np.round(imgfov_pts_2d[i, 0])),
52 | int(np.round(imgfov_pts_2d[i, 1]))),
53 | radius=1,
54 | color=tuple(color),
55 | thickness=thickness,
56 | )
57 | cv2.imshow('project_pts_img', img.astype(np.uint8))
58 | cv2.waitKey(100)
59 |
60 |
61 | def plot_rect3d_on_img(img,
62 | num_rects,
63 | rect_corners,
64 | color=(0, 255, 0),
65 | thickness=1):
66 | """Plot the boundary lines of 3D rectangular on 2D images.
67 |
68 | Args:
69 | img (numpy.array): The numpy array of image.
70 | num_rects (int): Number of 3D rectangulars.
71 | rect_corners (numpy.array): Coordinates of the corners of 3D
72 | rectangulars. Should be in the shape of [num_rect, 8, 2].
73 | color (tuple[int], optional): The color to draw bboxes.
74 | Default: (0, 255, 0).
75 | thickness (int, optional): The thickness of bboxes. Default: 1.
76 | """
77 | line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
78 | (4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
79 | for i in range(num_rects):
80 | corners = rect_corners[i].astype(np.int32)  # np.int was removed in recent numpy
81 | for start, end in line_indices:
82 | cv2.line(img, (corners[start, 0], corners[start, 1]),
83 | (corners[end, 0], corners[end, 1]), color, thickness,
84 | cv2.LINE_AA)
85 |
86 | return img.astype(np.uint8)
87 |
88 |
89 | def draw_lidar_bbox3d_on_img(bboxes3d,
90 | raw_img,
91 | lidar2img_rt,
92 | img_metas,
93 | color=(0, 255, 0),
94 | thickness=1):
95 | """Project the 3D bbox on 2D plane and draw on input image.
96 |
97 | Args:
98 | bboxes3d (:obj:`LiDARInstance3DBoxes`):
99 | 3d bbox in lidar coordinate system to visualize.
100 | raw_img (numpy.array): The numpy array of image.
101 | lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
102 | according to the camera intrinsic parameters.
103 | img_metas (dict): Useless here.
104 | color (tuple[int], optional): The color to draw bboxes.
105 | Default: (0, 255, 0).
106 | thickness (int, optional): The thickness of bboxes. Default: 1.
107 | """
108 | img = raw_img.copy()
109 | corners_3d = bboxes3d.corners
110 | num_bbox = corners_3d.shape[0]
111 | pts_4d = np.concatenate(
112 | [corners_3d.reshape(-1, 3),
113 | np.ones((num_bbox * 8, 1))], axis=-1)
114 | lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4)
115 | if isinstance(lidar2img_rt, torch.Tensor):
116 | lidar2img_rt = lidar2img_rt.cpu().numpy()
117 | pts_2d = pts_4d @ lidar2img_rt.T
118 |
119 | pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
120 | pts_2d[:, 0] /= pts_2d[:, 2]
121 | pts_2d[:, 1] /= pts_2d[:, 2]
122 | imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)
123 |
124 | return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
125 |
126 |
127 | # TODO: remove third parameter in all functions here in favour of img_metas
128 | def draw_depth_bbox3d_on_img(bboxes3d,
129 | raw_img,
130 | calibs,
131 | img_metas,
132 | color=(0, 255, 0),
133 | thickness=1):
134 | """Project the 3D bbox on 2D plane and draw on input image.
135 |
136 | Args:
137 | bboxes3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):
138 | 3d bbox in depth coordinate system to visualize.
139 | raw_img (numpy.array): The numpy array of image.
140 | calibs (dict): Camera calibration information, Rt and K.
141 | img_metas (dict): Used in coordinates transformation.
142 | color (tuple[int], optional): The color to draw bboxes.
143 | Default: (0, 255, 0).
144 | thickness (int, optional): The thickness of bboxes. Default: 1.
145 | """
146 | from mmdet3d.core.bbox import points_cam2img
147 | from mmdet3d.models import apply_3d_transformation
148 |
149 | img = raw_img.copy()
150 | img_metas = copy.deepcopy(img_metas)
151 | corners_3d = bboxes3d.corners
152 | num_bbox = corners_3d.shape[0]
153 | points_3d = corners_3d.reshape(-1, 3)
154 |
155 | # first reverse the data transformations
156 | xyz_depth = apply_3d_transformation(
157 | points_3d, 'DEPTH', img_metas, reverse=True)
158 |
159 | # project to 2d to get image coords (uv)
160 | uv_origin = points_cam2img(xyz_depth,
161 | xyz_depth.new_tensor(img_metas['depth2img']))
162 | uv_origin = (uv_origin - 1).round()
163 | imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
164 |
165 | return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
166 |
167 |
168 | def draw_camera_bbox3d_on_img(bboxes3d,
169 | raw_img,
170 | cam2img,
171 | img_metas,
172 | color=(0, 255, 0),
173 | thickness=1):
174 | """Project the 3D bbox on 2D plane and draw on input image.
175 |
176 | Args:
177 | bboxes3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]):
178 | 3d bbox in camera coordinate system to visualize.
179 | raw_img (numpy.array): The numpy array of image.
180 | cam2img (dict): Camera intrinsic matrix,
181 | denoted as `K` in depth bbox coordinate system.
182 | img_metas (dict): Useless here.
183 | color (tuple[int], optional): The color to draw bboxes.
184 | Default: (0, 255, 0).
185 | thickness (int, optional): The thickness of bboxes. Default: 1.
186 | """
187 | from mmdet3d.core.bbox import points_cam2img
188 |
189 | img = raw_img.copy()
190 | cam2img = copy.deepcopy(cam2img)
191 | corners_3d = bboxes3d.corners
192 | num_bbox = corners_3d.shape[0]
193 | points_3d = corners_3d.reshape(-1, 3)
194 | if not isinstance(cam2img, torch.Tensor):
195 | cam2img = torch.from_numpy(np.array(cam2img))
196 |
197 | assert (cam2img.shape == torch.Size([3, 3])
198 | or cam2img.shape == torch.Size([4, 4]))
199 | cam2img = cam2img.float().cpu()
200 |
201 | # project to 2d to get image coords (uv)
202 | uv_origin = points_cam2img(points_3d, cam2img)
203 | uv_origin = (uv_origin - 1).round()
204 | imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
205 |
206 | return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
--------------------------------------------------------------------------------
/unidet3d/indoor_metric.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from typing import Dict, List, Optional, Sequence
3 |
4 | from mmengine.evaluator import BaseMetric
5 | from mmengine.logging import MMLogger
6 |
7 | from .indoor_eval import indoor_eval
8 | from mmdet3d.registry import METRICS
9 | from mmdet3d.structures import get_box_type
10 | from .show_results import show_result_v2
11 | from pathlib import Path
12 |
13 | @METRICS.register_module()
14 | class IndoorMetric_(BaseMetric):
15 | """Indoor scene evaluation metric.
16 |
17 | Args:
18 |         iou_thr (float or List[float]): IoU threshold(s) used when computing
19 |             the metric. Defaults to [0.25, 0.5].
20 | collect_device (str): Device name used for collecting results from
21 | different ranks during distributed training. Must be 'cpu' or
22 | 'gpu'. Defaults to 'cpu'.
23 | prefix (str, optional): The prefix that will be added in the metric
24 | names to disambiguate homonymous metrics of different evaluators.
25 | If prefix is not provided in the argument, self.default_prefix will
26 | be used instead. Defaults to None.
27 | """
28 |
29 | def __init__(self,
30 | datasets,
31 | datasets_classes,
32 | vis_dir: str = None,
33 | iou_thr: List[float] = [0.25, 0.5],
34 | collect_device: str = 'cpu',
35 | prefix: Optional[str] = None) -> None:
36 | super(IndoorMetric_, self).__init__(
37 | prefix=prefix, collect_device=collect_device)
38 | self.iou_thr = [iou_thr] if isinstance(iou_thr, float) else iou_thr
39 | self.datasets = datasets
40 | self.datasets_classes = datasets_classes
41 | self.vis_dir = vis_dir
42 |
43 | def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
44 | """Process one batch of data samples and predictions.
45 |
46 | The processed results should be stored in ``self.results``, which will
47 | be used to compute the metrics when all batches have been processed.
48 |
49 | Args:
50 | data_batch (dict): A batch of data from the dataloader.
51 | data_samples (Sequence[dict]): A batch of outputs from the model.
52 | """
53 | for data_sample in data_samples:
54 | pred_3d = data_sample['pred_instances_3d']
55 | pred_3d['dataset'] = self.get_dataset(data_sample['lidar_path'])
56 | eval_ann_info = data_sample['eval_ann_info']
57 | cpu_pred_3d = dict()
58 | for k, v in pred_3d.items():
59 | if hasattr(v, 'to'):
60 | cpu_pred_3d[k] = v.to('cpu')
61 | else:
62 | cpu_pred_3d[k] = v
63 | self.results.append((eval_ann_info, cpu_pred_3d))
64 |
65 | def compute_metrics(self, results: list) -> Dict[str, float]:
66 | """Compute the metrics from processed results.
67 |
68 | Args:
69 | results (list): The processed results of each batch.
70 |
71 | Returns:
72 | Dict[str, float]: The computed metrics. The keys are the names of
73 | the metrics, and the values are corresponding results.
74 | """
75 | logger: MMLogger = MMLogger.get_current_instance()
76 | ann_infos = [[] for _ in self.datasets]
77 | pred_results = [[] for _ in self.datasets]
78 |
79 |         for eval_ann, single_pred_results in results:
80 |             idx = self.datasets.index(single_pred_results['dataset'])
81 |             ann_infos[idx].append(eval_ann)
82 |             pred_results[idx].append(single_pred_results)
83 |             if self.vis_dir is not None:
84 |                 self.vis_results(eval_ann, single_pred_results)
85 |
86 | # some checkpoints may not record the key "box_type_3d"
87 | box_type_3d, box_mode_3d = get_box_type(
88 | self.dataset_meta.get('box_type_3d', 'depth'))
89 |
90 | ret_dict = {}
91 | for i in range(len(self.datasets)):
92 | ret_dict[self.datasets[i]] = indoor_eval(
93 | ann_infos[i],
94 | pred_results[i],
95 | self.iou_thr,
96 | self.datasets_classes[i],
97 | logger=logger,
98 | box_mode_3d=box_mode_3d)
99 |
100 | return ret_dict
101 |
102 | def get_dataset(self, lidar_path):
103 | for dataset in self.datasets:
104 | if dataset in lidar_path.split('/'):
105 | return dataset
106 |
107 |     def vis_results(self, eval_ann, single_pred_results):
108 |         pts = single_pred_results['points'].numpy()
109 |         pts[:, 3:] *= 127.5
110 |         pts[:, 3:] += 127.5
111 |         show_result_v2(pts, eval_ann['gt_bboxes_3d'].corners,
112 |                        eval_ann['gt_labels_3d'],
113 |                        single_pred_results['bboxes_3d'].corners,
114 |                        single_pred_results['labels_3d'],
115 |                        Path(self.vis_dir) / single_pred_results['dataset'],
116 |                        eval_ann['lidar_idx'])
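# --- Illustrative sketch (not part of indoor_metric.py) ---
# `get_dataset` only checks whether a dataset name appears as a path
# component of `lidar_path`, and `compute_metrics` buckets predictions per
# dataset before calling `indoor_eval` on each bucket. A standalone sketch of
# that lookup; the paths and dataset names are made up for the example.
datasets = ['scannet', 's3dis', 'multiscan']

def get_dataset(lidar_path, datasets):
    # return the first dataset whose name is a component of the path
    for dataset in datasets:
        if dataset in lidar_path.split('/'):
            return dataset

print(get_dataset('data/scannet/points/scene0000_00.bin', datasets))  # scannet
print(get_dataset('data/other/points/a.bin', datasets))               # None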
--------------------------------------------------------------------------------
/unidet3d/loading.py:
--------------------------------------------------------------------------------
1 | # Adapted from mmdet3d/datasets/transforms/loading.py
2 | import mmengine
3 | import numpy as np
4 |
5 | from mmdet3d.datasets.transforms import LoadAnnotations3D
6 | from mmdet3d.datasets.transforms.loading import get
7 | from mmdet3d.datasets.transforms.loading import NormalizePointsColor
8 | from mmdet3d.registry import TRANSFORMS
9 |
10 |
11 | @TRANSFORMS.register_module()
12 | class LoadAnnotations3D_(LoadAnnotations3D):
13 | """Just add super point mask loading.
14 |
15 | Args:
16 |         with_sp_mask_3d (bool): Whether to load the super point mask.
17 | """
18 |
19 | def __init__(self, with_sp_mask_3d, **kwargs):
20 | self.with_sp_mask_3d = with_sp_mask_3d
21 | super().__init__(**kwargs)
22 |
23 | def _load_sp_pts_3d(self, results):
24 | """Private function to load 3D superpoints mask annotations.
25 |
26 | Args:
27 | results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
28 |
29 | Returns:
30 | dict: The dict containing loaded 3D mask annotations.
31 | """
32 | sp_pts_mask_path = results['super_pts_path']
33 |
34 | try:
35 | mask_bytes = get(
36 | sp_pts_mask_path, backend_args=self.backend_args)
37 | # add .copy() to fix read-only bug
38 | sp_pts_mask = np.frombuffer(
39 | mask_bytes, dtype=np.int64).copy()
40 | except ConnectionError:
41 | mmengine.check_file_exist(sp_pts_mask_path)
42 | sp_pts_mask = np.fromfile(
43 | sp_pts_mask_path, dtype=np.int64)
44 |
45 | results['sp_pts_mask'] = sp_pts_mask
46 |
47 | # 'eval_ann_info' will be passed to evaluator
48 | if 'eval_ann_info' in results:
49 | results['eval_ann_info']['sp_pts_mask'] = sp_pts_mask
50 | results['eval_ann_info']['lidar_idx'] = \
51 | sp_pts_mask_path.split("/")[-1][:-4]
52 | return results
53 |
54 | def transform(self, results: dict) -> dict:
55 | """Function to load multiple types annotations.
56 |
57 | Args:
58 | results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
59 |
60 | Returns:
61 | dict: The dict containing loaded 3D bounding box, label, mask and
62 | semantic segmentation annotations.
63 | """
64 | results = super().transform(results)
65 | if self.with_sp_mask_3d:
66 | results = self._load_sp_pts_3d(results)
67 | return results
68 |
69 |
70 | @TRANSFORMS.register_module()
71 | class NormalizePointsColor_(NormalizePointsColor):
72 | """Just add color_std parameter.
73 |
74 | Args:
75 | color_mean (list[float]): Mean color of the point cloud.
76 | color_std (list[float]): Std color of the point cloud.
77 | Default value is from SPFormer preprocessing.
78 | """
79 |
80 | def __init__(self, color_mean, color_std=127.5):
81 | self.color_mean = color_mean
82 | self.color_std = color_std
83 |
84 | def transform(self, input_dict):
85 | """Call function to normalize color of points.
86 |
87 | Args:
88 |             input_dict (dict): Result dict containing point cloud data.
89 |
90 | Returns:
91 | dict: The result dict containing the normalized points.
92 | Updated key and value are described below.
93 | - points (:obj:`BasePoints`): Points after color normalization.
94 | """
95 | points = input_dict['points']
96 | assert points.attribute_dims is not None and \
97 | 'color' in points.attribute_dims.keys(), \
98 | 'Expect points have color attribute'
99 | if self.color_mean is not None:
100 | points.color = points.color - \
101 | points.color.new_tensor(self.color_mean)
102 | if self.color_std is not None:
103 | points.color = points.color / \
104 | points.color.new_tensor(self.color_std)
105 | input_dict['points'] = points
106 | return input_dict
107 |
108 |
109 | @TRANSFORMS.register_module()
110 | class DenormalizePointsColor(NormalizePointsColor):
111 | """Denormalize points colors.
112 |
113 | Args:
114 | color_mean (list[float]): Mean color of the point cloud.
115 | color_std (list[float]): Std color of the point cloud.
116 | Default value is from SPFormer preprocessing.
117 | """
118 |
119 | def __init__(self, color_mean, color_std):
120 | self.color_mean = color_mean
121 | self.color_std = color_std
122 |
123 | def transform(self, input_dict):
124 |         """Call function to denormalize color of points.
125 | 
126 |         Args:
127 |             input_dict (dict): Result dict containing point cloud data.
128 | 
129 |         Returns:
130 |             dict: The result dict containing the denormalized points.
131 |                 Updated key and value are described below.
132 |                 - points (:obj:`BasePoints`): Points after color denormalization.
133 | """
134 | points = input_dict['points']
135 | assert points.attribute_dims is not None and \
136 | 'color' in points.attribute_dims.keys(), \
137 | 'Expect points have color attribute'
138 | if self.color_std is not None:
139 | points.color = points.color * \
140 | points.color.new_tensor(self.color_std)
141 | if self.color_mean is not None:
142 | points.color = points.color + \
143 | points.color.new_tensor(self.color_mean)
144 |
145 | input_dict['points'] = points
146 | return input_dict
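# --- Illustrative sketch (not part of loading.py) ---
# NormalizePointsColor_ and DenormalizePointsColor are exact inverses when
# given the same color_mean / color_std; the 127.5 values mirror the
# SPFormer-style preprocessing mentioned in the docstrings. A plain NumPy
# round trip, independent of the mmdet3d point structures:
import numpy as np

color = np.array([[255., 0., 127.5]])
color_mean, color_std = np.array([127.5, 127.5, 127.5]), 127.5

normalized = (color - color_mean) / color_std   # NormalizePointsColor_
restored = normalized * color_std + color_mean  # DenormalizePointsColor
assert np.allclose(restored, color)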
--------------------------------------------------------------------------------
/unidet3d/multiscan_dataset.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 | from mmdet3d.datasets import Det3DDataset
3 | from mmdet3d.registry import DATASETS
4 | from mmdet3d.structures import DepthInstance3DBoxes
5 | import os.path as osp
6 | from mmengine.logging import print_log
7 | import logging
8 | import numpy as np
9 |
10 | @DATASETS.register_module()
11 | class MultiScan(Det3DDataset):
12 | """MultiScan dataset.
13 |
14 | Args:
15 | data_prefix (dict): Prefix for data. Defaults to
16 | dict(pts='points', pts_instance_mask='instance_mask',
17 | pts_semantic_mask='semantic_mask').
18 | box_type_3d (str): Type of 3D box of this dataset.
19 |             Based on the `box_type_3d`, the dataset will encapsulate the boxes
20 |             in their original format and then convert them to `box_type_3d`.
21 | Defaults to 'Depth'.
22 | """
23 | METAINFO = {
24 | 'classes':
25 | # ('door', 'table', 'chair', 'cabinet', 'window', 'sofa', 'microwave', 'pillow',
26 | # 'tv_monitor', 'curtain', 'trash_can', 'suitcase', 'sink', 'backpack', 'bed',
27 | # 'refrigerator', 'toilet', 'no_target')
28 | ('door', 'table', 'chair', 'cabinet', 'window', 'sofa', 'microwave', 'pillow',
29 | 'tv_monitor', 'curtain', 'trash_can', 'suitcase', 'sink', 'backpack', 'bed',
30 | 'refrigerator', 'toilet')
31 | }
32 |
33 | def __init__(self,
34 | data_prefix=dict(
35 | pts='points',
36 | pts_instance_mask='instance_mask',
37 | pts_semantic_mask='semantic_mask'),
38 | box_type_3d='Depth',
39 | **kwargs):
40 | super().__init__(
41 | data_prefix=data_prefix, box_type_3d=box_type_3d, **kwargs)
42 |
43 | def parse_ann_info(self, info):
44 | """Process the `instances` in data info to `ann_info`.
45 |
46 | Args:
47 | info (dict): Info dict.
48 |
49 | Returns:
50 | dict: Processed `ann_info`
51 | """
52 | ann_info = super().parse_ann_info(info)
53 | if ann_info is None:
54 | ann_info = dict()
55 | ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
56 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
57 |
58 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
59 | ann_info['gt_bboxes_3d'],
60 | origin=(0.5, 0.5, 0.5), box_dim=6,
61 | with_yaw=False).convert_to(self.box_mode_3d)
62 |
63 | return ann_info
64 |
65 | @DATASETS.register_module()
66 | class MultiScan_(MultiScan):
67 | """MultiScan dataset with partition.
68 |
69 | Args:
70 |         partition (float): The fraction of the dataset that will be
71 |             used during training. Defaults to 1.
72 | """
73 | METAINFO = {
74 | 'classes':
75 | ('door', 'table', 'chair', 'cabinet', 'window', 'sofa', 'microwave', 'pillow',
76 | 'tv_monitor', 'curtain', 'trash_can', 'suitcase', 'sink', 'backpack', 'bed',
77 | 'refrigerator', 'toilet'),
78 | 'valid_class_ids': (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)
79 | }
80 |
81 | def __init__(self,
82 | partition: float = 1,
83 | **kwargs) -> None:
84 | self.partition = partition
85 | super().__init__(**kwargs)
86 |
87 | def parse_ann_info(self, info: dict) -> Union[dict, None]:
88 | """Process the `instances` in data info to `ann_info`.
89 |
90 |         In `Custom3DDataset`, we simply concatenate all the fields
91 |         in `instances` into `np.ndarray`s; dataset-specific processing
92 |         can be done in a subclass. You have to convert `gt_bboxes_3d`
93 |         to different coordinates according to the task.
94 |
95 | Args:
96 | info (dict): Info dict.
97 |
98 | Returns:
99 | dict or None: Processed `ann_info`.
100 | """
101 | ids = {c: i for i, c in enumerate(self.metainfo['valid_class_ids'])}
102 | instances = []
103 | for instance in info['instances']:
104 | if instance['bbox_label_3d'] in ids:
105 | instance['bbox_label_3d'] = ids[instance['bbox_label_3d']]
106 | instances.append(instance)
107 | info['instances'] = instances
108 | return super().parse_ann_info(info)
109 |
110 | def __getitem__(self, idx: int) -> dict:
111 | """Get the idx-th image and data information of dataset after
112 | ``self.pipeline``, and ``full_init`` will be called if the dataset has
113 | not been fully initialized.
114 |
115 |         During the training phase, if ``self.pipeline`` returns ``None``,
116 |         ``self._rand_another`` will be called until a valid image is fetched or
117 |         the maximum limit of refetch is reached.
118 |
119 | Args:
120 | idx (int): The index of self.data_list.
121 |
122 | Returns:
123 | dict: The idx-th image and data information of dataset after
124 | ``self.pipeline``.
125 | """
126 |         # Performing full initialization by calling `__getitem__` will consume
127 |         # extra memory. If a dataset is not fully initialized (by setting
128 |         # `lazy_init=True`) and is then fed into the dataloader, different workers
129 |         # will simultaneously read and parse the annotation, which costs more
130 |         # time and memory, although it may work. Therefore, it is recommended
131 |         # to manually call `full_init` before the dataset is fed into the dataloader
132 |         # to ensure all workers use shared RAM from the master process.
133 |
134 | if not self.test_mode:
135 | if self.serialize_data:
136 | dataset_len = len(self.data_address)
137 | else:
138 | dataset_len = len(self.data_list)
139 | idx = np.random.randint(0, dataset_len)
140 | if not self._fully_initialized:
141 | print_log(
142 | 'Please call `full_init()` method manually to accelerate '
143 | 'the speed.',
144 | logger='current',
145 | level=logging.WARNING)
146 | self.full_init()
147 |
148 | if self.test_mode:
149 | data = self.prepare_data(idx)
150 | if data is None:
151 |                 raise Exception('Test time pipeline should not get `None` '
152 | 'data_sample')
153 | return data
154 |
155 | for _ in range(self.max_refetch + 1):
156 | data = self.prepare_data(idx)
157 | # Broken images or random augmentations may cause the returned data
158 | # to be None
159 | if data is None:
160 | idx = self._rand_another()
161 | continue
162 | return data
163 |
164 | def __len__(self) -> int:
165 | """Get the length of filtered dataset and automatically call
166 |         ``full_init`` if the dataset has not been fully initialized.
167 |
168 | Returns:
169 | int: The length of filtered dataset.
170 | """
171 |
172 | if self.serialize_data:
173 | dataset_len = len(self.data_address)
174 | else:
175 | dataset_len = len(self.data_list)
176 | if not self.test_mode:
177 | return int(self.partition * dataset_len)
178 | else:
179 | return dataset_len
180 |
181 | def parse_data_info(self, info: dict) -> dict:
182 | """Process the raw data info.
183 |
184 | Args:
185 | info (dict): Raw info dict.
186 |
187 | Returns:
188 | dict: Has `ann_info` in training stage. And
189 | all path has been converted to absolute path.
190 | """
191 | info['super_pts_path'] = osp.join(
192 | self.data_prefix.get('sp_pts_mask', ''),
193 | info['lidar_points']['lidar_path']) #info['super_pts_path']
194 |
195 | info = super().parse_data_info(info)
196 |
197 | return info
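# --- Illustrative sketch (not part of multiscan_dataset.py) ---
# The `valid_class_ids` remapping in MultiScan_.parse_ann_info compresses the
# raw semantic ids into contiguous training labels and drops instances whose
# label is not in the list. A standalone toy example (the ids are made up,
# not the real MultiScan mapping):
valid_class_ids = (3, 4, 5)
ids = {c: i for i, c in enumerate(valid_class_ids)}

instances = [{'bbox_label_3d': 4}, {'bbox_label_3d': 7}, {'bbox_label_3d': 3}]
kept = [dict(inst, bbox_label_3d=ids[inst['bbox_label_3d']])
        for inst in instances if inst['bbox_label_3d'] in ids]
print(kept)  # [{'bbox_label_3d': 1}, {'bbox_label_3d': 0}]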
--------------------------------------------------------------------------------
/unidet3d/rotated_iou_loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from typing import Optional
3 |
4 | import torch
5 | from mmcv.ops.diff_iou_rotated import box2corners, oriented_box_intersection_2d
6 | from mmdet.models.losses.utils import weighted_loss
7 | from torch import Tensor
8 | from torch import nn as nn
9 |
10 | from mmdet3d.models import rotated_iou_3d_loss
11 | from mmdet3d.registry import MODELS
12 |
13 |
14 | def diff_diou_rotated_3d(box3d1: Tensor, box3d2: Tensor) -> Tensor:
15 | """Calculate differentiable DIoU of rotated 3d boxes.
16 |
17 | Args:
18 | box3d1 (Tensor): (B, N, 3+3+1) First box (x,y,z,w,h,l,alpha).
19 | box3d2 (Tensor): (B, N, 3+3+1) Second box (x,y,z,w,h,l,alpha).
20 | Returns:
21 | Tensor: (B, N) DIoU.
22 | """
23 | box1 = box3d1[..., [0, 1, 3, 4, 6]]
24 | box2 = box3d2[..., [0, 1, 3, 4, 6]]
25 | corners1 = box2corners(box1)
26 | corners2 = box2corners(box2)
27 | intersection, _ = oriented_box_intersection_2d(corners1, corners2)
28 | zmax1 = box3d1[..., 2] + box3d1[..., 5] * 0.5
29 | zmin1 = box3d1[..., 2] - box3d1[..., 5] * 0.5
30 | zmax2 = box3d2[..., 2] + box3d2[..., 5] * 0.5
31 | zmin2 = box3d2[..., 2] - box3d2[..., 5] * 0.5
32 | z_overlap = (torch.min(zmax1, zmax2) -
33 | torch.max(zmin1, zmin2)).clamp_(min=0.)
34 | intersection_3d = intersection * z_overlap
35 | volume1 = box3d1[..., 3] * box3d1[..., 4] * box3d1[..., 5]
36 | volume2 = box3d2[..., 3] * box3d2[..., 4] * box3d2[..., 5]
37 | union_3d = volume1 + volume2 - intersection_3d
38 |
39 | x1_max = torch.max(corners1[..., 0], dim=2)[0]
40 | x1_min = torch.min(corners1[..., 0], dim=2)[0]
41 | y1_max = torch.max(corners1[..., 1], dim=2)[0]
42 | y1_min = torch.min(corners1[..., 1], dim=2)[0]
43 |
44 | x2_max = torch.max(corners2[..., 0], dim=2)[0]
45 | x2_min = torch.min(corners2[..., 0], dim=2)[0]
46 | y2_max = torch.max(corners2[..., 1], dim=2)[0]
47 | y2_min = torch.min(corners2[..., 1], dim=2)[0]
48 |
49 | x_max = torch.max(x1_max, x2_max)
50 | x_min = torch.min(x1_min, x2_min)
51 | y_max = torch.max(y1_max, y2_max)
52 | y_min = torch.min(y1_min, y2_min)
53 |
54 | z_max = torch.max(zmax1, zmax2)
55 | z_min = torch.min(zmin1, zmin2)
56 |
57 | r2 = ((box1[..., :3] - box2[..., :3])**2).sum(dim=-1)
58 | c2 = (x_min - x_max)**2 + (y_min - y_max)**2 + (z_min - z_max)**2
59 |
60 | return intersection_3d / union_3d - r2 / c2
61 |
62 |
63 | @weighted_loss
64 | def rotated_diou_3d_loss(pred: Tensor, target: Tensor) -> Tensor:
65 | """Calculate the DIoU loss (1-DIoU) of two sets of rotated bounding boxes.
66 | Note that predictions and targets are one-to-one corresponded.
67 |
68 | Args:
69 | pred (torch.Tensor): Bbox predictions with shape [N, 7]
70 | (x, y, z, w, l, h, alpha).
71 | target (torch.Tensor): Bbox targets (gt) with shape [N, 7]
72 | (x, y, z, w, l, h, alpha).
73 |
74 | Returns:
75 |         torch.Tensor: DIoU loss (1 - DIoU) between predictions and targets.
76 | """
77 | if len(pred.shape) == 2:
78 | pred, target = pred.unsqueeze(0), target.unsqueeze(0)
79 | diou_loss = 1 - diff_diou_rotated_3d(pred, target)[0]
80 | else:
81 | diou_loss = 1 - diff_diou_rotated_3d(pred, target)
82 | return diou_loss
83 |
84 |
85 | @MODELS.register_module()
86 | class UniDet3DRotatedIoU3DLoss(nn.Module):
87 | """Calculate the IoU loss (1-IoU) of rotated bounding boxes. The only
88 | difference with original RotatedIoU3DLoss is the addition of DIoU mode.
89 | These classes should be merged in the future.
90 |
91 | Args:
92 | mode (str): 'iou' for intersection over union or 'diou' for
93 | distance-iou loss. Defaults to 'iou'.
94 | reduction (str): Method to reduce losses.
95 | The valid reduction method are 'none', 'sum' or 'mean'.
96 | Defaults to 'mean'.
97 | loss_weight (float): Weight of loss. Defaults to 1.0.
98 | """
99 |
100 | def __init__(self,
101 | mode: str = 'iou',
102 | reduction: str = 'mean',
103 | loss_weight: float = 1.0) -> None:
104 | super(UniDet3DRotatedIoU3DLoss, self).__init__()
105 | assert mode in ['iou', 'diou']
106 | self.loss = rotated_iou_3d_loss if mode == 'iou' \
107 | else rotated_diou_3d_loss
108 | assert reduction in ['none', 'sum', 'mean']
109 | self.reduction = reduction
110 | self.loss_weight = loss_weight
111 |
112 | def forward(self,
113 | pred: Tensor,
114 | target: Tensor,
115 | weight: Optional[Tensor] = None,
116 | avg_factor: Optional[float] = None,
117 | reduction_override: Optional[str] = None,
118 | **kwargs) -> Tensor:
119 | """Forward function of loss calculation.
120 |
121 | Args:
122 | pred (Tensor): Bbox predictions with shape [..., 7]
123 | (x, y, z, w, l, h, alpha).
124 | target (Tensor): Bbox targets (gt) with shape [..., 7]
125 | (x, y, z, w, l, h, alpha).
126 | weight (Tensor, optional): Weight of loss.
127 | Defaults to None.
128 | avg_factor (float, optional): Average factor that is used to
129 | average the loss. Defaults to None.
130 | reduction_override (str, optional): Method to reduce losses.
131 | The valid reduction method are 'none', 'sum' or 'mean'.
132 | Defaults to None.
133 |
134 | Returns:
135 | Tensor: IoU loss between predictions and targets.
136 | """
137 | if weight is not None and not torch.any(weight > 0):
138 | return pred.sum() * weight.sum() # 0
139 | assert reduction_override in (None, 'none', 'mean', 'sum')
140 | reduction = (
141 | reduction_override if reduction_override else self.reduction)
142 | if weight is not None and weight.dim() > 1:
143 | weight = weight.mean(-1)
144 | loss = self.loss_weight * self.loss(
145 | pred,
146 | target,
147 | weight,
148 | reduction=reduction,
149 | avg_factor=avg_factor,
150 | **kwargs)
151 |
152 | return loss
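# --- Illustrative sketch (not part of rotated_iou_loss.py) ---
# diff_diou_rotated_3d combines the 2D rotated intersection (from the mmcv
# ops imported above) with a vertical (z) overlap and a center-distance
# penalty r2 / c2. The vertical part needs no special ops; a pure-PyTorch
# sketch of just that piece, using the same (x, y, z, w, l, h, alpha) layout:
import torch

def vertical_overlap(box3d1, box3d2):
    zmax1 = box3d1[..., 2] + box3d1[..., 5] * 0.5
    zmin1 = box3d1[..., 2] - box3d1[..., 5] * 0.5
    zmax2 = box3d2[..., 2] + box3d2[..., 5] * 0.5
    zmin2 = box3d2[..., 2] - box3d2[..., 5] * 0.5
    return (torch.min(zmax1, zmax2) - torch.max(zmin1, zmin2)).clamp(min=0.)

b1 = torch.tensor([[0., 0., 0.0, 1., 1., 1., 0.]])
b2 = torch.tensor([[0., 0., 0.5, 1., 1., 1., 0.]])
print(vertical_overlap(b1, b2))  # tensor([0.5000])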
--------------------------------------------------------------------------------
/unidet3d/rscan_dataset.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 | import numpy as np
3 | from mmdet3d.datasets import Det3DDataset
4 | from mmdet3d.registry import DATASETS
5 | from mmdet3d.structures import DepthInstance3DBoxes
6 | import os.path as osp
7 | from mmengine.logging import print_log
8 | import logging
9 | import numpy as np
10 |
11 | @DATASETS.register_module()
12 | class RScan(Det3DDataset):
13 | """RScan dataset.
14 |
15 | Args:
16 | data_prefix (dict): Prefix for data. Defaults to
17 | dict(pts='points', pts_instance_mask='instance_mask',
18 | pts_semantic_mask='semantic_mask').
19 | box_type_3d (str): Type of 3D box of this dataset.
20 |             Based on the `box_type_3d`, the dataset will encapsulate the boxes
21 |             in their original format and then convert them to `box_type_3d`.
22 | Defaults to 'Depth'.
23 | """
24 | METAINFO = {
25 | 'classes':
26 | ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 'bookshelf', 'picture',
27 | 'counter', 'blinds', 'desk', 'shelves', 'curtain', 'dresser', 'pillow', 'mirror', 'floor mat', 'clothes',
28 | 'ceiling', 'books', 'fridge', 'television', 'paper', 'towel', 'shower curtain', 'box', 'whiteboard', 'person',
29 | 'night stand', 'toilet', 'sink', 'lamp', 'bathtub', 'bag', 'structure', 'furniture', 'prop')
30 | }
31 |
32 | def __init__(self,
33 | data_prefix=dict(
34 | pts='points',
35 | pts_instance_mask='instance_mask',
36 | pts_semantic_mask='semantic_mask'),
37 | box_type_3d='Depth',
38 | **kwargs):
39 | super().__init__(
40 | data_prefix=data_prefix, box_type_3d=box_type_3d, **kwargs)
41 |
42 | def parse_ann_info(self, info):
43 | """Process the `instances` in data info to `ann_info`.
44 |
45 | Args:
46 | info (dict): Info dict.
47 |
48 | Returns:
49 | dict: Processed `ann_info`
50 | """
51 | ann_info = super().parse_ann_info(info)
52 | if ann_info is None:
53 | ann_info = dict()
54 | ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
55 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
56 |
57 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
58 | ann_info['gt_bboxes_3d'],
59 | origin=(0.5, 0.5, 0.5), box_dim=6,
60 | with_yaw=False).convert_to(self.box_mode_3d)
61 |
62 | return ann_info
63 |
64 | @DATASETS.register_module()
65 | class ThreeRScan_(RScan):
66 | """3RScan dataset with partition.
67 |
68 | Args:
69 |         partition (float): The fraction of the dataset that will be
70 |             used during training. Defaults to 1.
71 | """
72 | METAINFO = {
73 | 'classes':
74 | ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
75 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator',
76 | 'shower curtain', 'toilet', 'sink', 'bathtub', 'otherfurniture'),
77 | 'valid_class_ids': (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39)
78 | }
79 | def __init__(self,
80 | partition: float = 1,
81 | **kwargs) -> None:
82 | self.partition = partition
83 | super().__init__(**kwargs)
84 |
85 | def parse_ann_info(self, info: dict) -> Union[dict, None]:
86 | """Process the `instances` in data info to `ann_info`.
87 |
88 |         In `Custom3DDataset`, we simply concatenate all the fields
89 |         in `instances` into `np.ndarray`s; dataset-specific processing
90 |         can be done in a subclass. You have to convert `gt_bboxes_3d`
91 |         to different coordinates according to the task.
92 |
93 | Args:
94 | info (dict): Info dict.
95 |
96 | Returns:
97 | dict or None: Processed `ann_info`.
98 | """
99 | ids = {c: i for i, c in enumerate(self.metainfo['valid_class_ids'])}
100 | instances = []
101 | for instance in info['instances']:
102 | if instance['bbox_label_3d'] in ids:
103 | instance['bbox_label_3d'] = ids[instance['bbox_label_3d']]
104 | instances.append(instance)
105 | info['instances'] = instances
106 | return super().parse_ann_info(info)
107 |
108 | def parse_data_info(self, info: dict) -> dict:
109 | """Process the raw data info.
110 |
111 | Args:
112 | info (dict): Raw info dict.
113 |
114 | Returns:
115 | dict: Has `ann_info` in training stage. And
116 | all path has been converted to absolute path.
117 | """
118 | info['super_pts_path'] = osp.join(
119 | self.data_prefix.get('sp_pts_mask', ''),
120 | info['lidar_points']['lidar_path']) #info['super_pts_path']
121 |
122 | info = super().parse_data_info(info)
123 |
124 | return info
125 |
126 | def __getitem__(self, idx: int) -> dict:
127 | """Get the idx-th image and data information of dataset after
128 | ``self.pipeline``, and ``full_init`` will be called if the dataset has
129 | not been fully initialized.
130 |
131 |         During the training phase, if ``self.pipeline`` returns ``None``,
132 |         ``self._rand_another`` will be called until a valid image is fetched or
133 |         the maximum limit of refetch is reached.
134 |
135 | Args:
136 | idx (int): The index of self.data_list.
137 |
138 | Returns:
139 | dict: The idx-th image and data information of dataset after
140 | ``self.pipeline``.
141 | """
142 |         # Performing full initialization by calling `__getitem__` will consume
143 |         # extra memory. If a dataset is not fully initialized (by setting
144 |         # `lazy_init=True`) and is then fed into the dataloader, different workers
145 |         # will simultaneously read and parse the annotation, which costs more
146 |         # time and memory, although it may work. Therefore, it is recommended
147 |         # to manually call `full_init` before the dataset is fed into the dataloader
148 |         # to ensure all workers use shared RAM from the master process.
149 |
150 | if not self.test_mode:
151 | if self.serialize_data:
152 | dataset_len = len(self.data_address)
153 | else:
154 | dataset_len = len(self.data_list)
155 | idx = np.random.randint(0, dataset_len)
156 |
157 | if not self._fully_initialized:
158 | print_log(
159 | 'Please call `full_init()` method manually to accelerate '
160 | 'the speed.',
161 | logger='current',
162 | level=logging.WARNING)
163 | self.full_init()
164 |
165 | if self.test_mode:
166 | data = self.prepare_data(idx)
167 | if data is None:
168 |                 raise Exception('Test time pipeline should not get `None` '
169 | 'data_sample')
170 | return data
171 |
172 | for _ in range(self.max_refetch + 1):
173 | data = self.prepare_data(idx)
174 | # Broken images or random augmentations may cause the returned data
175 | # to be None
176 | if data is None:
177 | idx = self._rand_another()
178 | continue
179 | return data
180 |
181 | def __len__(self) -> int:
182 | """Get the length of filtered dataset and automatically call
183 |         ``full_init`` if the dataset has not been fully initialized.
184 |
185 | Returns:
186 | int: The length of filtered dataset.
187 | """
188 |
189 | if self.serialize_data:
190 | dataset_len = len(self.data_address)
191 | else:
192 | dataset_len = len(self.data_list)
193 | if not self.test_mode:
194 | return int(self.partition * dataset_len)
195 | else:
196 | return dataset_len
197 |
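# --- Illustrative sketch (not part of rscan_dataset.py) ---
# `partition` only shrinks the length reported during training; combined with
# the random `idx` drawn in `__getitem__`, each training epoch then visits
# int(partition * N) randomly sampled scenes rather than a fixed subset.
# The numbers below are hypothetical:
import numpy as np

partition, dataset_len = 0.25, 1200
train_len = int(partition * dataset_len)                   # what __len__ reports
epoch_indices = np.random.randint(0, dataset_len, size=train_len)
print(train_len, epoch_indices.shape)                      # 300 (300,)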
--------------------------------------------------------------------------------
/unidet3d/s3dis_dataset.py:
--------------------------------------------------------------------------------
1 | from mmdet3d.registry import DATASETS
2 | from mmdet3d.datasets.s3dis_dataset import S3DISDataset
3 | import os.path as osp
4 | from mmengine.logging import print_log
5 | import logging
6 | import numpy as np
7 |
8 | @DATASETS.register_module()
9 | class S3DISSegDetDataset(S3DISDataset):
10 | """S3DISSegDetDataset dataset.
11 |
12 | Args:
13 |         partition (float): The fraction of the dataset that will be
14 |             used during training. Defaults to 1.
15 | """
16 | def __init__(self,
17 | partition: float = 1,
18 | **kwargs) -> None:
19 | self.partition = partition
20 | super().__init__(**kwargs)
21 |
22 | def parse_data_info(self, info: dict) -> dict:
23 | """Process the raw data info.
24 |
25 | Args:
26 | info (dict): Raw info dict.
27 |
28 | Returns:
29 | dict: Has `ann_info` in training stage. And
30 | all path has been converted to absolute path.
31 | """
32 | info['super_pts_path'] = osp.join(
33 | self.data_prefix.get('sp_pts_mask', ''), info['super_pts_path'])
34 |
35 | info = super().parse_data_info(info)
36 |
37 | return info
38 |
39 | def __getitem__(self, idx: int) -> dict:
40 | """Get the idx-th image and data information of dataset after
41 | ``self.pipeline``, and ``full_init`` will be called if the dataset has
42 | not been fully initialized.
43 |
44 |         During the training phase, if ``self.pipeline`` returns ``None``,
45 |         ``self._rand_another`` will be called until a valid image is fetched or
46 |         the maximum limit of refetch is reached.
47 |
48 | Args:
49 | idx (int): The index of self.data_list.
50 |
51 | Returns:
52 | dict: The idx-th image and data information of dataset after
53 | ``self.pipeline``.
54 | """
55 |         # Performing full initialization by calling `__getitem__` will consume
56 |         # extra memory. If a dataset is not fully initialized (by setting
57 |         # `lazy_init=True`) and is then fed into the dataloader, different workers
58 |         # will simultaneously read and parse the annotation, which costs more
59 |         # time and memory, although it may work. Therefore, it is recommended
60 |         # to manually call `full_init` before the dataset is fed into the dataloader
61 |         # to ensure all workers use shared RAM from the master process.
62 |
63 | if not self.test_mode:
64 | if self.serialize_data:
65 | dataset_len = len(self.data_address)
66 | else:
67 | dataset_len = len(self.data_list)
68 | idx = np.random.randint(0, dataset_len)
69 | if not self._fully_initialized:
70 | print_log(
71 | 'Please call `full_init()` method manually to accelerate '
72 | 'the speed.',
73 | logger='current',
74 | level=logging.WARNING)
75 | self.full_init()
76 |
77 | if self.test_mode:
78 | data = self.prepare_data(idx)
79 | if data is None:
80 |                 raise Exception('Test time pipeline should not get `None` '
81 | 'data_sample')
82 | return data
83 |
84 | for _ in range(self.max_refetch + 1):
85 | data = self.prepare_data(idx)
86 | # Broken images or random augmentations may cause the returned data
87 | # to be None
88 | if data is None:
89 | idx = self._rand_another()
90 | continue
91 | return data
92 |
93 | def __len__(self) -> int:
94 | """Get the length of filtered dataset and automatically call
95 |         ``full_init`` if the dataset has not been fully initialized.
96 |
97 | Returns:
98 | int: The length of filtered dataset.
99 | """
100 |
101 | if self.serialize_data:
102 | dataset_len = len(self.data_address)
103 | else:
104 | dataset_len = len(self.data_list)
105 | if not self.test_mode:
106 | return int(self.partition * dataset_len)
107 | else:
108 | return dataset_len
109 |
--------------------------------------------------------------------------------
/unidet3d/scannet_dataset.py:
--------------------------------------------------------------------------------
1 | from os import path as osp
2 | import numpy as np
3 | import warnings
4 |
5 | from mmdet3d.datasets.scannet_dataset import ScanNetSegDataset
6 | from mmdet3d.structures import DepthInstance3DBoxes
7 | from mmdet3d.registry import DATASETS
8 |
9 |
10 | @DATASETS.register_module()
11 | class ScanNetSegDataset_(ScanNetSegDataset):
12 | """We just add super_pts_path."""
13 |
14 | def get_scene_idxs(self, *args, **kwargs):
15 | """Compute scene_idxs for data sampling."""
16 | return np.arange(len(self)).astype(np.int32)
17 |
18 | def parse_data_info(self, info: dict) -> dict:
19 | """Process the raw data info.
20 |
21 | Args:
22 | info (dict): Raw info dict.
23 |
24 | Returns:
25 | dict: Has `ann_info` in training stage. And
26 | all path has been converted to absolute path.
27 | """
28 | info['super_pts_path'] = osp.join(
29 | self.data_prefix.get('sp_pts_mask', ''), info['super_pts_path'])
30 |
31 | info = super().parse_data_info(info)
32 |
33 | return info
34 |
35 | @DATASETS.register_module()
36 | class ScanNetDetDataset(ScanNetSegDataset_):
37 |     """Dataset that loads gt_bboxes_3d, gt_labels_3d and the
38 |     axis-align matrix for evaluating SPFormer/OneFormer with
39 |     IndoorMetric. We just copy some functions from Det3DDataset
40 |     and comment out some lines in them.
41 | """
42 | @staticmethod
43 | def _get_axis_align_matrix(info: dict) -> np.ndarray:
44 | """Get axis_align_matrix from info. If not exist, return identity mat.
45 |
46 | Args:
47 | info (dict): Info of a single sample data.
48 |
49 | Returns:
50 | np.ndarray: 4x4 transformation matrix.
51 | """
52 | if 'axis_align_matrix' in info:
53 | return np.array(info['axis_align_matrix'])
54 | else:
55 | warnings.warn(
56 | 'axis_align_matrix is not found in ScanNet data info, please '
57 | 'use new pre-process scripts to re-generate ScanNet data')
58 | return np.eye(4).astype(np.float32)
59 |
60 | def parse_data_info(self, info: dict) -> dict:
61 | """Process the raw data info.
62 |
63 |         The only difference from the one in `Det3DDataset`
64 |         is the specific processing of `axis_align_matrix`.
65 |
66 | Args:
67 | info (dict): Raw info dict.
68 |
69 | Returns:
70 | dict: Has `ann_info` in training stage. And
71 | all path has been converted to absolute path.
72 | """
73 |
74 | info['axis_align_matrix'] = self._get_axis_align_matrix(info)
75 | # info['super_pts_path'] = osp.join(
76 | # self.data_prefix.get('sp_pts_mask', ''), info['super_pts_path'])
77 |
78 | info = super().parse_data_info(info)
79 |
80 | if not self.test_mode:
81 | # used in training
82 | info['ann_info'] = self.parse_ann_info(info)
83 | if self.test_mode and self.load_eval_anns:
84 | info['eval_ann_info'] = self.parse_ann_info(info)
85 |
86 | return info
87 |
88 | def _det3d_parse_ann_info(self, info):
89 | """Process the `instances` in data info to `ann_info`.
90 |
91 |         In `Custom3DDataset`, we simply concatenate all the fields
92 |         in `instances` into `np.ndarray`s; dataset-specific processing
93 |         can be done in a subclass. You have to convert `gt_bboxes_3d`
94 |         to different coordinates according to the task.
95 |
96 | Args:
97 | info (dict): Info dict.
98 |
99 | Returns:
100 | dict or None: Processed `ann_info`.
101 | """
102 | # add s or gt prefix for most keys after concat
103 | # we only process 3d annotations here, the corresponding
104 | # 2d annotation process is in the `LoadAnnotations3D`
105 | # in `transforms`
106 | name_mapping = {
107 | 'bbox_label_3d': 'gt_labels_3d',
108 | 'bbox_label': 'gt_bboxes_labels',
109 | 'bbox': 'gt_bboxes',
110 | 'bbox_3d': 'gt_bboxes_3d',
111 | 'depth': 'depths',
112 | 'center_2d': 'centers_2d',
113 | 'attr_label': 'attr_labels',
114 | 'velocity': 'velocities',
115 | }
116 | instances = info['instances']
117 | # empty gt
118 | if len(instances) == 0:
119 | return None
120 | else:
121 | keys = list(instances[0].keys())
122 | ann_info = dict()
123 | for ann_name in keys:
124 | temp_anns = [item[ann_name] for item in instances]
125 | # map the original dataset label to training label
126 | # if 'label' in ann_name and ann_name != 'attr_label':
127 | # temp_anns = [
128 | # self.label_mapping[item] for item in temp_anns
129 | # ]
130 | if ann_name in name_mapping:
131 | mapped_ann_name = name_mapping[ann_name]
132 | else:
133 | mapped_ann_name = ann_name
134 |
135 | if 'label' in ann_name:
136 | temp_anns = np.array(temp_anns).astype(np.int64)
137 | elif ann_name in name_mapping:
138 | temp_anns = np.array(temp_anns).astype(np.float32)
139 | else:
140 | temp_anns = np.array(temp_anns)
141 |
142 | ann_info[mapped_ann_name] = temp_anns
143 | ann_info['instances'] = info['instances']
144 |
145 | # for label in ann_info['gt_labels_3d']:
146 | # if label != -1:
147 | # cat_name = self.metainfo['classes'][label]
148 | # self.num_ins_per_cat[cat_name] += 1
149 |
150 | return ann_info
151 |
152 | def parse_ann_info(self, info: dict) -> dict:
153 | """Process the `instances` in data info to `ann_info`.
154 |
155 | Args:
156 | info (dict): Info dict.
157 |
158 | Returns:
159 | dict: Processed `ann_info`.
160 | """
161 | ann_info = self._det3d_parse_ann_info(info)
162 | # empty gt
163 | if ann_info is None:
164 | ann_info = dict()
165 | ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
166 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
167 | # to target box structure
168 |
169 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
170 | ann_info['gt_bboxes_3d'],
171 | box_dim=ann_info['gt_bboxes_3d'].shape[-1],
172 | with_yaw=False,
173 | origin=(0.5, 0.5, 0.5)) # .convert_to(self.box_mode_3d)
174 |
175 | return ann_info
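# --- Illustrative sketch (not part of scannet_dataset.py) ---
# _det3d_parse_ann_info stacks per-instance fields into arrays and renames
# them via name_mapping (labels become int64, mapped fields float32). A
# compact standalone version of that concatenation with toy instances (not
# real ScanNet annotations):
import numpy as np

name_mapping = {'bbox_label_3d': 'gt_labels_3d', 'bbox_3d': 'gt_bboxes_3d'}
instances = [
    {'bbox_3d': [0., 0., 0., 1., 1., 1.], 'bbox_label_3d': 4},
    {'bbox_3d': [1., 1., 0., 2., 1., 1.], 'bbox_label_3d': 7},
]

ann_info = {}
for key in instances[0]:
    values = [inst[key] for inst in instances]
    dtype = np.int64 if 'label' in key else np.float32
    ann_info[name_mapping.get(key, key)] = np.array(values, dtype=dtype)

print(ann_info['gt_bboxes_3d'].shape, ann_info['gt_labels_3d'])  # (2, 6) [4 7]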
--------------------------------------------------------------------------------
/unidet3d/structures.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Sized
2 | from mmengine.structures import InstanceData
3 |
4 |
5 | class InstanceData_(InstanceData):
6 | """We only remove a single assert from __setattr__."""
7 |
8 | def __setattr__(self, name: str, value: Sized):
9 | """setattr is only used to set data.
10 |
11 |         The value must have the `__len__` attribute. Unlike the parent class,
12 |         its length is not required to match that of `InstanceData`.
13 | """
14 | if name in ('_metainfo_fields', '_data_fields'):
15 | if not hasattr(self, name):
16 | super(InstanceData, self).__setattr__(name, value)
17 | else:
18 | raise AttributeError(f'{name} has been used as a '
19 | 'private attribute, which is immutable.')
20 |
21 | else:
22 | assert isinstance(value,
23 | Sized), 'value must contain `__len__` attribute'
24 |
25 | super(InstanceData, self).__setattr__(name, value)
26 |
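# --- Illustrative sketch (not part of structures.py) ---
# Removing the length assert means fields of different lengths can coexist in
# a single InstanceData_. A minimal usage sketch, assuming mmengine and this
# package are importable; the field names are illustrative:
import torch
from unidet3d.structures import InstanceData_

inst = InstanceData_()
inst.scores = torch.rand(5)        # e.g. 5 per-query scores
inst.sp_masks = torch.rand(3, 10)  # different first dimension: no assert fires
print(inst.scores.shape, inst.sp_masks.shape)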
--------------------------------------------------------------------------------