├── .gitignore ├── LICENSE ├── README.md ├── assets └── omni6dpose.png ├── common ├── CHANGELOG.rst ├── LICENSE.rst ├── README.rst ├── __init__.py ├── align.py ├── bbox.py ├── build │ └── lib │ │ └── cutoop │ │ ├── __init__.py │ │ ├── align.py │ │ ├── bbox.py │ │ ├── data_loader.py │ │ ├── data_types.py │ │ ├── eval_utils.py │ │ ├── image_meta.py │ │ ├── iou │ │ ├── __init__.py │ │ ├── box.py │ │ └── iou.py │ │ ├── log.py │ │ ├── obj_meta.py │ │ ├── rotation.py │ │ ├── transform.py │ │ └── utils.py ├── cutoop.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── requires.txt │ └── top_level.txt ├── data_loader.py ├── data_types.py ├── dist │ ├── cutoop-0.1.0-py3.10.egg │ └── cutoop-0.1.0-py3.8.egg ├── eval_utils.py ├── image_meta.py ├── iou │ ├── __init__.py │ ├── box.py │ └── iou.py ├── log.py ├── obj_meta.py ├── rotation.py ├── setup.cfg ├── setup.py ├── transform.py └── utils.py ├── data_links.json └── download.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | configs 2 | data -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Jiyao Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Omni6DPose: A Benchmark and Model for Universal 6D Object Pose Estimation and Tracking 2 | 3 | [![Website](https://img.shields.io/badge/Website-orange.svg )](https://jiyao06.github.io/Omni6DPose/) 4 | [![Arxiv](https://img.shields.io/badge/Arxiv-green.svg )](https://arxiv.org/pdf/2406.04316.pdf) 5 | [![Dataset](https://img.shields.io/badge/Dataset-v1.0.0-blue.svg)](https://jiyao06.github.io/Omni6DPose/download/) 6 | [![API](https://img.shields.io/badge/API-v0.1.0-blue.svg)](https://github.com/Omni6DPose/Omni6DPoseAPI/) 7 | [![Documentation](https://img.shields.io/badge/Documentation-v0.1.0-blue.svg)](https://jiyao06.github.io/Omni6DPose/cutoop/) 8 | [![GitHub license](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/Omni6DPose/Omni6DPoseAPI/blob/main/LICENSE) 9 | [![Hits](https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2FJiyao06%2FOmni6DPose&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false)](https://hits.seeyoufarm.com) 10 | 11 | 12 | The official repository of Omni6DPose API, as presented in 13 | [Omni6DPose](https://arxiv.org/pdf/2406.04316). (ECCV 2024) 14 | 15 | ## 📗 Overview 16 | 17 | ![Pipeline](./assets/omni6dpose.png) 18 | We introduce **Omni6DPose**, a substantial dataset featured by its **diversity in object categories**, **large scale**, and **variety in object materials**. 19 | 20 | 21 | ## ✨ News 22 | * **2024.08.10**: **[GenPose++](https://github.com/Omni6DPose/GenPose2)** is released! 🎉 23 | * **2024.08.01**: **Omni6DPose** dataset and API are released! 🎉 24 | * **2024.07.01**: **Omni6DPose** has been accepted by ECCV2024! 🎉 25 | 26 | 27 | ## 📆 TODO 28 | - [x] Release the Omni6DPose dataset. 29 | - [x] Release the Omni6DPose API. 30 | - [x] Release the GenPose++ and pretrained models. 31 | - [ ] Release a convenient version of GenPose++ with SAM for the downstream tasks. 32 | 33 | 34 | ## 🗂️ Omni6DPose Dataset 35 | 36 | ### Dataset Overview 37 | The Omni6DPose dataset is available for download at [Omni6DPose](https://www.dropbox.com/scl/fo/ixmai3d7uf4mzp3le8sz3/ALRxBZUhhaAs11xH56rJXnM?rlkey=sn7kyuart2i8ujeu1vygz4wcy&st=8yy78r6z&dl=0). The dataset is organized into four parts: 38 | - **ROPE:** the real dataset for evaluation. 39 | - **SOPE:** the simulated dataset for training. 40 | - **PAM:** the pose aligned 3D models used in both ROPE and SOPE. 41 | - **Meta:** the meta information of the objects in PAM. 42 | 43 | ### Dataset Structure 44 | Omni6DPose provides a large-scale dataset with comprehensive data modalities and accurate annotations. The dataset is organized as follows. The files marked as [Optional] may not be necessary for some methods or tasks, but they are included to support a wide range of research possibilities. The detailed file descriptions can be found in the [project website](https://jiyao06.github.io/Omni6DPose/download). 45 | ``` 46 | Omni6DPose 47 | ├── ROPE 48 | │ ├── SCENE_ID 49 | │ │ ├── FRAME_ID_meta.json 50 | │ │ ├── FRAME_ID_color.png 51 | │ │ ├── FRAME_ID_mask.exr 52 | │ │ ├── FRAME_ID_depth.exr 53 | │ │ ├── FRAME_ID_mask_sam.npz [Optional] 54 | │ │ └── ... 55 | │ └── ... 
56 | ├── SOPE 57 | │ ├── PATCH_ID 58 | │ │ ├── train 59 | │ │ │ ├── SCENE_NAME 60 | │ │ │ | ├── SCENE_ID 61 | │ | | | | ├── FRAME_ID_meta.json 62 | │ │ │ | | ├── FRAME_ID_color.png 63 | │ │ │ | | ├── FRAME_ID_mask.exr 64 | │ | | | | ├── FRAME_ID_depth.exr 65 | │ │ │ | | ├── FRAME_ID_depth_1.exr [Optional] 66 | │ │ │ | | ├── FRAME_ID_coord.png [Optional] 67 | │ │ │ | | ├── FRAME_ID_ir_l.png [Optional] 68 | │ │ │ | | ├── FRAME_ID_ir_r.png [Optional] 69 | │ │ │ | | └── ... 70 | │ │ │ | └── ... 71 | │ │ │ └── ... 72 | │ │ └── test 73 | │ └── ... 74 | ├── PAM 75 | │ └── obj_meshes 76 | │ ├── DATASET-CLASS_ID 77 | │ └── ... 78 | └── Meta 79 | ├── obj_meta.json 80 | └── real_obj_meta.json 81 | ``` 82 | 83 | 84 | ### Download 85 | Due to the large size of the dataset and the varying requirements of different tasks and methods, we have divided the dataset into multiple parts, each compressed separately. This allows users to download only the necessary parts of the dataset according to their needs. Please follow the instructions in [download.ipynb](download.ipynb) to download and decompress the necessary dataset parts. 86 | 87 | **Note:** The meta file `Meta/obj_meta.json` corresponds to the `SOPE` dataset, and the meta file `Meta/real_obj_meta.json` corresponds to the `ROPE` dataset. When using the dataset and the following API, please make sure to use the correct meta file. 88 | 89 | 90 | ## 🔨 Omni6DPose API 91 | We provide the Omni6DPose API `cutoop` for visualization and evaluation; it offers a convenient way to load the dataset, evaluate results, and visualize them. The API is designed to be user-friendly and easy to use. Please refer to the [Omni6DPose API](https://jiyao06.github.io/Omni6DPose/cutoop/) for more details. 92 | 93 | ### Installation 94 | To enable EXR image reading by OpenCV, you need to install OpenEXR. On Ubuntu, you can install it using the following command: 95 | ```bash 96 | sudo apt-get install openexr 97 | ``` 98 | 99 | Then, the API can be installed in either of the following two ways: 100 | - **Install from PyPI:** 101 | ```bash 102 | pip install cutoop 103 | ``` 104 | - **Install from source:** 105 | ```bash 106 | cd common 107 | python setup.py install 108 | ``` 109 | 110 | ### Usage 111 | Please refer to the [documentation](https://jiyao06.github.io/Omni6DPose/cutoop/) for more details. 112 | 113 | 114 | ## 🎯 Baseline 115 | We provide the official PyTorch implementation of GenPose++ [here](https://github.com/Omni6DPose/GenPose2). 116 | 117 | ## 📮 Contact 118 | If you have any questions, please feel free to contact us: 119 | 120 | [Jiyao Zhang](https://jiyao06.github.io/): [jiyaozhang@stu.pku.edu.cn](mailto:jiyaozhang@stu.pku.edu.cn) 121 | 122 | [Weiyao Huang](https://github.com/sshwy): [sshwy@stu.pku.edu.cn](mailto:sshwy@stu.pku.edu.cn) 123 | 124 | [Bo Peng](https://github.com/p-b-p-b): [bo.peng@stu.pku.edu.cn](mailto:bo.peng@stu.pku.edu.cn) 125 | 126 | [Hao Dong](https://zsdonghao.github.io/): [hao.dong@pku.edu.cn](mailto:hao.dong@pku.edu.cn) 127 | 128 | ## 📝 License 129 | This project is released under the MIT license. See [LICENSE](LICENSE) for additional details. 
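### Quick Example
A minimal, illustrative sketch of loading a single frame with `cutoop` (the `./data/...` paths below are placeholders for wherever you downloaded and decompressed the dataset; see the [documentation](https://jiyao06.github.io/Omni6DPose/cutoop/) for complete usage):
```python
import cutoop
from cutoop.data_loader import Dataset

# Object meta file: obj_meta.json for SOPE, real_obj_meta.json for ROPE.
objmeta = cutoop.obj_meta.ObjectMetaData.load_json("./data/Meta/obj_meta.json")

# Collect per-frame file prefixes under a (placeholder) data root.
prefixes = Dataset.glob_prefix("./data/SOPE")
prefix = prefixes[0]

color = Dataset.load_color(prefix + "color.png")  # RGB image
depth = Dataset.load_depth(prefix + "depth.exr")  # depth in meters
mask = Dataset.load_mask(prefix + "mask.exr")     # per-object mask ids
meta = Dataset.load_meta(prefix + "meta.json")    # per-frame annotations

print(len(meta.objects), "objects;", meta.camera.intrinsics)

# Render the ground-truth 3D bounding boxes and object ids onto the image.
Dataset.visualize(prefix, out_path="./visual.png")
```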
130 | -------------------------------------------------------------------------------- /assets/omni6dpose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Omni6DPose/Omni6DPoseAPI/c1ed906eed2345259817b85bc9d852e302c4a730/assets/omni6dpose.png -------------------------------------------------------------------------------- /common/CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | v0.1.0 2 | ====== 3 | 4 | - Initial release 5 | -------------------------------------------------------------------------------- /common/LICENSE.rst: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Weiyao Huang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /common/README.rst: -------------------------------------------------------------------------------- 1 | Common UTilities for Omni6DPose 2 | =============================== 3 | 4 | **cutoop** (pronounced ``/'kʌtup/``) is the official package containing convenient 5 | utilities for data preparation and model evaluation over the Omni6DPose 6 | dataset. 7 | 8 | Visit `Omni6DPose `_ for details. 9 | -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common UTilities for Omni6DPose 3 | =================================== 4 | 5 | **cutoop** (pronounced ``/'kʌtup/``) is the official package containing convenient 6 | utilities for data preparation and model evaluation over the Omni6DPose 7 | dataset. 8 | 9 | Installation & Prerequisites 10 | ---------------------------- 11 | 12 | .. highlight:: shell 13 | 14 | To enable EXR image reading by OpenCV, you need to install OpenEXR. On Ubuntu you have:: 15 | 16 | sudo apt-get install openexr 17 | 18 | Install via pip 19 | ^^^^^^^^^^^^^^^ 20 | 21 | Now you can install this package directly via pip:: 22 | 23 | pip install cutoop 24 | 25 | Install from source 26 | ^^^^^^^^^^^^^^^^^^^ 27 | 28 | After cloning this repository, you may run ``pip install .`` under `common` folder 29 | (note that the installed package is still named ``cutoop``). 30 | 31 | Evaluation Framework 32 | -------------------- 33 | 34 | .. 
highlight:: python 35 | 36 | Here is a simple framework for model inference and evaluation. 37 | Notable APIs used in this simple example are listed below for reference: 38 | 39 | - :class:`.data_loader.Dataset` 40 | - :meth:`.obj_meta.ObjectMetaData.load_json` 41 | - :class:`.eval_utils.GroundTruth` 42 | - :class:`.eval_utils.DetectMatch` 43 | - :meth:`.eval_utils.Metrics.dump_json` 44 | 45 | 46 | First, after fetching the data prefix list using :meth:`.data_loader.Dataset.glob_prefix`, 47 | you may want to filter the object annotations in some way: 48 | 49 | .. testcode:: 50 | 51 | import cutoop 52 | import numpy as np 53 | from cutoop.data_loader import Dataset 54 | from cutoop.eval_utils import GroundTruth, DetectMatch, DetectOutput 55 | from dataclasses import asdict 56 | 57 | # Object metadata of SOPE dataset 58 | objmeta = cutoop.obj_meta.ObjectMetaData.load_json( 59 | "../../configs/obj_meta.json" 60 | ) 61 | 62 | # take this prefix for example 63 | prefix = "../../misc/sample/0000_" 64 | 65 | # load RGB color image (in RGB order) 66 | image = Dataset.load_color(prefix + "color.png") 67 | # load object metadata 68 | meta = Dataset.load_meta(prefix + "meta.json") 69 | # load object mask (0 and 255 are both backgrounds) 70 | mask = Dataset.load_mask(prefix + "mask.exr") 71 | # load depth image. 0 and very large values are considered invisible 72 | depth = Dataset.load_depth(prefix + "depth.exr") 73 | depth[depth > 1e5] = 0 74 | 75 | # retain objects with at least 32 pixels visible in a reasonable distance 76 | occurs, occur_count = np.unique(np.where(depth == 0, 0, mask), return_counts=True) 77 | occurs = occurs[occur_count > 32] 78 | objects = [obj for obj in meta.objects if obj.is_valid and obj.mask_id in occurs] 79 | 80 | After that we can prepare a :class:`.eval_utils.GroundTruth` before the inference 81 | process: 82 | 83 | .. testcode:: 84 | 85 | gts = [] 86 | for obj in objects: 87 | objinfo = objmeta.instance_dict[obj.meta.oid] 88 | gt = GroundTruth( 89 | # object pose relative to camera 90 | gt_affine=obj.pose().to_affine()[None, ...], 91 | # object size under camera space 92 | gt_size=( 93 | np.array(obj.meta.scale) * np.array(objinfo.dimensions) 94 | )[None, ...], 95 | gt_sym_labels=[objinfo.tag.symmetry], 96 | gt_class_labels=np.array([objinfo.class_label]), 97 | 98 | # these are optional 99 | image_path=prefix + "color.png", 100 | camera_intrinsics=meta.camera.intrinsics, 101 | ) 102 | gts.append(gt) 103 | 104 | # Concatenate multiple arrays into one. 105 | gt = GroundTruth.concat(gts) 106 | 107 | Inference with ground truth detection 108 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 109 | 110 | Now you may start the inference process, with filtered objects as inputs. 111 | Assume we have a model that can predict correct rotation and translation, 112 | but incorrect scales. 113 | 114 | .. testcode:: 115 | 116 | np.random.seed(0) 117 | pred_affine = gt.gt_affine 118 | pred_size = gt.gt_size * (0.8 + np.random.rand(*gt.gt_size.shape) * 0.4) 119 | 120 | # Now we can collect the prediction results. 121 | # Apart from constructing DetectMatch from GroundTruth, you can also construct 122 | # it directly by filling its members. 123 | result = DetectMatch.from_gt(gt, pred_affine=pred_affine, pred_size=pred_size) 124 | 125 | # Calibrate the rotation of pose prediction according to gt symmetry. 126 | result = result.calibrate_rotation() 127 | 128 | # We can visualize the prediction, which outputs to ``./result.png`` by default. 
129 | result.draw_image(path='source/_static/gr_0.png') 130 | 131 | # Calculate metrics. 132 | metrics = result.metrics() 133 | print_json(asdict(metrics.class_means)) 134 | 135 | .. testoutput:: 136 | 137 | { 138 | "iou_mean": 0.75514, 139 | "iou_acc": [ 140 | 1.0, 141 | 1.0, 142 | 0.42857 143 | ], 144 | "iou_auc": [ 145 | { 146 | "auc": 0.66429, 147 | "xs": null, 148 | "ys": null 149 | }, 150 | { 151 | "auc": 0.51071, 152 | "xs": null, 153 | "ys": null 154 | }, 155 | { 156 | "auc": 0.17857, 157 | "xs": null, 158 | "ys": null 159 | } 160 | ], 161 | "iou_ap": null, 162 | "deg_mean": 0.00372, 163 | "sht_mean": 0.0, 164 | "pose_acc": [ 165 | 1.0, 166 | 1.0, 167 | 1.0, 168 | 1.0 169 | ], 170 | "deg_auc": { 171 | "auc": 0.99943, 172 | "xs": null, 173 | "ys": null 174 | }, 175 | "sht_auc": { 176 | "auc": 1.0, 177 | "xs": null, 178 | "ys": null 179 | }, 180 | "pose_auc": [ 181 | { 182 | "auc": 1.0, 183 | "xs": null, 184 | "ys": null 185 | }, 186 | { 187 | "auc": 1.0, 188 | "xs": null, 189 | "ys": null 190 | }, 191 | { 192 | "auc": 1.0, 193 | "xs": null, 194 | "ys": null 195 | }, 196 | { 197 | "auc": 1.0, 198 | "xs": null, 199 | "ys": null 200 | } 201 | ], 202 | "pose_ap": null 203 | } 204 | 205 | .. image:: _static/gr_0.png 206 | :width: 100% 207 | :align: center 208 | 209 | Inference with detection outputs 210 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 211 | 212 | Assume we have a detection model that can detect 5 of the objects 213 | (after matching). After object detection and matching, before post estimation, 214 | you may prepare a :class:`.eval_utils.DetectOutput`. Is ok not to do so, then 215 | you'll have to compose a :class:`.eval_utils.DetectMatch` after pose estimation 216 | as a whole. 217 | 218 | .. testcode:: 219 | 220 | from collections import Counter 221 | np.random.seed(0) 222 | detect_gt = GroundTruth.concat(gts[:5]) 223 | detection = DetectOutput( 224 | gt_affine=detect_gt.gt_affine, 225 | gt_size=detect_gt.gt_size, 226 | gt_sym_labels=detect_gt.gt_sym_labels, 227 | gt_class_labels=detect_gt.gt_class_labels, 228 | image_path=detect_gt.image_path, 229 | camera_intrinsics=detect_gt.camera_intrinsics, 230 | detect_scores=np.random.rand(len(detect_gt)), 231 | gt_n_objects=Counter(gt.gt_class_labels), # pass an iterable to Counter 232 | ) 233 | print("total objects:", len(gt)) 234 | print("detected objects:", len(detection)) 235 | 236 | .. testoutput:: 237 | 238 | total objects: 9 239 | detected objects: 5 240 | 241 | Then we make prediction upon detected objects: 242 | 243 | .. testcode:: 244 | 245 | pred_affine = detection.gt_affine 246 | pred_size = detection.gt_size * ( 247 | 0.8 + np.random.rand(*detection.gt_size.shape) * 0.4 248 | ) 249 | 250 | # Now we can collect the prediction results and calibrate the rotation. 251 | result = DetectMatch.from_detection( 252 | detection, pred_affine=pred_affine, pred_size=pred_size 253 | ) 254 | result = result.calibrate_rotation() 255 | 256 | The bottleneck of metrics computation is 3D bounding box IoU. Here we introduce a 257 | trick: you may cache the result of criterion and then pass it to the metrics method if 258 | you somehow need to compute metrics for different settings multiple times. 259 | 260 | BTW, :meth:`.eval_utils.Metrics.dump_json` can be used to save metrics to JSON 261 | file. 262 | 263 | .. 
testcode:: 264 | 265 | criterion = result.criterion() # you may use pickle to cache it 266 | 267 | metrics = result.metrics( 268 | iou_thresholds=[0.25, 0.50, 0.75], 269 | pose_thresholds=[(5, 2), (5, 5), (10, 2), (10, 5)], 270 | criterion=criterion 271 | ) 272 | print_json(asdict(metrics.class_means)) 273 | 274 | .. testoutput:: 275 | 276 | { 277 | "iou_mean": 0.72034, 278 | "iou_acc": [ 279 | 1.0, 280 | 1.0, 281 | 0.4 282 | ], 283 | "iou_auc": [ 284 | { 285 | "auc": 0.62, 286 | "xs": null, 287 | "ys": null 288 | }, 289 | { 290 | "auc": 0.44, 291 | "xs": null, 292 | "ys": null 293 | }, 294 | { 295 | "auc": 0.098, 296 | "xs": null, 297 | "ys": null 298 | } 299 | ], 300 | "iou_ap": [ 301 | 0.9, 302 | 0.9, 303 | 0.4 304 | ], 305 | "deg_mean": 0.00303, 306 | "sht_mean": 0.0, 307 | "pose_acc": [ 308 | 1.0, 309 | 1.0, 310 | 1.0, 311 | 1.0 312 | ], 313 | "deg_auc": { 314 | "auc": 0.9996, 315 | "xs": null, 316 | "ys": null 317 | }, 318 | "sht_auc": { 319 | "auc": 1.0, 320 | "xs": null, 321 | "ys": null 322 | }, 323 | "pose_auc": [ 324 | { 325 | "auc": 1.0, 326 | "xs": null, 327 | "ys": null 328 | }, 329 | { 330 | "auc": 1.0, 331 | "xs": null, 332 | "ys": null 333 | }, 334 | { 335 | "auc": 1.0, 336 | "xs": null, 337 | "ys": null 338 | }, 339 | { 340 | "auc": 1.0, 341 | "xs": null, 342 | "ys": null 343 | } 344 | ], 345 | "pose_ap": [ 346 | 0.9, 347 | 0.9, 348 | 0.9, 349 | 0.9 350 | ] 351 | } 352 | 353 | """ 354 | 355 | __version__ = "0.1.0" 356 | 357 | from . import ( 358 | align, 359 | data_loader, 360 | data_types, 361 | eval_utils, 362 | image_meta, 363 | log, 364 | obj_meta, 365 | rotation, 366 | transform, 367 | utils, 368 | ) 369 | -------------------------------------------------------------------------------- /common/align.py: -------------------------------------------------------------------------------- 1 | """ 2 | Point Cloud Alignment 3 | ===================== 4 | """ 5 | 6 | import numpy as np 7 | from .bbox import create_3d_bbox_pc 8 | 9 | 10 | def estimateSimilarityUmeyama(SourceHom, TargetHom): 11 | """ 12 | Compute `OutTransform` s.t. `OutTransform` @ `SourceHom` approx. 
`TargetHom` 13 | A copy of the original paper is available at: `umeyama `_ 14 | 15 | :param SourceHom: (4, N) 16 | :param TargetHom: (4, N) 17 | 18 | :return: scalar Scale, (3, 3) Rotation, (3,) Translation, (4, 4) OutTransform 19 | """ 20 | SourceCentroid = np.mean(SourceHom[:3, :], axis=1) 21 | TargetCentroid = np.mean(TargetHom[:3, :], axis=1) 22 | nPoints = SourceHom.shape[1] 23 | CenteredSource = ( 24 | SourceHom[:3, :] - np.tile(SourceCentroid, (nPoints, 1)).transpose() 25 | ) 26 | CenteredTarget = ( 27 | TargetHom[:3, :] - np.tile(TargetCentroid, (nPoints, 1)).transpose() 28 | ) 29 | CovMatrix = np.matmul(CenteredTarget, np.transpose(CenteredSource)) / nPoints 30 | if np.isnan(CovMatrix).any(): 31 | print("nPoints:", nPoints) 32 | print(SourceHom.shape) 33 | print(TargetHom.shape) 34 | raise RuntimeError("There are NANs in the input.") 35 | 36 | U, D, Vh = np.linalg.svd(CovMatrix, full_matrices=True) 37 | d = (np.linalg.det(U) * np.linalg.det(Vh)) < 0.0 38 | if d: 39 | D[-1] = -D[-1] 40 | U[:, -1] = -U[:, -1] 41 | # rotation 42 | Rotation = np.matmul(U, Vh) 43 | # scale 44 | varP = np.var(SourceHom[:3, :], axis=1).sum() 45 | Scale = 1 / varP * np.sum(D) 46 | # translation 47 | Translation = TargetHom[:3, :].mean(axis=1) - SourceHom[:3, :].mean(axis=1).dot( 48 | Scale * Rotation.T 49 | ) 50 | # transformation matrix 51 | OutTransform = np.identity(4) 52 | OutTransform[:3, :3] = Scale * Rotation 53 | OutTransform[:3, 3] = Translation 54 | 55 | return Scale, Rotation, Translation, OutTransform 56 | 57 | 58 | def estimateSimilarityTransform( 59 | source_Nx3: np.ndarray, target_Nx3: np.ndarray, verbose=False 60 | ): 61 | """Compute an affine `OutTransform` from `source_Nx3` to `target_Nx3`, 62 | using RANSAC to account for outliers. 63 | 64 | Adapted from `object-deformnet `_ 65 | 66 | :returns: scalar Scale, (3, 3) Rotation, (3,) Translation, (4, 4) OutTransform 67 | """ 68 | assert ( 69 | source_Nx3.shape[0] == target_Nx3.shape[0] 70 | ), "Source and Target must have the same number of points." 
71 | SourceHom = np.transpose(np.hstack([source_Nx3, np.ones([source_Nx3.shape[0], 1])])) 72 | TargetHom = np.transpose(np.hstack([target_Nx3, np.ones([target_Nx3.shape[0], 1])])) 73 | # Auto-parameter selection based on source heuristics 74 | # Assume source is object model or gt nocs map, which is of high quality 75 | SourceCentroid = np.mean(SourceHom[:3, :], axis=1) 76 | nPoints = SourceHom.shape[1] 77 | CenteredSource = ( 78 | SourceHom[:3, :] - np.tile(SourceCentroid, (nPoints, 1)).transpose() 79 | ) 80 | SourceDiameter = 2 * np.amax(np.linalg.norm(CenteredSource, axis=0)) 81 | InlierT = SourceDiameter / 10.0 # 0.1 of source diameter 82 | maxIter = 128 83 | confidence = 0.99 84 | 85 | if verbose: 86 | print("Inlier threshold: ", InlierT) 87 | print("Max number of iterations: ", maxIter) 88 | 89 | BestInlierRatio = 0 90 | BestInlierIdx = np.arange(nPoints) 91 | for i in range(0, maxIter): 92 | # Pick 5 random (but corresponding) points from source and target 93 | RandIdx = np.random.randint(nPoints, size=5) 94 | Scale, _, _, OutTransform = estimateSimilarityUmeyama( 95 | SourceHom[:, RandIdx], TargetHom[:, RandIdx] 96 | ) 97 | PassThreshold = Scale * InlierT # propagate inlier threshold to target scale 98 | Diff = TargetHom - np.matmul(OutTransform, SourceHom) 99 | ResidualVec = np.linalg.norm(Diff[:3, :], axis=0) 100 | InlierIdx = np.where(ResidualVec < PassThreshold)[0] 101 | nInliers = InlierIdx.shape[0] 102 | InlierRatio = nInliers / nPoints 103 | # update best hypothesis 104 | if InlierRatio > BestInlierRatio: 105 | BestInlierRatio = InlierRatio 106 | BestInlierIdx = InlierIdx 107 | if verbose: 108 | print("Iteration: ", i) 109 | print("Inlier ratio: ", BestInlierRatio) 110 | # early break 111 | if (1 - (1 - BestInlierRatio**5) ** i) > confidence: 112 | break 113 | 114 | if BestInlierRatio < 0.1: 115 | print("[ WARN ] - Something is wrong. Small BestInlierRatio: ", BestInlierRatio) 116 | return None, None, None, None 117 | 118 | SourceInliersHom = SourceHom[:, BestInlierIdx] 119 | TargetInliersHom = TargetHom[:, BestInlierIdx] 120 | Scale, Rotation, Translation, OutTransform = estimateSimilarityUmeyama( 121 | SourceInliersHom, TargetInliersHom 122 | ) 123 | 124 | if verbose: 125 | print("BestInlierRatio:", BestInlierRatio) 126 | print("Rotation:\n", Rotation) 127 | print("Translation:\n", Translation) 128 | print("Scale:", Scale) 129 | 130 | return Scale, Rotation, Translation, OutTransform 131 | -------------------------------------------------------------------------------- /common/bbox.py: -------------------------------------------------------------------------------- 1 | """ 2 | Bounding Box Utilities 3 | ====================== 4 | """ 5 | 6 | import numpy as np 7 | 8 | CUBE_3x8 = np.array( 9 | [ 10 | [+1, +1, +1], 11 | [+1, +1, -1], 12 | [-1, +1, +1], 13 | [-1, +1, -1], 14 | [+1, -1, +1], 15 | [+1, -1, -1], 16 | [-1, -1, +1], 17 | [-1, -1, -1], 18 | ] 19 | ).T 20 | """cube from (-1, -1, -1) to (1, 1, 1). 21 | 22 | The index assignment is:: 23 | 24 | ... 6------4 25 | ... /| /| 26 | ... 2-+----0 | 27 | ... | 7----+-5 28 | ... |/ |/ 29 | ... 3------1 30 | 31 | """ 32 | 33 | 34 | def create_3d_bbox(size: "list[float] | np.ndarray | float"): 35 | """Create a 3d aabb bbox centered at origin with specific size. 36 | 37 | :param size: bound box side length. 38 | 39 | :returns: ndarray of shape [3, N]. 40 | 41 | The index assignment is:: 42 | 43 | ... 6------4 44 | ... /| /| 45 | ... 2-+----0 | 46 | ... | 7----+-5 47 | ... |/ |/ 48 | ... 
3------1 49 | 50 | """ 51 | if not hasattr(size, "__iter__"): 52 | size = [size, size, size] 53 | assert len(size) == 3 54 | bbox_3d = CUBE_3x8 * np.array(size).reshape(3, 1) / 2 55 | return bbox_3d 56 | 57 | 58 | def create_3d_bbox_pc(size: "list[float] | np.ndarray | float"): 59 | """Generate point cloud for a bounding box. Mainly used for debuging. 60 | 61 | :return: numpy array of shape (3, ?) 62 | """ 63 | if not hasattr(size, "__iter__"): 64 | size = [size, size, size] 65 | assert len(size) == 3 66 | 67 | xs = np.linspace(-size[0] / 2, size[0] / 2, 40).reshape(-1, 1) * np.array([1, 0, 0]) 68 | ys = np.linspace(-size[1] / 2, size[1] / 2, 40).reshape(-1, 1) * np.array([0, 1, 0]) 69 | zs = np.linspace(-size[2] / 2, size[2] / 2, 40).reshape(-1, 1) * np.array([0, 0, 1]) 70 | A = ( 71 | xs.reshape(-1, 1, 1, 3) 72 | + ys.reshape(1, -1, 1, 3) 73 | + zs[[0, -1]].reshape(1, 1, -1, 3) 74 | ) 75 | B = ( 76 | xs.reshape(-1, 1, 1, 3) 77 | + ys[[0, -1]].reshape(1, -1, 1, 3) 78 | + zs.reshape(1, 1, -1, 3) 79 | ) 80 | C = ( 81 | xs[[0, -1]].reshape(-1, 1, 1, 3) 82 | + ys.reshape(1, -1, 1, 3) 83 | + zs.reshape(1, 1, -1, 3) 84 | ) 85 | box_pc = np.concatenate([A.reshape(-1, 3), B.reshape(-1, 3), C.reshape(-1, 3)]) 86 | return box_pc.T 87 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common UTilities for Omni6DPose 3 | =================================== 4 | 5 | **cutoop** (pronounced ``/'kʌtup/``) is the official package containing convenient 6 | utilities for data preparation and model evaluation over the Omni6DPose 7 | dataset. 8 | 9 | Installation & Prerequisites 10 | ---------------------------- 11 | 12 | .. highlight:: shell 13 | 14 | To enable EXR image reading by OpenCV, you need to install OpenEXR. On Ubuntu you have:: 15 | 16 | sudo apt-get install openexr 17 | 18 | Install via pip 19 | ^^^^^^^^^^^^^^^ 20 | 21 | Now you can install this package directly via pip:: 22 | 23 | pip install cutoop 24 | 25 | Install from source 26 | ^^^^^^^^^^^^^^^^^^^ 27 | 28 | After cloning this repository, you may run ``pip install .`` under `common` folder 29 | (note that the installed package is still named ``cutoop``). 30 | 31 | Evaluation Framework 32 | -------------------- 33 | 34 | .. highlight:: python 35 | 36 | Here comes a simple framework of model inference and evaulation. 37 | Notable APIs used in this simple example are listed below for referencing: 38 | 39 | - :class:`.data_loader.Dataset` 40 | - :meth:`.obj_meta.ObjectMetaData.load_json` 41 | - :class:`.eval_utils.GroundTruth` 42 | - :class:`.eval_utils.DetectMatch` 43 | - :meth:`.eval_utils.Metrics.dump_json` 44 | 45 | 46 | Firstly, after fetching data prefix list using :meth:`.data_loader.Dataset.glob_prefix`, 47 | you may want to filter the object annotations in some way: 48 | 49 | .. 
testcode:: 50 | 51 | import cutoop 52 | import numpy as np 53 | from cutoop.data_loader import Dataset 54 | from cutoop.eval_utils import GroundTruth, DetectMatch, DetectOutput 55 | from dataclasses import asdict 56 | 57 | # Object metadata of SOPE dataset 58 | objmeta = cutoop.obj_meta.ObjectMetaData.load_json( 59 | "../../configs/obj_meta.json" 60 | ) 61 | 62 | # take this prefix for example 63 | prefix = "../../misc/sample/0000_" 64 | 65 | # load RGB color image (in RGB order) 66 | image = Dataset.load_color(prefix + "color.png") 67 | # load object metadata 68 | meta = Dataset.load_meta(prefix + "meta.json") 69 | # load object mask (0 and 255 are both backgrounds) 70 | mask = Dataset.load_mask(prefix + "mask.exr") 71 | # load depth image. 0 and very large values are considered invisible 72 | depth = Dataset.load_depth(prefix + "depth.exr") 73 | depth[depth > 1e5] = 0 74 | 75 | # retain objects with at least 32 pixels visible in a reasonable distance 76 | occurs, occur_count = np.unique(np.where(depth == 0, 0, mask), return_counts=True) 77 | occurs = occurs[occur_count > 32] 78 | objects = [obj for obj in meta.objects if obj.is_valid and obj.mask_id in occurs] 79 | 80 | After that we can prepare a :class:`.eval_utils.GroundTruth` before the inference 81 | process: 82 | 83 | .. testcode:: 84 | 85 | gts = [] 86 | for obj in objects: 87 | objinfo = objmeta.instance_dict[obj.meta.oid] 88 | gt = GroundTruth( 89 | # object pose relative to camera 90 | gt_affine=obj.pose().to_affine()[None, ...], 91 | # object size under camera space 92 | gt_size=( 93 | np.array(obj.meta.scale) * np.array(objinfo.dimensions) 94 | )[None, ...], 95 | gt_sym_labels=[objinfo.tag.symmetry], 96 | gt_class_labels=np.array([objinfo.class_label]), 97 | 98 | # these are optional 99 | image_path=prefix + "color.png", 100 | camera_intrinsics=meta.camera.intrinsics, 101 | ) 102 | gts.append(gt) 103 | 104 | # Concatenate multiple arrays into one. 105 | gt = GroundTruth.concat(gts) 106 | 107 | Inference with ground truth detection 108 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 109 | 110 | Now you may start the inference process, with filtered objects as inputs. 111 | Assume we have a model that can predict correct rotation and translation, 112 | but incorrect scales. 113 | 114 | .. testcode:: 115 | 116 | np.random.seed(0) 117 | pred_affine = gt.gt_affine 118 | pred_size = gt.gt_size * (0.8 + np.random.rand(*gt.gt_size.shape) * 0.4) 119 | 120 | # Now we can collect the prediction results. 121 | # Apart from constructing DetectMatch from GroundTruth, you can also construct 122 | # it directly by filling its members. 123 | result = DetectMatch.from_gt(gt, pred_affine=pred_affine, pred_size=pred_size) 124 | 125 | # Calibrate the rotation of pose prediction according to gt symmetry. 126 | result = result.calibrate_rotation() 127 | 128 | # We can visualize the prediction, which outputs to ``./result.png`` as default. 129 | result.draw_image(path='source/_static/gr_0.png') 130 | 131 | # Calculate metrics. 132 | metrics = result.metrics() 133 | print_json(asdict(metrics.class_means)) 134 | 135 | .. 
testoutput:: 136 | 137 | { 138 | "iou_mean": 0.75514, 139 | "iou_acc": [ 140 | 1.0, 141 | 1.0, 142 | 0.42857 143 | ], 144 | "iou_auc": [ 145 | { 146 | "auc": 0.66429, 147 | "xs": null, 148 | "ys": null 149 | }, 150 | { 151 | "auc": 0.51071, 152 | "xs": null, 153 | "ys": null 154 | }, 155 | { 156 | "auc": 0.17857, 157 | "xs": null, 158 | "ys": null 159 | } 160 | ], 161 | "iou_ap": null, 162 | "deg_mean": 0.00372, 163 | "sht_mean": 0.0, 164 | "pose_acc": [ 165 | 1.0, 166 | 1.0, 167 | 1.0, 168 | 1.0 169 | ], 170 | "deg_auc": { 171 | "auc": 0.99943, 172 | "xs": null, 173 | "ys": null 174 | }, 175 | "sht_auc": { 176 | "auc": 1.0, 177 | "xs": null, 178 | "ys": null 179 | }, 180 | "pose_auc": [ 181 | { 182 | "auc": 1.0, 183 | "xs": null, 184 | "ys": null 185 | }, 186 | { 187 | "auc": 1.0, 188 | "xs": null, 189 | "ys": null 190 | }, 191 | { 192 | "auc": 1.0, 193 | "xs": null, 194 | "ys": null 195 | }, 196 | { 197 | "auc": 1.0, 198 | "xs": null, 199 | "ys": null 200 | } 201 | ], 202 | "pose_ap": null 203 | } 204 | 205 | .. image:: _static/gr_0.png 206 | :width: 100% 207 | :align: center 208 | 209 | Inference with detection outputs 210 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 211 | 212 | Assume we have a detection model that can detect 5 of the objects 213 | (after matching). After object detection and matching, before post estimation, 214 | you may prepare a :class:`.eval_utils.DetectOutput`. Is ok not to do so, then 215 | you'll have to compose a :class:`.eval_utils.DetectMatch` after pose estimation 216 | as a whole. 217 | 218 | .. testcode:: 219 | 220 | from collections import Counter 221 | np.random.seed(0) 222 | detect_gt = GroundTruth.concat(gts[:5]) 223 | detection = DetectOutput( 224 | gt_affine=detect_gt.gt_affine, 225 | gt_size=detect_gt.gt_size, 226 | gt_sym_labels=detect_gt.gt_sym_labels, 227 | gt_class_labels=detect_gt.gt_class_labels, 228 | image_path=detect_gt.image_path, 229 | camera_intrinsics=detect_gt.camera_intrinsics, 230 | detect_scores=np.random.rand(len(detect_gt)), 231 | gt_n_objects=Counter(gt.gt_class_labels), # pass an iterable to Counter 232 | ) 233 | print("total objects:", len(gt)) 234 | print("detected objects:", len(detection)) 235 | 236 | .. testoutput:: 237 | 238 | total objects: 9 239 | detected objects: 5 240 | 241 | Then we make prediction upon detected objects: 242 | 243 | .. testcode:: 244 | 245 | pred_affine = detection.gt_affine 246 | pred_size = detection.gt_size * ( 247 | 0.8 + np.random.rand(*detection.gt_size.shape) * 0.4 248 | ) 249 | 250 | # Now we can collect the prediction results and calibrate the rotation. 251 | result = DetectMatch.from_detection( 252 | detection, pred_affine=pred_affine, pred_size=pred_size 253 | ) 254 | result = result.calibrate_rotation() 255 | 256 | The bottleneck of metrics computation is 3D bounding box IoU. Here we introduce a 257 | trick: you may cache the result of criterion and then pass it to the metrics method if 258 | you somehow need to compute metrics for different settings multiple times. 259 | 260 | BTW, :meth:`.eval_utils.Metrics.dump_json` can be used to save metrics to JSON 261 | file. 262 | 263 | .. testcode:: 264 | 265 | criterion = result.criterion() # you may use pickle to cache it 266 | 267 | metrics = result.metrics( 268 | iou_thresholds=[0.25, 0.50, 0.75], 269 | pose_thresholds=[(5, 2), (5, 5), (10, 2), (10, 5)], 270 | criterion=criterion 271 | ) 272 | print_json(asdict(metrics.class_means)) 273 | 274 | .. 
testoutput:: 275 | 276 | { 277 | "iou_mean": 0.72034, 278 | "iou_acc": [ 279 | 1.0, 280 | 1.0, 281 | 0.4 282 | ], 283 | "iou_auc": [ 284 | { 285 | "auc": 0.62, 286 | "xs": null, 287 | "ys": null 288 | }, 289 | { 290 | "auc": 0.44, 291 | "xs": null, 292 | "ys": null 293 | }, 294 | { 295 | "auc": 0.098, 296 | "xs": null, 297 | "ys": null 298 | } 299 | ], 300 | "iou_ap": [ 301 | 0.9, 302 | 0.9, 303 | 0.4 304 | ], 305 | "deg_mean": 0.00303, 306 | "sht_mean": 0.0, 307 | "pose_acc": [ 308 | 1.0, 309 | 1.0, 310 | 1.0, 311 | 1.0 312 | ], 313 | "deg_auc": { 314 | "auc": 0.9996, 315 | "xs": null, 316 | "ys": null 317 | }, 318 | "sht_auc": { 319 | "auc": 1.0, 320 | "xs": null, 321 | "ys": null 322 | }, 323 | "pose_auc": [ 324 | { 325 | "auc": 1.0, 326 | "xs": null, 327 | "ys": null 328 | }, 329 | { 330 | "auc": 1.0, 331 | "xs": null, 332 | "ys": null 333 | }, 334 | { 335 | "auc": 1.0, 336 | "xs": null, 337 | "ys": null 338 | }, 339 | { 340 | "auc": 1.0, 341 | "xs": null, 342 | "ys": null 343 | } 344 | ], 345 | "pose_ap": [ 346 | 0.9, 347 | 0.9, 348 | 0.9, 349 | 0.9 350 | ] 351 | } 352 | 353 | """ 354 | 355 | __version__ = "0.1.0" 356 | 357 | from . import ( 358 | align, 359 | data_loader, 360 | data_types, 361 | eval_utils, 362 | image_meta, 363 | log, 364 | obj_meta, 365 | rotation, 366 | transform, 367 | utils, 368 | ) 369 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/align.py: -------------------------------------------------------------------------------- 1 | """ 2 | Point Cloud Alignment 3 | ===================== 4 | """ 5 | 6 | import numpy as np 7 | from .bbox import create_3d_bbox_pc 8 | 9 | 10 | def estimateSimilarityUmeyama(SourceHom, TargetHom): 11 | """ 12 | Compute `OutTransform` s.t. `OutTransform` @ `SourceHom` approx. 
`TargetHom` 13 | Copy of original paper is at: `umeyama `_ 14 | 15 | :param SourceHom: (4, N) 16 | :param TargetHom: (4, N) 17 | 18 | :return: scalar Scale, (3, 3) Rotation, (3,) Translation, (4, 4) OutTransform 19 | """ 20 | SourceCentroid = np.mean(SourceHom[:3, :], axis=1) 21 | TargetCentroid = np.mean(TargetHom[:3, :], axis=1) 22 | nPoints = SourceHom.shape[1] 23 | CenteredSource = ( 24 | SourceHom[:3, :] - np.tile(SourceCentroid, (nPoints, 1)).transpose() 25 | ) 26 | CenteredTarget = ( 27 | TargetHom[:3, :] - np.tile(TargetCentroid, (nPoints, 1)).transpose() 28 | ) 29 | CovMatrix = np.matmul(CenteredTarget, np.transpose(CenteredSource)) / nPoints 30 | if np.isnan(CovMatrix).any(): 31 | print("nPoints:", nPoints) 32 | print(SourceHom.shape) 33 | print(TargetHom.shape) 34 | raise RuntimeError("There are NANs in the input.") 35 | 36 | U, D, Vh = np.linalg.svd(CovMatrix, full_matrices=True) 37 | d = (np.linalg.det(U) * np.linalg.det(Vh)) < 0.0 38 | if d: 39 | D[-1] = -D[-1] 40 | U[:, -1] = -U[:, -1] 41 | # rotation 42 | Rotation = np.matmul(U, Vh) 43 | # scale 44 | varP = np.var(SourceHom[:3, :], axis=1).sum() 45 | Scale = 1 / varP * np.sum(D) 46 | # translation 47 | Translation = TargetHom[:3, :].mean(axis=1) - SourceHom[:3, :].mean(axis=1).dot( 48 | Scale * Rotation.T 49 | ) 50 | # transformation matrix 51 | OutTransform = np.identity(4) 52 | OutTransform[:3, :3] = Scale * Rotation 53 | OutTransform[:3, 3] = Translation 54 | 55 | return Scale, Rotation, Translation, OutTransform 56 | 57 | 58 | def estimateSimilarityTransform( 59 | source_Nx3: np.ndarray, target_Nx3: np.ndarray, verbose=False 60 | ): 61 | """Compute an affine `OutTransform` from `source_Nx3` to `target_Nx3`, 62 | Add RANSAC algorithm to account for outliers. 63 | 64 | Copying from `object-deformnet `_ 65 | 66 | :returns: scalar Scale, (3, 3) Rotation, (3,) Translation, (4, 4) OutTransform 67 | """ 68 | assert ( 69 | source_Nx3.shape[0] == target_Nx3.shape[0] 70 | ), "Source and Target must have same number of points." 
71 | SourceHom = np.transpose(np.hstack([source_Nx3, np.ones([source_Nx3.shape[0], 1])])) 72 | TargetHom = np.transpose(np.hstack([target_Nx3, np.ones([target_Nx3.shape[0], 1])])) 73 | # Auto-parameter selection based on source heuristics 74 | # Assume source is object model or gt nocs map, which is of high quality 75 | SourceCentroid = np.mean(SourceHom[:3, :], axis=1) 76 | nPoints = SourceHom.shape[1] 77 | CenteredSource = ( 78 | SourceHom[:3, :] - np.tile(SourceCentroid, (nPoints, 1)).transpose() 79 | ) 80 | SourceDiameter = 2 * np.amax(np.linalg.norm(CenteredSource, axis=0)) 81 | InlierT = SourceDiameter / 10.0 # 0.1 of source diameter 82 | maxIter = 128 83 | confidence = 0.99 84 | 85 | if verbose: 86 | print("Inlier threshold: ", InlierT) 87 | print("Max number of iterations: ", maxIter) 88 | 89 | BestInlierRatio = 0 90 | BestInlierIdx = np.arange(nPoints) 91 | for i in range(0, maxIter): 92 | # Pick 5 random (but corresponding) points from source and target 93 | RandIdx = np.random.randint(nPoints, size=5) 94 | Scale, _, _, OutTransform = estimateSimilarityUmeyama( 95 | SourceHom[:, RandIdx], TargetHom[:, RandIdx] 96 | ) 97 | PassThreshold = Scale * InlierT # propagate inlier threshold to target scale 98 | Diff = TargetHom - np.matmul(OutTransform, SourceHom) 99 | ResidualVec = np.linalg.norm(Diff[:3, :], axis=0) 100 | InlierIdx = np.where(ResidualVec < PassThreshold)[0] 101 | nInliers = InlierIdx.shape[0] 102 | InlierRatio = nInliers / nPoints 103 | # update best hypothesis 104 | if InlierRatio > BestInlierRatio: 105 | BestInlierRatio = InlierRatio 106 | BestInlierIdx = InlierIdx 107 | if verbose: 108 | print("Iteration: ", i) 109 | print("Inlier ratio: ", BestInlierRatio) 110 | # early break 111 | if (1 - (1 - BestInlierRatio**5) ** i) > confidence: 112 | break 113 | 114 | if BestInlierRatio < 0.1: 115 | print("[ WARN ] - Something is wrong. Small BestInlierRatio: ", BestInlierRatio) 116 | return None, None, None, None 117 | 118 | SourceInliersHom = SourceHom[:, BestInlierIdx] 119 | TargetInliersHom = TargetHom[:, BestInlierIdx] 120 | Scale, Rotation, Translation, OutTransform = estimateSimilarityUmeyama( 121 | SourceInliersHom, TargetInliersHom 122 | ) 123 | 124 | if verbose: 125 | print("BestInlierRatio:", BestInlierRatio) 126 | print("Rotation:\n", Rotation) 127 | print("Translation:\n", Translation) 128 | print("Scale:", Scale) 129 | 130 | return Scale, Rotation, Translation, OutTransform 131 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/bbox.py: -------------------------------------------------------------------------------- 1 | """ 2 | Bounding Box Utilities 3 | ====================== 4 | """ 5 | 6 | import numpy as np 7 | 8 | CUBE_3x8 = np.array( 9 | [ 10 | [+1, +1, +1], 11 | [+1, +1, -1], 12 | [-1, +1, +1], 13 | [-1, +1, -1], 14 | [+1, -1, +1], 15 | [+1, -1, -1], 16 | [-1, -1, +1], 17 | [-1, -1, -1], 18 | ] 19 | ).T 20 | """cube from (-1, -1, -1) to (1, 1, 1). 21 | 22 | The index assignment is:: 23 | 24 | ... 6------4 25 | ... /| /| 26 | ... 2-+----0 | 27 | ... | 7----+-5 28 | ... |/ |/ 29 | ... 3------1 30 | 31 | """ 32 | 33 | 34 | def create_3d_bbox(size: "list[float] | np.ndarray | float"): 35 | """Create a 3d aabb bbox centered at origin with specific size. 36 | 37 | :param size: bound box side length. 38 | 39 | :returns: ndarray of shape [3, N]. 40 | 41 | The index assignment is:: 42 | 43 | ... 6------4 44 | ... /| /| 45 | ... 2-+----0 | 46 | ... | 7----+-5 47 | ... |/ |/ 48 | ... 
3------1 49 | 50 | """ 51 | if not hasattr(size, "__iter__"): 52 | size = [size, size, size] 53 | assert len(size) == 3 54 | bbox_3d = CUBE_3x8 * np.array(size).reshape(3, 1) / 2 55 | return bbox_3d 56 | 57 | 58 | def create_3d_bbox_pc(size: "list[float] | np.ndarray | float"): 59 | """Generate point cloud for a bounding box. Mainly used for debuging. 60 | 61 | :return: numpy array of shape (3, ?) 62 | """ 63 | if not hasattr(size, "__iter__"): 64 | size = [size, size, size] 65 | assert len(size) == 3 66 | 67 | xs = np.linspace(-size[0] / 2, size[0] / 2, 40).reshape(-1, 1) * np.array([1, 0, 0]) 68 | ys = np.linspace(-size[1] / 2, size[1] / 2, 40).reshape(-1, 1) * np.array([0, 1, 0]) 69 | zs = np.linspace(-size[2] / 2, size[2] / 2, 40).reshape(-1, 1) * np.array([0, 0, 1]) 70 | A = ( 71 | xs.reshape(-1, 1, 1, 3) 72 | + ys.reshape(1, -1, 1, 3) 73 | + zs[[0, -1]].reshape(1, 1, -1, 3) 74 | ) 75 | B = ( 76 | xs.reshape(-1, 1, 1, 3) 77 | + ys[[0, -1]].reshape(1, -1, 1, 3) 78 | + zs.reshape(1, 1, -1, 3) 79 | ) 80 | C = ( 81 | xs[[0, -1]].reshape(-1, 1, 1, 3) 82 | + ys.reshape(1, -1, 1, 3) 83 | + zs.reshape(1, 1, -1, 3) 84 | ) 85 | box_pc = np.concatenate([A.reshape(-1, 3), B.reshape(-1, 3), C.reshape(-1, 3)]) 86 | return box_pc.T 87 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/data_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dataset and Loading Functions 3 | ============================= 4 | """ 5 | 6 | from functools import partial 7 | import glob 8 | import os 9 | from PIL import Image as _Image 10 | import warnings 11 | 12 | from .image_meta import ImageMetaData 13 | from .align import estimateSimilarityTransform 14 | from .transform import depth2xyz 15 | from .utils import draw_3d_bbox, draw_object_label, draw_pose_axes 16 | from .data_types import Pose 17 | from .image_meta import ImageMetaData 18 | 19 | os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" # before importing cv2 20 | import cv2 21 | import numpy as np 22 | 23 | 24 | class Dataset(object): 25 | """This class can be used as a base class for dataloader construction.""" 26 | 27 | @staticmethod 28 | def load_meta(path: str) -> ImageMetaData: 29 | """See :meth:`.image_meta.ImageMetaData.load_json`.""" 30 | return ImageMetaData.load_json(path) 31 | 32 | @staticmethod 33 | def load_color(path: str) -> np.ndarray: 34 | """Load RGB image in **RGB** order.""" 35 | data = cv2.imread(path)[:, :, ::-1] # RGB order 36 | return data 37 | 38 | @staticmethod 39 | def load_coord(path: str) -> np.ndarray: 40 | """Read NOCS image (PNG). This function does the following things: 41 | 42 | 1. Read, normalize, and transform the image into **RGB** order. 43 | 2. Due to historical reasons, the ``B`` channel is transformed to ``1 - B`` after 44 | reading the image, which is a common transformation for other NOCS image in prior 45 | datasets. 46 | 3. Minus `0.5` to change the range of pixel values from `[0, 1]` to `[-0.5, 0.5]`. 
47 | 48 | :return: float array of shape (Height, Width) ranged [-0.5, 0.5], denoting xyz coordinates in NOCS space 49 | """ 50 | # set_trace() 51 | img = cv2.imread(path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 255 52 | # https://stackoverflow.com/questions/50963283/imshow-doesnt-need-convert-from-bgr-to-rgb 53 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 54 | img = img.clip(0, 1) 55 | img[:, :, 2] = 1 - img[:, :, 2] # 1 - z 56 | 57 | return img - 0.5 58 | 59 | @staticmethod 60 | def load_ir(path: str) -> np.ndarray: 61 | """Load the rendered IR image""" 62 | data = _Image.open(path).convert("L") 63 | return np.array(data) 64 | 65 | @staticmethod 66 | def load_depth(path: str) -> np.ndarray: 67 | """ 68 | This function reads the depth image, selecting the first channel if multiple 69 | channels are detected. 70 | 71 | :return: A 2D float array of shape (Height, Width). For Omni6DPose, 72 | the unit of pixel values is meters. 73 | """ 74 | img = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 75 | if len(img.shape) == 3: 76 | img = img[:, :, 0] 77 | return img # unit: m 78 | 79 | @staticmethod 80 | def load_mask(path: str) -> np.ndarray: 81 | """Load the mask image. 82 | 83 | :return: uint8 array of shape (Height, Width), whose values are related 84 | to the objects' mask ids (:attr:`.image_meta.ObjectPoseInfo.mask_id`). 85 | """ 86 | img = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 87 | if len(img.shape) == 3: 88 | img = img[:, :, 2] 89 | return np.array(img * 255, dtype=np.uint8) 90 | 91 | @staticmethod 92 | def load_mask_sam(path: str) -> "tuple[np.ndarray, np.ndarray]": 93 | """Load the mask generated by SAM. 94 | 95 | :return: (masks, mask_ids) where masks is a bool array of shape (n_objects, Height, Width) 96 | denoting the binary mask of each object, corresponding to mask_ids, whose shape is (n_objects,). 
97 | """ 98 | data = np.load(path) 99 | masks = data["masks"] 100 | mask_ids = data["mask_ids"] 101 | return masks, mask_ids 102 | 103 | @staticmethod 104 | def load_normal(path: str) -> np.ndarray: 105 | """Read normal image (exr) 106 | 107 | :return: float array of shape (H, W, 3) ranged [-1, 1] containing normal vectors 108 | """ 109 | img = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 110 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 111 | img[:, :, 0] = 0 - img[:, :, 0] # x => -x 112 | return img 113 | 114 | LOADER_MAP = { 115 | "color": ("color.png", load_color), 116 | "coord": ("coord.png", load_coord), 117 | "ir_l": ("ir_l.png", load_ir), 118 | "ir_r": ("ir_r.png", load_ir), 119 | "depth": ("depth.exr", load_depth), 120 | "depth_syn": ("depth_syn.exr", load_depth), 121 | "mask": ("mask.exr", load_mask), 122 | "mask_sam": ("mask_sam.npz", load_mask_sam), 123 | "normal": ("normal.exr", load_normal), 124 | "meta": ("meta.json", load_meta), 125 | } 126 | 127 | def visualize_by( 128 | vis_img: np.ndarray, 129 | prefix: str, 130 | show_id=True, 131 | show_box=True, 132 | show_nocs_pred=False, 133 | show_axes_len: "float | None" = None, 134 | ): 135 | metadata = Dataset.load_meta(prefix + "meta.json") 136 | masks = Dataset.load_mask(prefix + "mask.exr") 137 | intrinsics = metadata.camera.intrinsics 138 | depth_pts = depth2xyz(Dataset.load_depth(prefix + "depth.exr"), intrinsics) 139 | if show_nocs_pred: 140 | coords = Dataset.load_coord(prefix + "coord.png") 141 | # set_trace() 142 | 143 | bbox_task = [] 144 | label_task = [] 145 | for i, data in enumerate(metadata.objects): 146 | idx = tuple(np.argwhere(masks == data.mask_id).T) 147 | pts = depth_pts[idx] 148 | if show_nocs_pred: 149 | coord = coords[idx] 150 | 151 | bbox_side_len = data.meta.bbox_side_len 152 | obj_pose = Pose( 153 | quaternion=data.quaternion_wxyz, translation=data.translation 154 | ) 155 | if show_box: 156 | bbox_task.append( 157 | partial( 158 | draw_3d_bbox, 159 | intrinsics=intrinsics, 160 | sRT_4x4=obj_pose.to_affine(), 161 | bbox_side_len=bbox_side_len, 162 | ) 163 | ) 164 | if show_axes_len is not None: 165 | bbox_task.append( 166 | partial( 167 | draw_pose_axes, 168 | intrinsics=intrinsics, 169 | sRT_4x4=obj_pose.to_affine(), 170 | length=show_axes_len, 171 | ) 172 | ) 173 | 174 | # set_trace() 175 | if show_nocs_pred and show_box: 176 | _, _, _, pred_sRT = estimateSimilarityTransform(coord, pts) 177 | bbox_task.append( 178 | partial( 179 | draw_3d_bbox, 180 | intrinsics=intrinsics, 181 | sRT_4x4=pred_sRT, 182 | bbox_side_len=2 * np.amax(np.abs(coord), axis=0), 183 | ) 184 | ) 185 | if show_id: 186 | label_task.append( 187 | partial( 188 | draw_object_label, 189 | intrinsics=intrinsics, 190 | sRT_4x4=obj_pose.to_affine(), 191 | label=str(i), 192 | ) 193 | ) 194 | 195 | for filter in bbox_task: 196 | vis_img = filter(img=vis_img) 197 | for filter in label_task: 198 | vis_img = filter(img=vis_img) 199 | 200 | return vis_img 201 | 202 | def visualize( 203 | prefix: str, 204 | out_path="./visual.png", 205 | show_id=True, 206 | show_box=True, 207 | show_nocs_pred=False, 208 | show_axes_len=None, 209 | ): 210 | """A convenient helper for data visualization. 211 | 212 | .. doctest:: 213 | 214 | >>> cutoop.data_loader.Dataset.visualize( 215 | ... prefix="../../misc/sample_real/000000_", 216 | ... out_path="source/_static/gr_5.png", 217 | ... ) 218 | 219 | .. 
image:: _static/gr_5.png 220 | :align: center 221 | """ 222 | color = Dataset.load_color(prefix + "color.png") 223 | vis_img = Dataset.visualize_by( 224 | color, 225 | prefix, 226 | show_id=show_id, 227 | show_box=show_box, 228 | show_nocs_pred=show_nocs_pred, 229 | show_axes_len=show_axes_len, 230 | ) 231 | vis_img = cv2.cvtColor(vis_img, cv2.COLOR_RGB2BGR) 232 | cv2.imwrite(out_path, vis_img) 233 | 234 | @staticmethod 235 | def glob_prefix(root: str) -> "list[str]": 236 | """Recursively find the prefix list of data (by RGB image). 237 | 238 | Since we use the `glob` package for image searching, you may use the asterisk 239 | symbol ``*`` for pattern matching. However, since we internally use the ``**`` 240 | pattern, you're not encouraged to use ``**`` to avoid duplicated prefix outputs. 241 | 242 | :param root: the root directory for searching. 243 | 244 | .. doctest:: 245 | 246 | >>> cutoop.data_loader.Dataset.glob_prefix("../../misc") 247 | ['../../misc/sample/0000_', '../../misc/sample/0001_', '../../misc/sample/0002_', '../../misc/sample_real/000000_'] 248 | >>> cutoop.data_loader.Dataset.glob_prefix("../../*/sample") 249 | ['../../misc/sample/0000_', '../../misc/sample/0001_', '../../misc/sample/0002_'] 250 | 251 | """ 252 | dirs = [root] 253 | prefixes: list[str] = [] 254 | for dir in dirs: 255 | prefs = [ 256 | path[:-9] 257 | for path in glob.glob( 258 | os.path.join(dir, "**/*_color.png"), recursive=True 259 | ) 260 | ] 261 | prefixes.extend(prefs) 262 | return sorted(prefixes) 263 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/data_types.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data Types 3 | ========== 4 | 5 | Data types are defined to unify the parameter types of functions. 6 | Moreover, they're able to be stored to and retrieved from JSON files. 7 | """ 8 | 9 | from dataclasses import dataclass 10 | import numpy as np 11 | from scipy.spatial.transform import Rotation as _Rot 12 | 13 | 14 | @dataclass 15 | class Pose(object): 16 | """Object/Camera pose representation""" 17 | 18 | quaternion: "tuple[float, float, float, float]" 19 | """quaternion in scalar-first format (wxyz)""" 20 | 21 | translation: "tuple[float, float, float]" 22 | """translation from object (centered) space to camera space""" 23 | 24 | def __post_init__(self): 25 | assert len(self.quaternion) == 4 26 | assert len(self.translation) == 3 27 | 28 | def to_affine(self, scale=None): 29 | """transform to affine transformation (with no additional scaling) 30 | 31 | :return: 4x4 numpy array. 32 | 33 | Here's an example of getting :class:`Pose` from a rotation matrix: 34 | 35 | .. doctest:: 36 | 37 | >>> from cutoop.data_types import Pose 38 | >>> from scipy.spatial.transform import Rotation 39 | >>> x, y, z, w = Rotation.from_matrix([ 40 | ... [ 0., 0., 1.], 41 | ... [ 0., 1., 0.], 42 | ... [-1., 0., 0.] 43 | ... 
]).as_quat() 44 | >>> pose = Pose(quaternion=[w, x, y, z], translation=[1, 1, 1]) 45 | >>> pose.to_affine() 46 | array([[ 0., 0., 1., 1.], 47 | [ 0., 1., 0., 1.], 48 | [-1., 0., 0., 1.], 49 | [ 0., 0., 0., 1.]], dtype=float32) 50 | """ 51 | q = self.quaternion 52 | rot = _Rot.from_quat([q[1], q[2], q[3], q[0]]).as_matrix() 53 | if scale is not None: 54 | rot = rot * scale 55 | trans = np.array(self.translation) 56 | mtx = np.eye(4).astype(np.float32) 57 | mtx[:3, :3] = rot 58 | mtx[:3, 3] = trans 59 | return mtx 60 | 61 | 62 | @dataclass 63 | class CameraIntrinsicsBase: 64 | """Camera intrinsics data. 65 | The unit of ``fx, fy, cx, cy, width, height`` are all pixels. 66 | """ 67 | 68 | fx: float # unit: pixel 69 | fy: float # unit: pixel 70 | cx: float # unit: pixel 71 | cy: float # unit: pixel 72 | width: float # unit: pixel 73 | height: float # unit: pixel 74 | 75 | def to_matrix(self): 76 | """Transform to 3x3 K matrix. i. e.:: 77 | 78 | [[fx, 0, cx], 79 | [0, fy, cy], 80 | [0, 0, 1 ]] 81 | 82 | :return: 3x3 numpy array. 83 | """ 84 | fx = self.fx 85 | fy = self.fy 86 | cx = self.cx 87 | cy = self.cy 88 | return np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) 89 | 90 | def fov_x(self): 91 | return np.rad2deg(2 * np.arctan2(self.width, 2 * self.fx)) 92 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/image_meta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data types for Image Metafile 3 | ============================= 4 | 5 | .. testsetup:: 6 | 7 | from cutoop.image_meta import ImageMetaData 8 | 9 | """ 10 | 11 | from dataclasses import dataclass 12 | import json 13 | 14 | from .data_types import CameraIntrinsicsBase, Pose 15 | 16 | 17 | @dataclass 18 | class ViewInfo(Pose): 19 | intrinsics: CameraIntrinsicsBase 20 | """Camera intrinsics""" 21 | scene_obj_path: str 22 | """Scene object mesh path""" 23 | 24 | background_image_path: str 25 | background_depth_path: str 26 | distances: "list[float]" 27 | kind: str 28 | 29 | def __post_init__(self): 30 | if not isinstance(self.intrinsics, CameraIntrinsicsBase): 31 | self.intrinsics = CameraIntrinsicsBase(**self.intrinsics) 32 | 33 | 34 | @dataclass 35 | class ObjectMetaInfo: 36 | oid: str 37 | """Object ID, which is used to index object metadata""" 38 | class_name: str 39 | """Class name of the object""" 40 | class_label: int 41 | """1-indexed class label of the object""" 42 | instance_path: str 43 | """Path to the model mesh file""" 44 | scale: "list[float]" 45 | """Scale from `object space` (not NOCS) to the `camera space`. 
46 | For the size of the object, refer to the object meta file.""" 47 | is_background: bool 48 | """Whether it is marked as a background object.""" 49 | 50 | # bounding box side len after scaling (deprecated) 51 | # should be equal to np.array(obj.meta.scale) * np.array( 52 | # objmeta.instance_dict[obj.meta.oid].dimensions 53 | # ) 54 | bbox_side_len: "list[float]" 55 | 56 | 57 | @dataclass 58 | class ObjectPoseInfo: 59 | mask_id: int 60 | """the value identifying this object in mask image""" 61 | meta: ObjectMetaInfo 62 | """object meta information.""" 63 | quaternion_wxyz: "list[float]" 64 | """object rotation in camera space""" 65 | translation: "list[float]" 66 | """object translation in camera space""" 67 | is_valid: bool 68 | """Whether the object meet requirements (faceup, reasonable real-world location)""" 69 | 70 | # object id in image (from 1 to the number of visible objects, deprecated) 71 | id: int 72 | # for data generation 73 | material: "list[str]" 74 | world_quaternion_wxyz: "list[float]" 75 | world_translation: "list[float]" 76 | 77 | def __post_init__(self): 78 | if not isinstance(self.meta, ObjectMetaInfo): 79 | self.meta = ObjectMetaInfo(**self.meta) 80 | 81 | def pose(self) -> Pose: 82 | """Construct :class:`Pose` relative to the camera coordinate.""" 83 | return Pose(quaternion=self.quaternion_wxyz, translation=self.translation) 84 | 85 | 86 | @dataclass 87 | class ImageMetaData: 88 | objects: "list[ObjectPoseInfo]" 89 | """A list of visiable objects""" 90 | camera: ViewInfo 91 | """Information of the scene""" 92 | scene_dataset: str 93 | """Dataset source of the scene""" 94 | 95 | # for data generation 96 | env_param: dict 97 | face_up: bool 98 | concentrated: bool 99 | comments: str 100 | runtime_seed: int 101 | baseline_dis: int 102 | emitter_dist_l: int 103 | 104 | def __post_init__(self): 105 | if isinstance(self.objects, dict): 106 | self.objects = [ 107 | ObjectPoseInfo(**x, mask_id=int(k.split("_")[0])) 108 | for k, x in self.objects.items() 109 | ] 110 | if not isinstance(self.camera, ViewInfo): 111 | self.camera = ViewInfo(**self.camera) 112 | 113 | @staticmethod 114 | def load_json(path: str) -> "ImageMetaData": 115 | """Load object meta data from json file 116 | 117 | 118 | .. 
doctest:: 119 | 120 | >>> meta = ImageMetaData.load_json("../../misc/sample_real/000000_meta.json") 121 | >>> meta.camera.intrinsics 122 | CameraIntrinsicsBase(fx=915.0556030273438, fy=914.1288452148438, cx=641.2314453125, cy=363.4847412109375, width=1280, height=720) 123 | 124 | """ 125 | 126 | with open(path, "r") as f: 127 | jsondata = json.load(f) 128 | 129 | return ImageMetaData(**jsondata) 130 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/iou/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Bounding Box IoU 3 | ================ 4 | """ 5 | 6 | import numpy as np 7 | from .box import Box as _Box 8 | from .iou import IoU as _IoU 9 | 10 | 11 | def compute_3d_bbox_iou( 12 | rA_3x3: np.ndarray, 13 | tA_3: np.ndarray, 14 | sA_3: np.ndarray, 15 | rB_3x3: np.ndarray, 16 | tB_3: np.ndarray, 17 | sB_3: np.ndarray, 18 | ) -> float: 19 | """Compute the exact 3D IoU of two boxes using `scipy.spatial.ConvexHull `_""" 20 | 21 | try: 22 | boxA = _Box.from_transformation(rA_3x3, tA_3, sA_3) 23 | boxB = _Box.from_transformation(rB_3x3, tB_3, sB_3) 24 | 25 | return _IoU(boxA, boxB).iou() 26 | except KeyboardInterrupt: 27 | raise 28 | except: # silently ignore qhull error 29 | return 0 30 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/iou/box.py: -------------------------------------------------------------------------------- 1 | """General 3D Bounding Box class. 2 | 3 | Origin: https://github.com/OasisYang/Wild6D/blob/main/lib/box.py 4 | """ 5 | 6 | import numpy as np 7 | from numpy.linalg import lstsq as optimizer 8 | from scipy.spatial.transform import Rotation as rotation_util 9 | 10 | EDGES = ( 11 | [1, 5], 12 | [2, 6], 13 | [3, 7], 14 | [4, 8], # lines along x-axis 15 | [1, 3], 16 | [5, 7], 17 | [2, 4], 18 | [6, 8], # lines along y-axis 19 | [1, 2], 20 | [3, 4], 21 | [5, 6], 22 | [7, 8], # lines along z-axis 23 | ) 24 | 25 | # The vertices are ordered according to the left-hand rule, so the normal 26 | # vector of each face will point inward the box. 27 | FACES = np.array( 28 | [ 29 | [5, 6, 8, 7], # +x on yz plane 30 | [1, 3, 4, 2], # -x on yz plane 31 | [3, 7, 8, 4], # +y on xz plane = top 32 | [1, 2, 6, 5], # -y on xz plane 33 | [2, 4, 8, 6], # +z on xy plane = front 34 | [1, 5, 7, 3], # -z on xy plane 35 | ] 36 | ) 37 | 38 | UNIT_BOX = np.asarray( 39 | [ 40 | [0.0, 0.0, 0.0], 41 | [-0.5, -0.5, -0.5], 42 | [-0.5, -0.5, 0.5], 43 | [-0.5, 0.5, -0.5], 44 | [-0.5, 0.5, 0.5], 45 | [0.5, -0.5, -0.5], 46 | [0.5, -0.5, 0.5], 47 | [0.5, 0.5, -0.5], 48 | [0.5, 0.5, 0.5], 49 | ] 50 | ) 51 | 52 | NUM_KEYPOINTS = 9 53 | FRONT_FACE_ID = 4 54 | TOP_FACE_ID = 2 55 | 56 | 57 | class Box(object): 58 | """General 3D Oriented Bounding Box.""" 59 | 60 | def __init__(self, vertices=None): 61 | if vertices is None: 62 | vertices = self.scaled_axis_aligned_vertices(np.array([1.0, 1.0, 1.0])) 63 | 64 | self._vertices = vertices 65 | self._rotation = None 66 | self._translation = None 67 | self._scale = None 68 | self._transformation = None 69 | self._volume = None 70 | 71 | @classmethod 72 | def from_transformation(cls, rotation, translation, scale): 73 | """Constructs an oriented bounding box from transformation and scale.""" 74 | if rotation.size != 3 and rotation.size != 9: 75 | raise ValueError( 76 | "Unsupported rotation, only 3x1 euler angles or 3x3 " 77 | + "rotation matrices are supported. 
" 78 | + rotation 79 | ) 80 | if rotation.size == 3: 81 | rotation = rotation_util.from_rotvec(rotation.tolist()).as_dcm() 82 | scaled_identity_box = cls.scaled_axis_aligned_vertices(scale) 83 | vertices = np.zeros((NUM_KEYPOINTS, 3)) 84 | for i in range(NUM_KEYPOINTS): 85 | vertices[i, :] = ( 86 | np.matmul(rotation, scaled_identity_box[i, :]) + translation.flatten() 87 | ) 88 | return cls(vertices=vertices) 89 | 90 | def __repr__(self): 91 | representation = "Box: " 92 | for i in range(NUM_KEYPOINTS): 93 | representation += "[{0}: {1}, {2}, {3}]".format( 94 | i, self.vertices[i, 0], self.vertices[i, 1], self.vertices[i, 2] 95 | ) 96 | return representation 97 | 98 | def __len__(self): 99 | return NUM_KEYPOINTS 100 | 101 | def __name__(self): 102 | return "Box" 103 | 104 | def apply_transformation(self, transformation): 105 | """Applies transformation on the box. 106 | 107 | Group multiplication is the same as rotation concatenation. Therefore return 108 | new box with SE3(R * R2, T + R * T2); Where R2 and T2 are existing rotation 109 | and translation. Note we do not change the scale. 110 | 111 | Args: 112 | transformation: a 4x4 transformation matrix. 113 | 114 | Returns: 115 | transformed box. 116 | """ 117 | if transformation.shape != (4, 4): 118 | raise ValueError("Transformation should be a 4x4 matrix.") 119 | 120 | new_rotation = np.matmul(transformation[:3, :3], self.rotation) 121 | new_translation = transformation[:3, 3] + ( 122 | np.matmul(transformation[:3, :3], self.translation) 123 | ) 124 | return Box.from_transformation(new_rotation, new_translation, self.scale) 125 | 126 | @classmethod 127 | def scaled_axis_aligned_vertices(cls, scale): 128 | """Returns an axis-aligned set of verticies for a box of the given scale. 129 | 130 | Args: 131 | scale: A 3*1 vector, specifiying the size of the box in x-y-z dimension. 132 | """ 133 | w = scale[0] / 2.0 134 | h = scale[1] / 2.0 135 | d = scale[2] / 2.0 136 | 137 | # Define the local coordinate system, w.r.t. the center of the box 138 | aabb = np.array( 139 | [ 140 | [0.0, 0.0, 0.0], 141 | [-w, -h, -d], 142 | [-w, -h, +d], 143 | [-w, +h, -d], 144 | [-w, +h, +d], 145 | [+w, -h, -d], 146 | [+w, -h, +d], 147 | [+w, +h, -d], 148 | [+w, +h, +d], 149 | ] 150 | ) 151 | return aabb 152 | 153 | @classmethod 154 | def fit(cls, vertices): 155 | """Estimates a box 9-dof parameters from the given vertices. 156 | 157 | Directly computes the scale of the box, then solves for orientation and 158 | translation. 159 | 160 | Args: 161 | vertices: A 9*3 array of points. Points are arranged as 1 + 8 (center 162 | keypoint + 8 box vertices) matrix. 163 | 164 | Returns: 165 | orientation: 3*3 rotation matrix. 166 | translation: 3*1 translation vector. 167 | scale: 3*1 scale vector. 168 | """ 169 | orientation = np.identity(3) 170 | translation = np.zeros((3, 1)) 171 | scale = np.zeros(3) 172 | 173 | # The scale would remain invariant under rotation and translation. 174 | # We can safely estimate the scale from the oriented box. 
175 | for axis in range(3): 176 | for edge_id in range(4): 177 | # The edges are stored in quadruples according to each axis 178 | begin, end = EDGES[axis * 4 + edge_id] 179 | scale[axis] += np.linalg.norm(vertices[begin, :] - vertices[end, :]) 180 | scale[axis] /= 4.0 181 | 182 | x = cls.scaled_axis_aligned_vertices(scale) 183 | system = np.concatenate((x, np.ones((NUM_KEYPOINTS, 1))), axis=1) 184 | solution, _, _, _ = optimizer(system, vertices, rcond=None) 185 | orientation = solution[:3, :3].T 186 | translation = solution[3, :3] 187 | return orientation, translation, scale 188 | 189 | def inside(self, point): 190 | """Tests whether a given point is inside the box. 191 | 192 | Brings the 3D point into the local coordinate of the box. In the local 193 | coordinate, the box looks like an axis-aligned bounding box. Next it checks if 194 | the box contains the point. 195 | Args: 196 | point: A 3*1 numpy vector. 197 | 198 | Returns: 199 | True if the point is inside the box, False otherwise. 200 | """ 201 | inv_trans = np.linalg.inv(self.transformation) 202 | scale = self.scale 203 | point_w = np.matmul(inv_trans[:3, :3], point) + inv_trans[:3, 3] 204 | for i in range(3): 205 | if abs(point_w[i]) > scale[i] / 2.0: 206 | return False 207 | return True 208 | 209 | def sample(self): 210 | """Samples a 3D point uniformly inside this box.""" 211 | point = np.random.uniform(-0.5, 0.5, 3) * self.scale 212 | point = np.matmul(self.rotation, point) + self.translation 213 | return point 214 | 215 | @property 216 | def vertices(self): 217 | return self._vertices 218 | 219 | @property 220 | def rotation(self): 221 | if self._rotation is None: 222 | self._rotation, self._translation, self._scale = self.fit(self._vertices) 223 | return self._rotation 224 | 225 | @property 226 | def translation(self): 227 | if self._translation is None: 228 | self._rotation, self._translation, self._scale = self.fit(self._vertices) 229 | return self._translation 230 | 231 | @property 232 | def scale(self): 233 | if self._scale is None: 234 | self._rotation, self._translation, self._scale = self.fit(self._vertices) 235 | return self._scale 236 | 237 | @property 238 | def volume(self): 239 | """Compute the volume of the parallelepiped or the box. 240 | 241 | For the boxes, this is equivalent to np.prod(self.scale). However for 242 | a parallelepiped, this is more involved. Viewing the box as a linear function 243 | we can estimate the volume using a determinant.
This is equivalent to 244 | sp.ConvexHull(self._vertices).volume 245 | 246 | Returns: 247 | volume (float) 248 | """ 249 | if self._volume is None: 250 | i = self._vertices[2, :] - self._vertices[1, :] 251 | j = self._vertices[3, :] - self._vertices[1, :] 252 | k = self._vertices[5, :] - self._vertices[1, :] 253 | sys = np.array([i, j, k]) 254 | self._volume = abs(np.linalg.det(sys)) 255 | return self._volume 256 | 257 | @property 258 | def transformation(self): 259 | if self._rotation is None: 260 | self._rotation, self._translation, self._scale = self.fit(self._vertices) 261 | if self._transformation is None: 262 | self._transformation = np.identity(4) 263 | self._transformation[:3, :3] = self._rotation 264 | self._transformation[:3, 3] = self._translation 265 | return self._transformation 266 | 267 | def get_ground_plane(self, gravity_axis=1): 268 | """Get ground plane under the box.""" 269 | 270 | gravity = np.zeros(3) 271 | gravity[gravity_axis] = 1 272 | 273 | def get_face_normal(face, center): 274 | """Get a normal vector to the given face of the box.""" 275 | v1 = self.vertices[face[0], :] - center 276 | v2 = self.vertices[face[1], :] - center 277 | normal = np.cross(v1, v2) 278 | return normal 279 | 280 | def get_face_center(face): 281 | """Get the center point of the face of the box.""" 282 | center = np.zeros(3) 283 | for vertex in face: 284 | center += self.vertices[vertex, :] 285 | center /= len(face) 286 | return center 287 | 288 | ground_plane_id = 0 289 | ground_plane_error = 10.0 290 | 291 | # The ground plane is defined as a plane aligned with gravity. 292 | # gravity is the (0, 1, 0) vector in the world coordinate system. 293 | for i in [0, 2, 4]: 294 | face = FACES[i, :] 295 | center = get_face_center(face) 296 | normal = get_face_normal(face, center) 297 | w = np.cross(gravity, normal) 298 | w_sq_norm = np.linalg.norm(w) 299 | if w_sq_norm < ground_plane_error: 300 | ground_plane_error = w_sq_norm 301 | ground_plane_id = i 302 | 303 | face = FACES[ground_plane_id, :] 304 | center = get_face_center(face) 305 | normal = get_face_normal(face, center) 306 | 307 | # For each face, we also have a parallel face that it's normal is also 308 | # aligned with gravity vector. We pick the face with lower height (y-value). 309 | # The parallel to face 0 is 1, face 2 is 3, and face 4 is 5. 310 | parallel_face_id = ground_plane_id + 1 311 | parallel_face = FACES[parallel_face_id] 312 | parallel_face_center = get_face_center(parallel_face) 313 | parallel_face_normal = get_face_normal(parallel_face, parallel_face_center) 314 | if parallel_face_center[gravity_axis] < center[gravity_axis]: 315 | center = parallel_face_center 316 | normal = parallel_face_normal 317 | return center, normal 318 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/iou/iou.py: -------------------------------------------------------------------------------- 1 | """The Intersection Over Union (IoU) for 3D oriented bounding boxes. 
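A minimal sketch using the package-level helper from ``cutoop.iou``
(two unit cubes overlapping by half along x)::

    import numpy as np
    from cutoop.iou import compute_3d_bbox_iou

    iou = compute_3d_bbox_iou(
        np.eye(3), np.zeros(3), np.ones(3),                # box A: rotation, translation, size
        np.eye(3), np.array([0.5, 0.0, 0.0]), np.ones(3),  # box B, shifted by half a side
    )
    # intersection 0.5, union 1.5  ->  iou is about 1/3
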
2 | 3 | Origin: https://github.com/OasisYang/Wild6D/blob/main/lib/iou.py 4 | """ 5 | 6 | import numpy as np 7 | import scipy.spatial as sp 8 | 9 | from .box import FACES, NUM_KEYPOINTS 10 | 11 | _PLANE_THICKNESS_EPSILON = 0.000001 12 | _POINT_IN_FRONT_OF_PLANE = 1 13 | _POINT_ON_PLANE = 0 14 | _POINT_BEHIND_PLANE = -1 15 | 16 | 17 | class IoU(object): 18 | """General Intersection Over Union cost for Oriented 3D bounding boxes.""" 19 | 20 | def __init__(self, box1, box2): 21 | self._box1 = box1 22 | self._box2 = box2 23 | self._intersection_points = [] 24 | 25 | def iou(self): 26 | """Computes the exact IoU using Sutherland-Hodgman algorithm.""" 27 | self._intersection_points = [] 28 | self._compute_intersection_points(self._box1, self._box2) 29 | self._compute_intersection_points(self._box2, self._box1) 30 | if self._intersection_points: 31 | intersection_volume = sp.ConvexHull(self._intersection_points).volume 32 | box1_volume = self._box1.volume 33 | box2_volume = self._box2.volume 34 | union_volume = box1_volume + box2_volume - intersection_volume 35 | return intersection_volume / union_volume 36 | else: 37 | return 0.0 38 | 39 | def iou_sampling(self, num_samples=10000): 40 | """Computes intersection over union by sampling points. 41 | 42 | Generate n samples inside each box and check if those samples are inside 43 | the other box. Each box has a different volume, therefore the number o 44 | samples in box1 is estimating a different volume than box2. To address 45 | this issue, we normalize the iou estimation based on the ratio of the 46 | volume of the two boxes. 47 | 48 | Args: 49 | num_samples: Number of generated samples in each box 50 | 51 | Returns: 52 | IoU Estimate (float) 53 | """ 54 | p1 = [self._box1.sample() for _ in range(num_samples)] 55 | p2 = [self._box2.sample() for _ in range(num_samples)] 56 | box1_volume = self._box1.volume 57 | box2_volume = self._box2.volume 58 | box1_intersection_estimate = 0 59 | box2_intersection_estimate = 0 60 | for point in p1: 61 | if self._box2.inside(point): 62 | box1_intersection_estimate += 1 63 | for point in p2: 64 | if self._box1.inside(point): 65 | box2_intersection_estimate += 1 66 | # We are counting the volume of intersection twice. 
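        # box1_volume * box1_intersection_estimate approximates
        # num_samples * intersection_volume (the hit count scales with
        # intersection / box volume), and likewise for box2. The shared
        # num_samples factor reappears in the union estimate below, so it
        # cancels out in the final ratio.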
67 | intersection_volume_estimate = ( 68 | box1_volume * box1_intersection_estimate 69 | + box2_volume * box2_intersection_estimate 70 | ) / 2.0 71 | union_volume_estimate = ( 72 | box1_volume * num_samples + box2_volume * num_samples 73 | ) - intersection_volume_estimate 74 | iou_estimate = intersection_volume_estimate / union_volume_estimate 75 | return iou_estimate 76 | 77 | def _compute_intersection_points(self, box_src, box_template): 78 | """Computes the intersection of two boxes.""" 79 | # Transform the source box to be axis-aligned 80 | inv_transform = np.linalg.inv(box_src.transformation) 81 | box_src_axis_aligned = box_src.apply_transformation(inv_transform) 82 | template_in_src_coord = box_template.apply_transformation(inv_transform) 83 | for face in range(len(FACES)): 84 | indices = FACES[face, :] 85 | poly = [template_in_src_coord.vertices[indices[i], :] for i in range(4)] 86 | clip = self.intersect_box_poly(box_src_axis_aligned, poly) 87 | for point in clip: 88 | # Transform the intersection point back to the world coordinate 89 | point_w = np.matmul(box_src.rotation, point) + box_src.translation 90 | self._intersection_points.append(point_w) 91 | 92 | for point_id in range(NUM_KEYPOINTS): 93 | v = template_in_src_coord.vertices[point_id, :] 94 | if box_src_axis_aligned.inside(v): 95 | point_w = np.matmul(box_src.rotation, v) + box_src.translation 96 | self._intersection_points.append(point_w) 97 | 98 | def intersect_box_poly(self, box, poly): 99 | """Clips the polygon against the faces of the axis-aligned box.""" 100 | for axis in range(3): 101 | poly = self._clip_poly(poly, box.vertices[1, :], 1.0, axis) 102 | poly = self._clip_poly(poly, box.vertices[8, :], -1.0, axis) 103 | return poly 104 | 105 | def _clip_poly(self, poly, plane, normal, axis): 106 | """Clips the polygon with the plane using the Sutherland-Hodgman algorithm. 107 | 108 | See en.wikipedia.org/wiki/Sutherland-Hodgman_algorithm for the overview of 109 | the Sutherland-Hodgman algorithm. Here we adopted a robust implementation 110 | from "Real-Time Collision Detection", by Christer Ericson, page 370. 111 | 112 | Args: 113 | poly: List of 3D vertices defining the polygon. 114 | plane: The 3D vertices of the (2D) axis-aligned plane. 115 | normal: normal 116 | axis: A tuple defining a 2D axis. 117 | 118 | Returns: 119 | List of 3D vertices of the clipped polygon. 120 | """ 121 | # The vertices of the clipped polygon are stored in the result list. 122 | result = [] 123 | if len(poly) <= 1: 124 | return result 125 | 126 | # polygon is fully located on clipping plane 127 | poly_in_plane = True 128 | 129 | # Test all the edges in the polygon against the clipping plane. 
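        # For each directed edge prev_poly_point -> current_poly_point:
        # vertices in front of the plane (inside the clip half-space) are kept,
        # vertices behind it are dropped, and whenever the edge crosses the
        # plane the crossing point is emitted. On-plane vertices are
        # deduplicated, and a polygon lying entirely in the plane is returned
        # unchanged via poly_in_plane.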
130 | for i, current_poly_point in enumerate(poly): 131 | prev_poly_point = poly[(i + len(poly) - 1) % len(poly)] 132 | d1 = self._classify_point_to_plane(prev_poly_point, plane, normal, axis) 133 | d2 = self._classify_point_to_plane(current_poly_point, plane, normal, axis) 134 | if d2 == _POINT_BEHIND_PLANE: 135 | poly_in_plane = False 136 | if d1 == _POINT_IN_FRONT_OF_PLANE: 137 | intersection = self._intersect( 138 | plane, prev_poly_point, current_poly_point, axis 139 | ) 140 | result.append(intersection) 141 | elif d1 == _POINT_ON_PLANE: 142 | if not result or (not np.array_equal(result[-1], prev_poly_point)): 143 | result.append(prev_poly_point) 144 | elif d2 == _POINT_IN_FRONT_OF_PLANE: 145 | poly_in_plane = False 146 | if d1 == _POINT_BEHIND_PLANE: 147 | intersection = self._intersect( 148 | plane, prev_poly_point, current_poly_point, axis 149 | ) 150 | result.append(intersection) 151 | elif d1 == _POINT_ON_PLANE: 152 | if not result or (not np.array_equal(result[-1], prev_poly_point)): 153 | result.append(prev_poly_point) 154 | 155 | result.append(current_poly_point) 156 | else: 157 | if d1 != _POINT_ON_PLANE: 158 | result.append(current_poly_point) 159 | 160 | if poly_in_plane: 161 | return poly 162 | else: 163 | return result 164 | 165 | def _intersect(self, plane, prev_point, current_point, axis): 166 | """Computes the intersection of a line with an axis-aligned plane. 167 | 168 | Args: 169 | plane: Formulated as two 3D points on the plane. 170 | prev_point: The point on the edge of the line. 171 | current_point: The other end of the line. 172 | axis: A tuple defining a 2D axis. 173 | 174 | Returns: 175 | A 3D point intersection of the poly edge with the plane. 176 | """ 177 | alpha = (current_point[axis] - plane[axis]) / ( 178 | current_point[axis] - prev_point[axis] 179 | ) 180 | # Compute the intersecting points using linear interpolation (lerp) 181 | intersection_point = alpha * prev_point + (1.0 - alpha) * current_point 182 | return intersection_point 183 | 184 | def _inside(self, plane, point, axis): 185 | """Check whether a given point is on a 2D plane.""" 186 | # Cross products to determine the side of the plane the point lie. 187 | x, y = axis 188 | u = plane[0] - point 189 | v = plane[1] - point 190 | 191 | a = u[x] * v[y] 192 | b = u[y] * v[x] 193 | return a >= b 194 | 195 | def _classify_point_to_plane(self, point, plane, normal, axis): 196 | """Classify position of a point w.r.t the given plane. 197 | 198 | See Real-Time Collision Detection, by Christer Ericson, page 364. 199 | 200 | Args: 201 | point: 3x1 vector indicating the point 202 | plane: 3x1 vector indicating a point on the plane 203 | normal: scalar (+1, or -1) indicating the normal to the vector 204 | axis: scalar (0, 1, or 2) indicating the xyz axis 205 | 206 | Returns: 207 | Side: which side of the plane the point is located. 
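        Points whose signed distance along ``axis`` is within
        ``_PLANE_THICKNESS_EPSILON`` of the plane are reported as lying on the
        plane, which keeps the clipping numerically robust.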
208 | """ 209 | signed_distance = normal * (point[axis] - plane[axis]) 210 | if signed_distance > _PLANE_THICKNESS_EPSILON: 211 | return _POINT_IN_FRONT_OF_PLANE 212 | elif signed_distance < -_PLANE_THICKNESS_EPSILON: 213 | return _POINT_BEHIND_PLANE 214 | else: 215 | return _POINT_ON_PLANE 216 | 217 | @property 218 | def intersection_points(self): 219 | return self._intersection_points 220 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/log.py: -------------------------------------------------------------------------------- 1 | """ 2 | Logging 3 | ======= 4 | 5 | In principle, we recommend Python's official `logging` package for logging. 6 | This module contains utilities for logging setup. 7 | """ 8 | 9 | import logging 10 | import json 11 | import sys 12 | 13 | try: 14 | from typing import Any, Literal 15 | except: 16 | pass 17 | 18 | 19 | class _CustomFormatter(logging.Formatter): 20 | grey = "\x1b[38;20m" 21 | green = "\x1b[32;20m" 22 | yellow = "\x1b[33;20m" 23 | red = "\x1b[31;20m" 24 | bold_red = "\x1b[31;1m" 25 | reset = "\x1b[0m" 26 | FORMAT_SHORT = " %(asctime)s [%(name)s] %(message)s" 27 | FORMAT_LONG = " %(asctime)s [%(name)s] %(message)s (%(filename)s:%(lineno)d)" 28 | FILE_FORMAT = ( 29 | "%(levelname)s %(asctime)s [%(name)s] %(message)s (%(filename)s:%(lineno)d)" 30 | ) 31 | 32 | LEVEL_FORMATS = { 33 | logging.DEBUG: ">> " + grey + "%(levelname)s" + reset + FORMAT_LONG, 34 | logging.INFO: green + "%(levelname)s" + reset + FORMAT_SHORT, 35 | logging.WARNING: yellow + "%(levelname)s" + reset + FORMAT_LONG, 36 | logging.ERROR: red + "%(levelname)s" + reset + FORMAT_LONG, 37 | logging.CRITICAL: bold_red + "%(levelname)s" + reset + FORMAT_LONG, 38 | } 39 | 40 | mode: str 41 | 42 | def __init__(self, mode: 'Literal["terminal", "file"]' = "terminal") -> None: 43 | self.mode = mode 44 | super().__init__() 45 | 46 | def format(self, record): 47 | if self.mode == "terminal": 48 | level_fmt = self.LEVEL_FORMATS.get(record.levelno) 49 | formatter = logging.Formatter(level_fmt) 50 | else: 51 | formatter = logging.Formatter(self.FILE_FORMAT) 52 | 53 | return formatter.format(record) 54 | 55 | 56 | class _JsonFormatter(logging.Formatter): 57 | """ 58 | Formatter that outputs JSON strings after parsing the LogRecord. 59 | 60 | :param fmt_dict: Key: logging format attribute pairs. Defaults to ``{"message": "message"}``. 61 | :param time_format: time.strftime() format string. Default: "%Y-%m-%dT%H:%M:%S" 62 | :param msec_format: Microsecond formatting. Appended at the end. Default: "%s.%03dZ" 63 | """ 64 | 65 | def __init__( 66 | self, 67 | fmt_dict: dict = { 68 | "level": "levelname", 69 | "message": "message", 70 | "loggerName": "name", 71 | "processName": "processName", 72 | "processID": "process", 73 | "threadName": "threadName", 74 | "threadID": "thread", 75 | "timestamp": "asctime", 76 | }, 77 | time_format: str = "%Y-%m-%dT%H:%M:%S", 78 | msec_format: str = "%s.%03dZ", 79 | ): 80 | self.fmt_dict = fmt_dict if fmt_dict is not None else {"message": "message"} 81 | self.default_time_format = time_format 82 | self.default_msec_format = msec_format 83 | self.datefmt = None 84 | 85 | def usesTime(self) -> bool: 86 | """ 87 | Overwritten to look for the attribute in the format dict values instead of the fmt string. 88 | """ 89 | return "asctime" in self.fmt_dict.values() 90 | 91 | def formatMessage(self, record) -> dict: 92 | """ 93 | Overwritten to return a dictionary of the relevant LogRecord attributes instead of a string. 
94 | KeyError is raised if an unknown attribute is provided in the fmt_dict. 95 | """ 96 | return { 97 | fmt_key: record.__dict__[fmt_val] 98 | for fmt_key, fmt_val in self.fmt_dict.items() 99 | } 100 | 101 | def format(self, record) -> str: 102 | """ 103 | Mostly the same as the parent's class method, the difference being that a dict is manipulated and dumped as JSON 104 | instead of a string. 105 | """ 106 | record.message = record.getMessage() 107 | 108 | if self.usesTime(): 109 | record.asctime = self.formatTime(record, self.datefmt) 110 | 111 | message_dict = self.formatMessage(record) 112 | 113 | if record.exc_info: 114 | # Cache the traceback text to avoid converting it multiple times 115 | # (it's constant anyway) 116 | if not record.exc_text: 117 | record.exc_text = self.formatException(record.exc_info) 118 | 119 | if record.exc_text: 120 | message_dict["exc_info"] = record.exc_text 121 | 122 | if record.stack_info: 123 | message_dict["stack_info"] = self.formatStack(record.stack_info) 124 | 125 | return json.dumps(message_dict, default=str) 126 | 127 | 128 | def setup_logging( 129 | log_mode: str, debug=False, logfile: "str | None" = None, json=True, delay=True 130 | ): 131 | """Setup a simple logging for Python's official ``logging`` package. 132 | 133 | :param log_mode: ``file`` for only logging to file, `terminal` for only logging to terminal, otherwise both are taken. 134 | :param debug: whether set loglevel to DEBUG (default is INFO) 135 | :param logfile: specify logging filename 136 | :param json: whether output json format log, whose file name is ``logfile + ".jl"``. 137 | :param delay: See `FileHandler `_ 138 | 139 | .. highlight:: python 140 | 141 | Usage:: 142 | 143 | # only logging to terminal, enabling debug level logging. 144 | setup_logging("terminal", debug=True) 145 | 146 | # only logging to runtime.log file and runtime.log.jl (json log) 147 | setup_logging("file", logfile="runtime.log") 148 | 149 | # logging to terminal and file without json 150 | setup_logging("both", logfile="runtime.log", json=false) 151 | 152 | """ 153 | while len(logging.root.handlers) > 0: 154 | logging.root.removeHandler(logging.root.handlers[0]) 155 | 156 | level = logging.INFO 157 | if debug: 158 | level = logging.DEBUG 159 | logging.root.setLevel(level=level) 160 | 161 | if log_mode != "file": 162 | handler = logging.StreamHandler(sys.stderr) 163 | handler.setLevel(level) 164 | handler.setFormatter(_CustomFormatter("terminal")) 165 | logging.root.addHandler(handler) 166 | 167 | if log_mode != "terminal": 168 | assert logfile is not None 169 | handler = logging.FileHandler(logfile, delay=delay) 170 | handler.setLevel(level) 171 | handler.setFormatter(_CustomFormatter("file")) 172 | logging.root.addHandler(handler) 173 | if json: 174 | handler = logging.FileHandler(logfile + ".jl", delay=delay) 175 | handler.setLevel(level) 176 | handler.setFormatter(_JsonFormatter()) 177 | logging.root.addHandler(handler) 178 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/obj_meta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data types for Object Metafile 3 | ============================== 4 | """ 5 | 6 | from dataclasses import dataclass 7 | from .rotation import SymLabel 8 | import json 9 | 10 | 11 | @dataclass 12 | class ObjectTag: 13 | datatype: str 14 | """Which split does this object belongs to (`train` or `test`)""" 15 | sceneChanger: bool 16 | """whether this object has a relatively 
large size that can be placed objects on""" 17 | symmetry: SymLabel 18 | """Symmetry label.""" 19 | 20 | # only for Omni6DPose generation 21 | materialOptions: "list[str]" 22 | 23 | # only for Omni6DPose generation 24 | upAxis: "list[str]" 25 | 26 | def __post_init__(self): 27 | if not isinstance(self.symmetry, SymLabel): 28 | self.symmetry = SymLabel(**self.symmetry) 29 | 30 | 31 | @dataclass 32 | class ObjectInfo: 33 | object_id: str 34 | """Global unique object id.""" 35 | 36 | source: str 37 | """Source dataset of this object. 38 | 39 | - `phocal`: the `PhoCaL `_ dataset. 40 | - `omniobject3d`: the `OmniObject3D `_ dataset. 41 | - `google_scan`: the `Google Scanned Objects `_ dataset. 42 | - `real`: supplementary scanned objects collected from real-world. 43 | """ 44 | 45 | # name of this object (deprecated, use object_id instead) 46 | name: str 47 | 48 | obj_path: str 49 | """path to the object mesh file, relative to the source dataset path""" 50 | 51 | tag: ObjectTag 52 | """See :class:`ObjectTag`""" 53 | 54 | class_label: int 55 | """class ID of this object""" 56 | 57 | class_name: str 58 | """class name of this object""" 59 | 60 | dimensions: "list[float]" 61 | """Bounding box size under object space (not NOCS)""" 62 | 63 | def __post_init__(self): 64 | if not isinstance(self.tag, ObjectTag): 65 | self.tag = ObjectTag(**self.tag) 66 | 67 | 68 | @dataclass 69 | class ClassListItem: 70 | name: str 71 | """Name of this class""" 72 | 73 | label: int 74 | """ID of this class""" 75 | 76 | instance_ids: "list[str]" 77 | """List of the `object_id` of the objects belonging to this class""" 78 | 79 | stat: dict 80 | 81 | 82 | @dataclass 83 | class ObjectMetaData: 84 | class_list: "list[ClassListItem]" 85 | """The complete list of class""" 86 | 87 | instance_dict: "dict[str, ObjectInfo]" 88 | """Objects indexed by `object_id`""" 89 | 90 | def __post_init__(self): 91 | if len(self.class_list) > 0 and not isinstance( 92 | self.class_list[0], ClassListItem 93 | ): 94 | self.class_list = [ClassListItem(**x) for x in self.class_list] 95 | if len(self.instance_dict) > 0 and not isinstance( 96 | next(iter(self.instance_dict.values())), ObjectInfo 97 | ): 98 | self.instance_dict = { 99 | k: ObjectInfo(**v) for k, v in self.instance_dict.items() 100 | } 101 | 102 | @staticmethod 103 | def load_json(path: str) -> "ObjectMetaData": 104 | """Load object meta data from json file""" 105 | 106 | with open(path, "r") as f: 107 | jsondata = json.load(f) 108 | 109 | return ObjectMetaData(**jsondata) 110 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/rotation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Rotation and Symmetry 3 | ===================== 4 | 5 | You can add ``CUTOOP_STRICT_MODE=1`` to environment to enable a strict value assertion 6 | before calculating rotation error. 7 | 8 | SymLabel 9 | -------- 10 | 11 | .. autoclass:: SymLabel 12 | :show-inheritance: 13 | :members: 14 | :special-members: __str__ 15 | 16 | Rotation Manipulation 17 | --------------------- 18 | 19 | .. autofunction:: rot_canonical_sym 20 | 21 | .. autofunction:: rots_of_sym 22 | 23 | Rotation Error Computation 24 | -------------------------- 25 | 26 | .. autofunction:: rot_diff_axis 27 | 28 | .. autofunction:: rot_diff_sym 29 | 30 | .. autofunction:: rot_diff_theta 31 | 32 | .. autofunction:: rot_diff_theta_pointwise 33 | 34 | Prelude Rotations 35 | ----------------- 36 | 37 | .. 
autodata:: STANDARD_SYMMETRY 38 | 39 | .. autodata:: cube_group 40 | 41 | .. autodata:: cube_flip_group 42 | 43 | .. autodata:: qtr 44 | 45 | .. autodata:: ax 46 | 47 | .. autodata:: rot90 48 | 49 | .. autodata:: rot180 50 | 51 | """ 52 | 53 | try: # just for type hints 54 | from typing import Literal 55 | except: 56 | pass 57 | 58 | import numpy as np 59 | from numpy import ndarray 60 | from scipy.spatial.transform import Rotation as _Rotation 61 | from dataclasses import dataclass 62 | import copy 63 | import os 64 | 65 | 66 | @dataclass 67 | class SymLabel: 68 | """ 69 | Symmetry labels for real-world objects. 70 | 71 | Axis rotation details: 72 | 73 | - `any`: arbitrary rotation around this axis is ok 74 | - `half`: rotate 180 degrees along this axis (central symmetry) 75 | - `quarter`: rotate 90 degrees along this axis (like square) 76 | 77 | .. doctest:: 78 | 79 | >>> from cutoop.rotation import SymLabel 80 | >>> sym = SymLabel(any=False, x='any', y='none', z='none') 81 | >>> str(sym) 82 | 'x-cone' 83 | >>> sym = SymLabel(any=False, x='any', y='any', z='none') # two any produces 'any' 84 | >>> str(sym) 85 | 'any' 86 | >>> sym = SymLabel(any=False, x='half', y='half', z='half') 87 | >>> str(sym) 88 | 'box' 89 | 90 | """ 91 | 92 | any: bool # sphere 93 | """Whether arbitrary rotation is allowed""" 94 | x: "Literal['none', 'any', 'half', 'quarter']" 95 | """axis rotation for x""" 96 | y: "Literal['none', 'any', 'half', 'quarter']" 97 | """axis rotation for y""" 98 | z: "Literal['none', 'any', 'half', 'quarter']" 99 | """axis rotation for z""" 100 | 101 | def get_only(self, tag: 'Literal["any", "half", "quarter"]'): 102 | """Get the only axis marked with the tag. If multiple or none is find, return ``None``.""" 103 | ret: 'list[Literal["x", "y", "z"]]' = [] 104 | if self.x == tag: 105 | ret.append("x") 106 | if self.y == tag: 107 | ret.append("y") 108 | if self.z == tag: 109 | ret.append("z") 110 | if len(ret) != 1: 111 | return None 112 | return ret[0] 113 | 114 | @staticmethod 115 | def from_str(s: str) -> "SymLabel": 116 | """Construct symmetry from string. 117 | 118 | .. note:: See also :obj:`STANDARD_SYMMETRY`. 119 | """ 120 | if s in STANDARD_SYMMETRY: 121 | return copy.deepcopy(STANDARD_SYMMETRY[s]) 122 | else: 123 | raise ValueError(f"invalid symmetry: {s}") 124 | 125 | def __str__(self) -> str: 126 | """ 127 | For human readability, rotations are divided into the following types (names): 128 | 129 | - ``any``: arbitrary rotation is ok; 130 | - ``cube``: the same symmetry as a cube; 131 | - ``box``: the same symmetry as a box (flipping along x, y, and z axis); 132 | - ``none``: no symmetry is provided; 133 | - ``{x,y,z}-flip``: flip along a single axis; 134 | - ``{x,y,z}-square-tube``: the same symmetry as a square tube alone the axis; 135 | - ``{x,y,z}-square-pyramid``: the same symmetry as a pyramid alone the axis; 136 | - ``{x,y,z}-cylinder``: the same symmetry as a cylinder the axis; 137 | - ``{x,y,z}-cone``: the same symmetry as a cone the axis. 
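        For illustration (these follow directly from the rules above)::

            >>> from cutoop.rotation import SymLabel
            >>> str(SymLabel(any=False, x='half', y='any', z='half'))
            'y-cylinder'
            >>> str(SymLabel(any=False, x='quarter', y='half', z='half'))
            'x-square-tube'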
138 | 139 | """ 140 | c_any = (self.x == "any") + (self.y == "any") + (self.z == "any") 141 | c_quarter = ( 142 | (self.x == "quarter") + (self.y == "quarter") + (self.z == "quarter") 143 | ) 144 | c_half = (self.x == "half") + (self.y == "half") + (self.z == "half") 145 | 146 | if self.any or c_any > 1 or (c_any > 0 and c_quarter > 0): # any rotation is ok 147 | return "any" 148 | 149 | if c_any == 0: 150 | if c_quarter > 1: 151 | return "cube" # cube group 152 | elif c_quarter == 0: 153 | if c_half > 1: 154 | return "box" # cube_flip_group 155 | else: # one half or none 156 | axis = self.get_only("half") 157 | return f"{axis}-flip" if axis is not None else "none" 158 | else: # c_quarter == 1 159 | axis = self.get_only("quarter") 160 | if c_half > 0: 161 | return f"{axis}-square-tube" 162 | else: 163 | return f"{axis}-square-pyramid" 164 | else: 165 | assert c_any == 1 and c_quarter == 0 166 | axis = self.get_only("any") 167 | if c_half > 0: 168 | return f"{axis}-cylinder" 169 | else: 170 | return f"{axis}-cone" 171 | 172 | 173 | STANDARD_SYMMETRY = { 174 | "any": SymLabel(any=True, x="any", y="any", z="any"), 175 | "cube": SymLabel(any=False, x="quarter", y="quarter", z="quarter"), 176 | "box": SymLabel(any=False, x="half", y="half", z="half"), 177 | "none": SymLabel(any=False, x="none", y="none", z="none"), 178 | "x-flip": SymLabel(any=False, x="half", y="none", z="none"), 179 | "y-flip": SymLabel(any=False, x="none", y="half", z="none"), 180 | "z-flip": SymLabel(any=False, x="none", y="none", z="half"), 181 | "x-square-tube": SymLabel(any=False, x="quarter", y="half", z="half"), 182 | "y-square-tube": SymLabel(any=False, x="half", y="quarter", z="half"), 183 | "z-square-tube": SymLabel(any=False, x="half", y="half", z="quarter"), 184 | "x-square-pyramid": SymLabel(any=False, x="quarter", y="none", z="none"), 185 | "y-square-pyramid": SymLabel(any=False, x="none", y="quarter", z="none"), 186 | "z-square-pyramid": SymLabel(any=False, x="none", y="none", z="quarter"), 187 | "x-cylinder": SymLabel(any=False, x="any", y="half", z="half"), 188 | "y-cylinder": SymLabel(any=False, x="half", y="any", z="half"), 189 | "z-cylinder": SymLabel(any=False, x="half", y="half", z="any"), 190 | "x-cone": SymLabel(any=False, x="any", y="none", z="none"), 191 | "y-cone": SymLabel(any=False, x="none", y="any", z="none"), 192 | "z-cone": SymLabel(any=False, x="none", y="none", z="any"), 193 | } 194 | """ 195 | All standard symmetries. 196 | 197 | .. doctest:: 198 | 199 | >>> for name, sym in cutoop.rotation.STANDARD_SYMMETRY.items(): 200 | ... 
assert str(sym) == name, f"name: {name}, sym: {repr(sym)}" 201 | 202 | """ 203 | 204 | ax = { 205 | "x": np.array([1, 0, 0]), 206 | "y": np.array([0, 1, 0]), 207 | "z": np.array([0, 0, 1]), 208 | } 209 | """Normalized axis vectors.""" 210 | 211 | rot90 = { 212 | "x": _Rotation.from_rotvec(np.pi / 2 * ax["x"]).as_matrix(), 213 | "y": _Rotation.from_rotvec(np.pi / 2 * ax["y"]).as_matrix(), 214 | "z": _Rotation.from_rotvec(np.pi / 2 * ax["z"]).as_matrix(), 215 | } 216 | """90-degree rotation along each axis.""" 217 | 218 | rot180 = { 219 | "x": _Rotation.from_rotvec(np.pi * ax["x"]).as_matrix(), 220 | "y": _Rotation.from_rotvec(np.pi * ax["y"]).as_matrix(), 221 | "z": _Rotation.from_rotvec(np.pi * ax["z"]).as_matrix(), 222 | } 223 | """flipping along each axis""" 224 | 225 | qtr = { 226 | "x": [np.eye(3), rot90["x"], rot180["x"], rot90["x"].T], 227 | "y": [np.eye(3), rot90["y"], rot180["y"], rot90["y"].T], 228 | "z": [np.eye(3), rot90["z"], rot180["z"], rot90["z"].T], 229 | } 230 | """All transformations composed by 90-degree rotations along each axis.""" 231 | 232 | cube_flip_group: "list[ndarray]" = [np.eye(3), rot180["x"], rot180["y"], rot180["z"]] 233 | """ 234 | All 4 rotations of a box, as 3x3 matrices. 235 | That is rotating 180 degrees around x y z, respectively (abelian group). 236 | 237 | :meta hide-value: 238 | """ 239 | 240 | cube_group: "list[ndarray]" = [ 241 | s @ g @ h 242 | for s in [np.eye(3), rot90["x"]] 243 | for g in [rot90["x"] @ rot90["y"], rot90["x"] @ rot90["z"], np.eye(3)] 244 | for h in cube_flip_group 245 | ] 246 | """ 247 | All 24 rotations of a cube (Sym(4)). 248 | 249 | The correctness of this implementation lies in: 250 | 251 | 1. ``cube_flip_group`` is a normal subgroup of alternating group 252 | 2. alternating group Alt(4) is a normal subgroup of Sym(4) 253 | 254 | Thus we can construct Sym(4) by enumerating all cosets of Alt(4), 255 | which is construct by enumerating all cosets of ``cube_flip_group``. 256 | 257 | One can check validity of it by 258 | 259 | .. doctest:: 260 | 261 | >>> from cutoop.rotation import cube_group 262 | >>> for i in range(24): 263 | ... for j in range(24): 264 | ... if i < j: 265 | ... diff = cube_group[i] @ cube_group[j].T 266 | ... assert np.linalg.norm(diff - np.eye(3)) > 0.1 267 | 268 | :meta hide-value: 269 | """ 270 | 271 | 272 | def rot_diff_theta(rA_3x3: ndarray, rB_Nx3x3: ndarray) -> ndarray: 273 | """ 274 | Compute the difference angle of one rotation with a series of rotations. 275 | 276 | Note that since `arccos` gets quite steep around 0, the computational loss is 277 | somehow inevitable (around 1e-4). 278 | 279 | :return: theta (unit: radius) array of length N. 280 | """ 281 | R = rA_3x3 @ rB_Nx3x3.reshape(-1, 3, 3).transpose(0, 2, 1) 282 | val = (np.trace(R, axis1=1, axis2=2) - 1) / 2 283 | 284 | if int(os.environ.get("CUTOOP_STRICT_MODE", "0")) == 1: 285 | assert np.all(np.abs(val) < 1 + 1e-5), f"invalid rotation matrix, cos = {val}" 286 | theta = np.arccos(np.clip(val, -1, 1)) 287 | return theta 288 | 289 | 290 | def rot_diff_theta_pointwise(rA_Nx3x3: ndarray, rB_Nx3x3: ndarray) -> ndarray: 291 | """ 292 | compute the difference angle of two sequences of rotations pointwisely. 
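    Internally this computes ``R = rA[i] @ rB[i].T`` for each pair via
    ``np.einsum`` and returns ``arccos((trace(R) - 1) / 2)``, with the cosine
    clipped to the valid range.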
293 | 294 | :return: theta (unit: radius) array of length N 295 | """ 296 | R = np.einsum("ijk,ilk->ijl", rA_Nx3x3, rB_Nx3x3) # rA @ rB.T 297 | val = (np.trace(R, axis1=1, axis2=2) - 1) / 2 298 | 299 | if int(os.environ.get("CUTOOP_STRICT_MODE", "0")) == 1: 300 | assert np.all(np.abs(val) < 1 + 1e-5), f"invalid rotation matrix, cos = {val}" 301 | theta = np.arccos(np.clip(val, -1, 1)) 302 | return theta 303 | 304 | 305 | def rot_diff_axis(rA_3x3: ndarray, rB_Nx3x3: ndarray, axis: ndarray) -> ndarray: 306 | """compute the difference angle where rotation aroud axis is ignored. 307 | 308 | :param axis: One of :obj:`ax`. 309 | 310 | """ 311 | axis = axis.reshape(3) 312 | y1 = rA_3x3.reshape(3, 3) @ axis 313 | y2 = rB_Nx3x3.reshape(-1, 3, 3) @ axis 314 | val = y2.dot(y1) / (np.linalg.norm(y1) * np.linalg.norm(y2, axis=1)) 315 | 316 | if int(os.environ.get("CUTOOP_STRICT_MODE", "0")) == 1: 317 | assert np.all(np.abs(val) < 1 + 1e-5), f"invalid rotation matrix, cos = {val}" 318 | theta = np.arccos(np.clip(val, -1, 1)) 319 | return theta 320 | 321 | 322 | def rot_diff_sym(rA_3x3: ndarray, rB_3x3: ndarray, sym: SymLabel) -> float: 323 | """compute the difference angle (rotation error) with regard of symmetry. 324 | 325 | This function use analytic method to calculate the difference angle, 326 | which is more accurate than :func:`rot_canonical_sym`. 327 | 328 | :return: the difference angle. 329 | """ 330 | 331 | rA_3x3 = rA_3x3.reshape(3, 3) 332 | rB_3x3 = rB_3x3.reshape(3, 3) 333 | 334 | c_any = (sym.x == "any") + (sym.y == "any") + (sym.z == "any") 335 | symname = str(sym) 336 | 337 | if symname == "any": # any rotation is ok 338 | return 0 339 | elif c_any == 0: 340 | _, theta = rot_canonical_sym(rA_3x3, rB_3x3, sym, return_theta=True) 341 | return theta 342 | else: # c_any == 1 and c_quarter == 0 343 | assert c_any == 1 344 | axis = sym.get_only("any") 345 | vec = ax[axis] 346 | rB1 = rB_3x3[None, ...] 347 | if symname.endswith("cylinder"): 348 | half_axis = "x" if axis == "y" else "y" # any other axis is ok 349 | t1 = rot_diff_axis(rA_3x3 @ rot180[half_axis], rB1, vec.reshape(3))[0] 350 | t2 = rot_diff_axis(rA_3x3, rB1, vec.reshape(3))[0] 351 | theta = min(t1, t2) 352 | else: 353 | theta = rot_diff_axis(rA_3x3, rB1, vec.reshape(3))[0] 354 | 355 | return theta 356 | 357 | 358 | def rot_canonical_sym( 359 | rA_3x3: ndarray, rB_3x3: ndarray, sym: SymLabel, split=100, return_theta=False 360 | ) -> "ndarray | tuple[ndarray, float]": 361 | """Find the optimal rotation ``rot`` that minimize ``theta(rA, rB @ rot)``. 362 | 363 | :param rA_3x3: often the ground truth rotation. 364 | :param rB_3x3: often the predicted rotation. 365 | :param sym: symmetry label. 366 | :param split: see :func:`rots_of_sym`. 367 | :param return_theta: if enabled, a tuple of the ``rot`` and its theta will both 368 | be returned. 369 | 370 | :returns: the optimal ``rot`` 371 | """ 372 | 373 | if str(sym) == "any": 374 | return rB_3x3.T @ rA_3x3 375 | 376 | rots = rots_of_sym(sym, split=split) 377 | rBs = rB_3x3 @ rots 378 | thetas = rot_diff_theta(rA_3x3, rBs) 379 | index = np.argmin(thetas) 380 | if return_theta: 381 | return rots[index], thetas[index] 382 | else: 383 | return rots[index] 384 | 385 | 386 | def rots_of_sym(sym: SymLabel, split=20) -> ndarray: 387 | """Get a list of rotation group corresponding to the sym label. 388 | 389 | :param split: Set the snap of rotation to ``2 * pi / split`` for continuous 390 | symmetry. 391 | 392 | :return: ndarray of shape [?, 3, 3] containing a set of rotation matrix. 
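    For illustration, the sizes of a few of these groups::

        >>> from cutoop.rotation import SymLabel, rots_of_sym
        >>> rots_of_sym(SymLabel.from_str("box")).shape
        (4, 3, 3)
        >>> rots_of_sym(SymLabel.from_str("cube")).shape
        (24, 3, 3)
        >>> rots_of_sym(SymLabel.from_str("z-cone"), split=36).shape
        (36, 3, 3)
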
393 | """ 394 | snap = 2 * np.pi / split 395 | symname = str(sym) 396 | 397 | if symname == "any": 398 | rots = [] 399 | for sx in range(split): 400 | for sy in range(split): 401 | for sz in range(split): 402 | r = ( 403 | _Rotation.from_rotvec(ax["x"] * snap * sx) 404 | @ _Rotation.from_rotvec(ax["y"] * snap * sy) 405 | @ _Rotation.from_rotvec(ax["z"] * snap * sz) 406 | ) 407 | rots.append(r.as_matrix()) 408 | return np.array(rots) # this is extremely not efficient 409 | elif symname == "cube": 410 | return np.array(cube_group) 411 | elif symname == "box": 412 | return np.array(cube_flip_group) 413 | elif symname.endswith("flip"): 414 | return np.array([np.eye(3), rot180[symname[:1]]]) 415 | elif symname == "none": 416 | return np.array([np.eye(3)]) 417 | elif symname.endswith("square-tube"): 418 | return np.array([g @ h for g in qtr[symname[:1]][:2] for h in cube_flip_group]) 419 | elif symname.endswith("square-pyramid"): 420 | return np.array(qtr[symname[:1]]) 421 | else: 422 | axis = symname[:1] 423 | vec = ax[axis] # norm(vec) == 1 424 | rots = np.array( 425 | [_Rotation.from_rotvec(vec * snap * s).as_matrix() for s in range(split)] 426 | ) 427 | if symname.endswith("cylinder"): 428 | half_axis = "x" if axis == "y" else "y" # any other axis is ok 429 | rots = np.concatenate([rots, rots @ rot180[half_axis]], axis=0) 430 | # otherwise cone 431 | return rots 432 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/transform.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3D/2D Affine Transformation 3 | =========================== 4 | """ 5 | 6 | import numpy as np 7 | from numpy import ndarray 8 | 9 | from .data_types import CameraIntrinsicsBase 10 | 11 | 12 | def toAffine(scale_N: ndarray, rot_Nx3x3: ndarray, tr_Nx3: ndarray): 13 | """return affine transformation: [N, 4, 4]""" 14 | if scale_N.shape[0] == 0: 15 | return np.zeros((0, 4, 4)), np.zeros((0, 3)) 16 | rot_Nx3x3 = rot_Nx3x3.reshape(-1, 3, 3) * scale_N.reshape(-1, 1, 1) 17 | tr_Nx3 = tr_Nx3.reshape(-1, 3) 18 | res = np.repeat(np.eye(4)[np.newaxis, ...], rot_Nx3x3.shape[0], axis=0) 19 | res[:, :3, :3] = rot_Nx3x3 20 | res[:, :3, 3] = tr_Nx3 21 | return res 22 | 23 | 24 | def sRTdestruct(rts_Nx4x4: ndarray) -> "tuple[ndarray, ndarray, ndarray]": 25 | """Destruct multiple 4x4 affine transformation into scales, rotations and translations. 26 | 27 | :return: scales, rotations and translations each of shape [N], [N, 3, 3], [N, 3] 28 | """ 29 | if rts_Nx4x4.shape[0] == 0: 30 | return np.zeros(0), np.zeros((0, 3, 3)), np.zeros((0, 3)) 31 | scales = np.cbrt(np.linalg.det(rts_Nx4x4[:, :3, :3])) 32 | return ( 33 | scales, 34 | (rts_Nx4x4[:, :3, :3] / scales.reshape(-1, 1, 1)), 35 | rts_Nx4x4[:, :3, 3], 36 | ) 37 | 38 | 39 | def depth2xyz(depth_img: ndarray, intrinsics: CameraIntrinsicsBase): 40 | """Transform ``depth_img`` pixel value to 3D coordinates under cv space, using camera intrinsics. 
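    A minimal sketch with made-up intrinsics::

        import numpy as np
        from cutoop.data_types import CameraIntrinsicsBase
        from cutoop.transform import depth2xyz

        cam = CameraIntrinsicsBase(fx=600, fy=600, cx=320, cy=240, width=640, height=480)
        depth = np.ones((480, 640), dtype=np.float32)  # a flat plane one meter away
        xyz = depth2xyz(depth, cam)                     # (480, 640, 3) points in cv space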
41 | 42 | About different camera space: 43 | 44 | - cv space: x goes right, y goes down, and you look through the positive direction of z axis 45 | - blender camera space: x goes right, y goes up, and you look through the negative direction of z axis 46 | 47 | :param depth_img: 2D matrix with shape (H, W) 48 | :returns: coordinates of each pixel with shape (H, W, 3) 49 | """ 50 | 51 | # scale camera parameters 52 | h, w = depth_img.shape 53 | scale_x = w / intrinsics.width 54 | scale_y = h / intrinsics.height 55 | fx = intrinsics.fx * scale_x 56 | fy = intrinsics.fy * scale_y 57 | x_offset = intrinsics.cx * scale_x 58 | y_offset = intrinsics.cy * scale_y 59 | 60 | indices = np.indices((h, w), dtype=np.float32).transpose(1, 2, 0) 61 | z_e = depth_img 62 | x_e = (indices[..., 1] - x_offset) * z_e / fx 63 | y_e = (indices[..., 0] - y_offset) * z_e / fy 64 | xyz_img = np.stack([x_e, y_e, z_e], axis=-1) # Shape: [H x W x 3] 65 | return xyz_img 66 | 67 | 68 | def pixel2xyz(h: int, w: int, pixel: ndarray, intrinsics: CameraIntrinsicsBase): 69 | """ 70 | Transform `(pixel[0], pixel[1])` to normalized 3D vector under cv space, using camera intrinsics. 71 | 72 | :param h: height of the actual image 73 | :param w: width of the actual image 74 | """ 75 | 76 | # scale camera parameters 77 | scale_x = w / intrinsics.width 78 | scale_y = h / intrinsics.height 79 | fx = intrinsics.fx * scale_x 80 | fy = intrinsics.fy * scale_y 81 | x_offset = intrinsics.cx * scale_x 82 | y_offset = intrinsics.cy * scale_y 83 | 84 | x = (pixel[1] - x_offset) / fx 85 | y = (pixel[0] - y_offset) / fy 86 | vec = np.array([x, y, 1]) 87 | return vec / np.linalg.norm(vec) 88 | 89 | 90 | def transform_coordinates_3d(coordinates: ndarray, sRT: ndarray): 91 | """Apply 3D affine transformation to pointcloud. 92 | 93 | :param coordinates: ndarray of shape [3, N] 94 | :param sRT: ndarray of shape [4, 4] 95 | 96 | :returns: new pointcloud of shape [3, N] 97 | """ 98 | assert coordinates.shape[0] == 3 99 | coordinates = np.vstack( 100 | [coordinates, np.ones((1, coordinates.shape[1]), dtype=np.float32)] 101 | ) 102 | new_coordinates = sRT @ coordinates 103 | new_coordinates = new_coordinates[:3, :] / new_coordinates[3, :] 104 | return new_coordinates 105 | 106 | 107 | def calculate_2d_projections(coordinates_3d: ndarray, intrinsics_K: ndarray): 108 | """ 109 | :param coordinates_3d: [3, N] 110 | :param intrinsics_K: K matrix [3, 3] (the return value of :meth:`.data_types.CameraIntrinsicsBase.to_matrix`) 111 | 112 | :returns: projected_coordinates: [N, 2] 113 | """ 114 | projected_coordinates = intrinsics_K @ coordinates_3d 115 | projected_coordinates = projected_coordinates[:2, :] / projected_coordinates[2, :] 116 | projected_coordinates = projected_coordinates.transpose() 117 | projected_coordinates = np.array(projected_coordinates, dtype=np.int32) 118 | 119 | return projected_coordinates 120 | -------------------------------------------------------------------------------- /common/build/lib/cutoop/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper Functions 3 | ================ 4 | 5 | Common Helpers 6 | -------------- 7 | 8 | .. autofunction:: mock_time 9 | 10 | .. autofunction:: pool_map 11 | 12 | .. autofunction:: save_pctxt 13 | 14 | Visualization Helpers 15 | --------------------- 16 | 17 | .. autofunction:: draw_bboxes 18 | 19 | .. autofunction:: draw_text 20 | 21 | .. autofunction:: draw_object_label 22 | 23 | .. autofunction:: draw_3d_bbox 24 | 25 | .. 
autofunction:: draw_pose_axes 26 | 27 | """ 28 | 29 | from contextlib import contextmanager 30 | import logging 31 | import time 32 | from typing import Callable, TypeVar 33 | from tqdm import tqdm 34 | import multiprocessing.pool 35 | import cv2 36 | import numpy as np 37 | 38 | from .bbox import create_3d_bbox 39 | from .transform import ( 40 | calculate_2d_projections, 41 | transform_coordinates_3d, 42 | ) 43 | from .data_types import CameraIntrinsicsBase 44 | 45 | # used for drawing 46 | _bbox_coef = np.array( 47 | [ 48 | [1, 1, 1], 49 | [1, 1, -1], 50 | [-1, 1, 1], 51 | [-1, 1, -1], 52 | [1, -1, 1], 53 | [1, -1, -1], 54 | [-1, -1, 1], 55 | [-1, -1, -1], 56 | ] 57 | ) 58 | 59 | 60 | def _draw_seg( 61 | img: np.ndarray, 62 | start: np.ndarray, 63 | end: np.ndarray, 64 | start_color: np.ndarray, 65 | end_color: "np.ndarray | None" = None, 66 | thickness_coef=1, 67 | ): 68 | if end_color is None: 69 | end_color = start_color 70 | 71 | # automatically adjust line thickness according to image resolution 72 | thickness = round(img.shape[1] / 320 * thickness_coef) 73 | steps = 10 74 | step = (end - start) / steps 75 | s_step = (end_color - start_color) / steps 76 | for i in range(steps): 77 | x, y = start + (i * step), start + ((i + 1) * step) 78 | sx = start_color + ((i + 0.5) * s_step) 79 | x = tuple(x.astype(int)) 80 | y = tuple(y.astype(int)) 81 | sx = tuple(sx.astype(int)) 82 | # set_trace() 83 | img = cv2.line( 84 | np.ascontiguousarray(img.copy()), 85 | x, 86 | y, 87 | (int(sx[0]), int(sx[1]), int(sx[2])), 88 | thickness, 89 | ) 90 | 91 | return img 92 | 93 | 94 | def draw_bboxes(img, projected_bbox_Nx2, transformed_bbox_Nx3, color=None, thickness=1): 95 | """ 96 | :param color: can be a 3-element array/tuple. 97 | """ 98 | if color is None: 99 | colors = (_bbox_coef * 255 + 255) / 2 100 | else: 101 | colors = np.tile(np.array(color).reshape(3), [8, 1]) 102 | 103 | projected_bbox_Nx2 = np.int32(projected_bbox_Nx2).reshape(-1, 2) 104 | lines = [ 105 | [4, 5], 106 | [5, 7], 107 | [6, 4], 108 | [7, 6], 109 | [0, 4], 110 | [1, 5], 111 | [2, 6], 112 | [3, 7], 113 | [0, 1], 114 | [1, 3], 115 | [2, 0], 116 | [3, 2], 117 | ] 118 | # sort by distance 119 | lines.sort( 120 | reverse=True, 121 | key=lambda x: ( 122 | (transformed_bbox_Nx3[x[0]] + transformed_bbox_Nx3[x[1]]) ** 2 123 | ).sum(), 124 | ) 125 | for i, j in lines: 126 | img = _draw_seg( 127 | img, 128 | projected_bbox_Nx2[i], 129 | projected_bbox_Nx2[j], 130 | colors[i], 131 | colors[j], 132 | thickness_coef=thickness, 133 | ) 134 | 135 | return img 136 | 137 | 138 | def draw_text(img: np.ndarray, text: str, pos: np.ndarray) -> np.ndarray: 139 | """draw black text with red border""" 140 | text_args = { 141 | "fontFace": cv2.FONT_HERSHEY_SIMPLEX, 142 | "fontScale": 0.4 * img.shape[1] / 640, 143 | "lineType": cv2.LINE_AA, 144 | } 145 | img = cv2.putText( 146 | img, str(text), org=pos, color=(255, 0, 0), thickness=6, **text_args 147 | ) 148 | img = cv2.putText( 149 | img, str(text), org=pos, color=(0, 0, 0), thickness=1, **text_args 150 | ) 151 | return img 152 | 153 | 154 | def draw_3d_bbox( 155 | img: np.ndarray, 156 | intrinsics: CameraIntrinsicsBase, 157 | sRT_4x4: np.ndarray, 158 | bbox_side_len: "list[float]", 159 | color=None, 160 | thickness=1, 161 | ) -> np.ndarray: 162 | """Visualize predicted 3D bounding box. 163 | 164 | :param color: See :func:`draw_bboxes`. 165 | 166 | :return: The image with drawn bounding box. 
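    A minimal usage sketch, assuming ``img``, ``meta`` and ``obj`` were obtained
    from the data loader and the image meta file of one frame::

        img = draw_3d_bbox(
            img,                         # RGB image as a numpy array
            meta.camera.intrinsics,      # CameraIntrinsicsBase of this frame
            obj.pose().to_affine(),      # 4x4 object-to-camera transform
            obj.meta.bbox_side_len,      # per-axis box size in camera space
            color=(0, 255, 0),
        )
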
167 | """ 168 | 169 | # set_trace() 170 | matK = intrinsics.to_matrix() 171 | h, w = img.shape[0], img.shape[1] 172 | scale_y = h / intrinsics.height 173 | scale_x = w / intrinsics.width 174 | scale_2d = np.array([scale_x, scale_y]) 175 | 176 | bbox_3d = create_3d_bbox(bbox_side_len) 177 | transformed_bbox_Nx3 = transform_coordinates_3d(bbox_3d, sRT_4x4) 178 | projected_bbox = calculate_2d_projections(transformed_bbox_Nx3, matK) 179 | projected_bbox = np.array(projected_bbox * scale_2d[None, ...], dtype=np.int32) 180 | img = draw_bboxes( 181 | img, projected_bbox, transformed_bbox_Nx3.T, color=color, thickness=thickness 182 | ) 183 | 184 | return img 185 | 186 | 187 | def draw_pose_axes( 188 | img: np.ndarray, 189 | intrinsics: CameraIntrinsicsBase, 190 | sRT_4x4: np.ndarray, 191 | length: float, 192 | thickness=1, 193 | ) -> np.ndarray: 194 | """Visualize predicted pose. The _XYZ_ axes are colored by _RGB_ respectively. 195 | 196 | :return: The image with drawn pose axes. 197 | """ 198 | 199 | # set_trace() 200 | matK = intrinsics.to_matrix() 201 | h, w = img.shape[0], img.shape[1] 202 | scale_y = h / intrinsics.height 203 | scale_x = w / intrinsics.width 204 | scale_2d = np.array([scale_x, scale_y]) 205 | 206 | axes_ends = transform_coordinates_3d( 207 | np.concatenate( 208 | [np.zeros((3, 1)), np.diag([length, length, length]) / 2], axis=1 209 | ), 210 | sRT_4x4, 211 | ) 212 | origin, ax, ay, az = np.array( 213 | calculate_2d_projections(axes_ends, matK) * scale_2d[None, ...], 214 | dtype=np.int32, 215 | ) 216 | img = _draw_seg(img, origin, ax, np.array([255, 0, 0]), thickness_coef=thickness) 217 | img = _draw_seg(img, origin, ay, np.array([0, 255, 0]), thickness_coef=thickness) 218 | img = _draw_seg(img, origin, az, np.array([0, 0, 255]), thickness_coef=thickness) 219 | 220 | return img 221 | 222 | 223 | def draw_object_label( 224 | img: np.ndarray, intrinsics: CameraIntrinsicsBase, sRT_4x4: np.ndarray, label: str 225 | ) -> np.ndarray: 226 | """draw label text at the center of the object. 227 | 228 | :return: The image with drawn object label. 229 | """ 230 | 231 | matK = intrinsics.to_matrix() 232 | h, w = img.shape[0], img.shape[1] 233 | scale_y = h / intrinsics.height 234 | scale_x = w / intrinsics.width 235 | scale_2d = np.array([scale_x, scale_y]) 236 | 237 | pos3d = sRT_4x4[:3, 3].reshape(3, 1) 238 | pos = np.array(calculate_2d_projections(pos3d, matK)[0] * scale_2d, dtype=np.int32) 239 | 240 | img = draw_text(img, label, pos) 241 | 242 | return img 243 | 244 | 245 | @contextmanager 246 | def mock_time( 247 | fmt: str, logger: "logging.Logger | None" = None, use_print=False, debug=False 248 | ): 249 | """Record time usage. 250 | 251 | :param fmt: info print format 252 | :param use_print: use `print` instead of `logger.info` to display information 253 | :param logger: specify logger. Defaults to the ``'timer' logger of root``. 254 | :param debug: logging using debug level instead of info level. 255 | 256 | .. testsetup:: 257 | 258 | from cutoop.utils import mock_time 259 | 260 | Usage: 261 | 262 | .. 
testcode:: 263 | 264 | with mock_time("action took %.4fs"): 265 | # some heavy operation 266 | pass 267 | 268 | """ 269 | if logger == None: 270 | logger = logging.root.getChild("timer") 271 | 272 | start = time.perf_counter() 273 | yield 274 | stop = time.perf_counter() 275 | 276 | if use_print: 277 | print(fmt % (stop - start)) 278 | elif debug: 279 | logger.debug(fmt, stop - start) 280 | else: 281 | logger.info(fmt, stop - start) 282 | 283 | 284 | def save_pctxt( 285 | file: str, 286 | pts: np.ndarray, 287 | rgb: "np.ndarray | None" = None, 288 | normal: "np.ndarray | None" = None, 289 | ): 290 | """Save point cloud array to file. 291 | 292 | :param pts: point cloud array with shape (n, 3) 293 | :param file: dest file name 294 | :param rgb: optional rgb data with shape (n, 3), **conflicting with normal**. 295 | The result format would be ``X Y Z R G B`` for one line. 296 | :param normal: optional normal data with shape (n, 3), **conflicting with rgb**. 297 | The result format would be ``X Y Z NX NY NZ`` for one line. 298 | """ 299 | x, y, z = pts.T 300 | if rgb is not None: 301 | r, g, b = rgb.T 302 | with open(file, "w") as f: 303 | for i in range(x.shape[0]): 304 | f.write(f"{x[i]} {y[i]} {z[i]} {r[i]} {g[i]} {b[i]}\n") 305 | elif normal is not None: 306 | r, g, b = normal.T 307 | with open(file, "w") as f: 308 | for i in range(x.shape[0]): 309 | f.write(f"{x[i]} {y[i]} {z[i]} {r[i]} {g[i]} {b[i]}\n") 310 | else: 311 | np.savetxt(file, pts, fmt="%.6f") 312 | 313 | 314 | _R = TypeVar("_R") 315 | 316 | 317 | def pool_map( 318 | func: Callable[..., _R], items: list, processes=8, use_tqdm=True, desc="pool map" 319 | ) -> "list[_R]": 320 | """Thread pooling version of `map`. Equivalent to ``map(func, items)``, except that 321 | the argument should be a list (implementing ``__len__``) instead of an iterator. 322 | 323 | :param processes: number of threads. 324 | :param use_tqdm: whether to display a tqdm bar. 325 | :param desc: set the title of tqdm progress bar if ``use_tqdm`` is set. 326 | 327 | .. doctest:: 328 | 329 | >>> def square(x): 330 | ... 
return x * x 331 | >>> cutoop.utils.pool_map(square, list(range(10))) 332 | [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] 333 | 334 | """ 335 | 336 | def run(pair): 337 | id, arg = pair 338 | return id, func(arg) 339 | 340 | with multiprocessing.pool.ThreadPool(processes=processes) as pool: 341 | mapper = pool.imap_unordered(run, enumerate(items)) 342 | ret = [None] * len(items) 343 | if use_tqdm: 344 | mapper = tqdm(mapper, total=len(items), desc=f"{desc} (proc={processes})") 345 | for id, r in mapper: 346 | ret[id] = r 347 | return ret 348 | -------------------------------------------------------------------------------- /common/cutoop.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: cutoop 3 | Version: 0.1.0 4 | Summary: Common UTilities for Omni6DPose 5 | Author: Weiyao Huang 6 | Author-email: jy.cat@qq.com 7 | License: MIT 8 | Keywords: post_estimation 9 | Classifier: Programming Language :: Python :: 3 10 | Requires-Python: >=3.7 11 | Description-Content-Type: text/x-rst 12 | License-File: LICENSE.rst 13 | Requires-Dist: importlib-metadata; python_version < "3.10" 14 | Requires-Dist: numpy>=1.21 15 | Requires-Dist: ipdb>=0.13.9 16 | Requires-Dist: matplotlib>=3.5 17 | Requires-Dist: opencv_python>=4.8.0.76 18 | Requires-Dist: Pillow>=8.3 19 | Requires-Dist: rich>=13.7.0 20 | Requires-Dist: scipy>=1.7.3 21 | Requires-Dist: tqdm>=4.65.2 22 | Requires-Dist: typing_extensions>=4.7.1 23 | 24 | Common UTilities for Omni6DPose 25 | =============================== 26 | 27 | **cutoop** (pronounced ``/'kʌtup/``) is the official package containing convenient 28 | utilities for data preparation and model evaluation over the Omni6DPose 29 | dataset. 30 | 31 | Visit `Omni6DPose `_ for details. 32 | 33 | v0.1.0 34 | ====== 35 | 36 | - Initial release 37 | 38 | MIT License 39 | 40 | Copyright (c) 2024 Weiyao Huang 41 | 42 | Permission is hereby granted, free of charge, to any person obtaining a copy 43 | of this software and associated documentation files (the "Software"), to deal 44 | in the Software without restriction, including without limitation the rights 45 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 46 | copies of the Software, and to permit persons to whom the Software is 47 | furnished to do so, subject to the following conditions: 48 | 49 | The above copyright notice and this permission notice shall be included in all 50 | copies or substantial portions of the Software. 51 | 52 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 53 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 54 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 55 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 56 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 57 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 58 | SOFTWARE. 
59 | -------------------------------------------------------------------------------- /common/cutoop.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | CHANGELOG.rst 2 | LICENSE.rst 3 | README.rst 4 | setup.cfg 5 | setup.py 6 | ./__init__.py 7 | ./align.py 8 | ./bbox.py 9 | ./data_loader.py 10 | ./data_types.py 11 | ./eval_utils.py 12 | ./image_meta.py 13 | ./log.py 14 | ./obj_meta.py 15 | ./rotation.py 16 | ./transform.py 17 | ./utils.py 18 | ./iou/__init__.py 19 | ./iou/box.py 20 | ./iou/iou.py 21 | cutoop.egg-info/PKG-INFO 22 | cutoop.egg-info/SOURCES.txt 23 | cutoop.egg-info/dependency_links.txt 24 | cutoop.egg-info/requires.txt 25 | cutoop.egg-info/top_level.txt -------------------------------------------------------------------------------- /common/cutoop.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /common/cutoop.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.21 2 | ipdb>=0.13.9 3 | matplotlib>=3.5 4 | opencv_python>=4.8.0.76 5 | Pillow>=8.3 6 | rich>=13.7.0 7 | scipy>=1.7.3 8 | tqdm>=4.65.2 9 | typing_extensions>=4.7.1 10 | 11 | [:python_version < "3.10"] 12 | importlib-metadata 13 | -------------------------------------------------------------------------------- /common/cutoop.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | cutoop 2 | -------------------------------------------------------------------------------- /common/data_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dataset and Loading Functions 3 | ============================= 4 | """ 5 | 6 | from functools import partial 7 | import glob 8 | import os 9 | from PIL import Image as _Image 10 | import warnings 11 | 12 | from .image_meta import ImageMetaData 13 | from .align import estimateSimilarityTransform 14 | from .transform import depth2xyz 15 | from .utils import draw_3d_bbox, draw_object_label, draw_pose_axes 16 | from .data_types import Pose 17 | from .image_meta import ImageMetaData 18 | 19 | os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" # before importing cv2 20 | import cv2 21 | import numpy as np 22 | 23 | 24 | class Dataset(object): 25 | """This class can be used as a base class for dataloader construction.""" 26 | 27 | @staticmethod 28 | def load_meta(path: str) -> ImageMetaData: 29 | """See :meth:`.image_meta.ImageMetaData.load_json`.""" 30 | return ImageMetaData.load_json(path) 31 | 32 | @staticmethod 33 | def load_color(path: str) -> np.ndarray: 34 | """Load RGB image in **RGB** order.""" 35 | data = cv2.imread(path)[:, :, ::-1] # RGB order 36 | return data 37 | 38 | @staticmethod 39 | def load_coord(path: str) -> np.ndarray: 40 | """Read NOCS image (PNG). This function does the following things: 41 | 42 | 1. Read, normalize, and transform the image into **RGB** order. 43 | 2. Due to historical reasons, the ``B`` channel is transformed to ``1 - B`` after 44 | reading the image, which is a common transformation for other NOCS image in prior 45 | datasets. 46 | 3. Minus `0.5` to change the range of pixel values from `[0, 1]` to `[-0.5, 0.5]`. 
47 |
48 | :return: float array of shape (Height, Width, 3) ranged [-0.5, 0.5], denoting xyz coordinates in NOCS space
49 | """
50 | # set_trace()
51 | img = cv2.imread(path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 255
52 | # https://stackoverflow.com/questions/50963283/imshow-doesnt-need-convert-from-bgr-to-rgb
53 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
54 | img = img.clip(0, 1)
55 | img[:, :, 2] = 1 - img[:, :, 2] # 1 - z
56 |
57 | return img - 0.5
58 |
59 | @staticmethod
60 | def load_ir(path: str) -> np.ndarray:
61 | """Load the rendered IR image"""
62 | data = _Image.open(path).convert("L")
63 | return np.array(data)
64 |
65 | @staticmethod
66 | def load_depth(path: str) -> np.ndarray:
67 | """
68 | This function reads the depth image, selecting the first channel if multiple
69 | channels are detected.
70 |
71 | :return: A 2D float array of shape (Height, Width). For Omni6DPose,
72 | the unit of pixel value is meter.
73 | """
74 | img = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
75 | if len(img.shape) == 3:
76 | img = img[:, :, 0]
77 | return img # unit: m
78 |
79 | @staticmethod
80 | def load_mask(path: str) -> np.ndarray:
81 | """Load the mask image.
82 |
83 | :return: uint8 array of shape (Height, Width), whose values are related
84 | to the objects' mask ids (:attr:`.image_meta.ObjectPoseInfo.mask_id`).
85 | """
86 | img = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
87 | if len(img.shape) == 3:
88 | img = img[:, :, 2]
89 | return np.array(img * 255, dtype=np.uint8)
90 |
91 | @staticmethod
92 | def load_mask_sam(path: str) -> "tuple[np.ndarray, np.ndarray]":
93 | """Load the mask generated by SAM.
94 |
95 | :return: (masks, mask_ids) where masks is a bool array of shape (n_objects, Height, Width)
96 | denoting the binary mask of each object, corresponding to mask_ids, whose shape is (n_objects,).
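
A minimal usage sketch (the path below is hypothetical)::

    masks, mask_ids = Dataset.load_mask_sam("scene/000042_mask_sam.npz")
    first_mask = masks[0]   # binary mask of shape (Height, Width)
    first_id = mask_ids[0]  # the mask id this mask corresponds to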
97 | """ 98 | data = np.load(path) 99 | masks = data["masks"] 100 | mask_ids = data["mask_ids"] 101 | return masks, mask_ids 102 | 103 | @staticmethod 104 | def load_normal(path: str) -> np.ndarray: 105 | """Read normal image (exr) 106 | 107 | :return: float array of shape (H, W, 3) ranged [-1, 1] containing normal vectors 108 | """ 109 | img = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 110 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 111 | img[:, :, 0] = 0 - img[:, :, 0] # x => -x 112 | return img 113 | 114 | LOADER_MAP = { 115 | "color": ("color.png", load_color), 116 | "coord": ("coord.png", load_coord), 117 | "ir_l": ("ir_l.png", load_ir), 118 | "ir_r": ("ir_r.png", load_ir), 119 | "depth": ("depth.exr", load_depth), 120 | "depth_syn": ("depth_syn.exr", load_depth), 121 | "mask": ("mask.exr", load_mask), 122 | "mask_sam": ("mask_sam.npz", load_mask_sam), 123 | "normal": ("normal.exr", load_normal), 124 | "meta": ("meta.json", load_meta), 125 | } 126 | 127 | def visualize_by( 128 | vis_img: np.ndarray, 129 | prefix: str, 130 | show_id=True, 131 | show_box=True, 132 | show_nocs_pred=False, 133 | show_axes_len: "float | None" = None, 134 | ): 135 | metadata = Dataset.load_meta(prefix + "meta.json") 136 | masks = Dataset.load_mask(prefix + "mask.exr") 137 | intrinsics = metadata.camera.intrinsics 138 | depth_pts = depth2xyz(Dataset.load_depth(prefix + "depth.exr"), intrinsics) 139 | if show_nocs_pred: 140 | coords = Dataset.load_coord(prefix + "coord.png") 141 | # set_trace() 142 | 143 | bbox_task = [] 144 | label_task = [] 145 | for i, data in enumerate(metadata.objects): 146 | idx = tuple(np.argwhere(masks == data.mask_id).T) 147 | pts = depth_pts[idx] 148 | if show_nocs_pred: 149 | coord = coords[idx] 150 | 151 | bbox_side_len = data.meta.bbox_side_len 152 | obj_pose = Pose( 153 | quaternion=data.quaternion_wxyz, translation=data.translation 154 | ) 155 | if show_box: 156 | bbox_task.append( 157 | partial( 158 | draw_3d_bbox, 159 | intrinsics=intrinsics, 160 | sRT_4x4=obj_pose.to_affine(), 161 | bbox_side_len=bbox_side_len, 162 | ) 163 | ) 164 | if show_axes_len is not None: 165 | bbox_task.append( 166 | partial( 167 | draw_pose_axes, 168 | intrinsics=intrinsics, 169 | sRT_4x4=obj_pose.to_affine(), 170 | length=show_axes_len, 171 | ) 172 | ) 173 | 174 | # set_trace() 175 | if show_nocs_pred and show_box: 176 | _, _, _, pred_sRT = estimateSimilarityTransform(coord, pts) 177 | bbox_task.append( 178 | partial( 179 | draw_3d_bbox, 180 | intrinsics=intrinsics, 181 | sRT_4x4=pred_sRT, 182 | bbox_side_len=2 * np.amax(np.abs(coord), axis=0), 183 | ) 184 | ) 185 | if show_id: 186 | label_task.append( 187 | partial( 188 | draw_object_label, 189 | intrinsics=intrinsics, 190 | sRT_4x4=obj_pose.to_affine(), 191 | label=str(i), 192 | ) 193 | ) 194 | 195 | for filter in bbox_task: 196 | vis_img = filter(img=vis_img) 197 | for filter in label_task: 198 | vis_img = filter(img=vis_img) 199 | 200 | return vis_img 201 | 202 | def visualize( 203 | prefix: str, 204 | out_path="./visual.png", 205 | show_id=True, 206 | show_box=True, 207 | show_nocs_pred=False, 208 | show_axes_len=None, 209 | ): 210 | """A convenient helper for data visualization. 211 | 212 | .. doctest:: 213 | 214 | >>> cutoop.data_loader.Dataset.visualize( 215 | ... prefix="../../misc/sample_real/000000_", 216 | ... out_path="source/_static/gr_5.png", 217 | ... ) 218 | 219 | .. 
image:: _static/gr_5.png 220 | :align: center 221 | """ 222 | color = Dataset.load_color(prefix + "color.png") 223 | vis_img = Dataset.visualize_by( 224 | color, 225 | prefix, 226 | show_id=show_id, 227 | show_box=show_box, 228 | show_nocs_pred=show_nocs_pred, 229 | show_axes_len=show_axes_len, 230 | ) 231 | vis_img = cv2.cvtColor(vis_img, cv2.COLOR_RGB2BGR) 232 | cv2.imwrite(out_path, vis_img) 233 | 234 | @staticmethod 235 | def glob_prefix(root: str) -> "list[str]": 236 | """Recursively find the prefix list of data (by RGB image). 237 | 238 | Since we use the `glob` package for image searching, you may use the asterisk 239 | symbol ``*`` for pattern matching. However, since we internally use the ``**`` 240 | pattern, you're not encouraged to use ``**`` to avoid duplicated prefix outputs. 241 | 242 | :param root: the root directory for searching. 243 | 244 | .. doctest:: 245 | 246 | >>> cutoop.data_loader.Dataset.glob_prefix("../../misc") 247 | ['../../misc/sample/0000_', '../../misc/sample/0001_', '../../misc/sample/0002_', '../../misc/sample_real/000000_'] 248 | >>> cutoop.data_loader.Dataset.glob_prefix("../../*/sample") 249 | ['../../misc/sample/0000_', '../../misc/sample/0001_', '../../misc/sample/0002_'] 250 | 251 | """ 252 | dirs = [root] 253 | prefixes: list[str] = [] 254 | for dir in dirs: 255 | prefs = [ 256 | path[:-9] 257 | for path in glob.glob( 258 | os.path.join(dir, "**/*_color.png"), recursive=True 259 | ) 260 | ] 261 | prefixes.extend(prefs) 262 | return sorted(prefixes) 263 | -------------------------------------------------------------------------------- /common/data_types.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data Types 3 | ========== 4 | 5 | Data types are defined to unify the parameter types of functions. 6 | Moreover, they're able to be stored to and retrieved from JSON file. 7 | """ 8 | 9 | from dataclasses import dataclass 10 | import numpy as np 11 | from scipy.spatial.transform import Rotation as _Rot 12 | 13 | 14 | @dataclass 15 | class Pose(object): 16 | """Object/Camera pose representation""" 17 | 18 | quaternion: "tuple[float, float, float, float]" 19 | """quaternion in scale-first format (wxyz)""" 20 | 21 | translation: "tuple[float, float, float]" 22 | """translation from object (centered) space to camera space""" 23 | 24 | def __post_init__(self): 25 | assert len(self.quaternion) == 4 26 | assert len(self.translation) == 3 27 | 28 | def to_affine(self, scale=None): 29 | """transform to affine transformation (with no additional scaling) 30 | 31 | :return: 4x4 numpy array. 32 | 33 | Here's an example of getting :class:`Pose` from rotation matrix: 34 | 35 | .. doctest:: 36 | 37 | >>> from cutoop.data_types import Pose 38 | >>> from scipy.spatial.transform import Rotation 39 | >>> x, y, z, w = Rotation.from_matrix([ 40 | ... [ 0., 0., 1.], 41 | ... [ 0., 1., 0.], 42 | ... [-1., 0., 0.] 43 | ... ]).as_quat() 44 | >>> pose = Pose(quaternion=[w, x, y, z], translation=[1, 1, 1]) 45 | >>> pose.to_affine() 46 | array([[ 0., 0., 1., 1.], 47 | [ 0., 1., 0., 1.], 48 | [-1., 0., 0., 1.], 49 | [ 0., 0., 0., 1.]], dtype=float32) 50 | """ 51 | q = self.quaternion 52 | rot = _Rot.from_quat([q[1], q[2], q[3], q[0]]).as_matrix() 53 | if scale is not None: 54 | rot = rot * scale 55 | trans = np.array(self.translation) 56 | mtx = np.eye(4).astype(np.float32) 57 | mtx[:3, :3] = rot 58 | mtx[:3, 3] = trans 59 | return mtx 60 | 61 | 62 | @dataclass 63 | class CameraIntrinsicsBase: 64 | """Camera intrinsics data. 
65 | The unit of ``fx, fy, cx, cy, width, height`` are all pixels. 66 | """ 67 | 68 | fx: float # unit: pixel 69 | fy: float # unit: pixel 70 | cx: float # unit: pixel 71 | cy: float # unit: pixel 72 | width: float # unit: pixel 73 | height: float # unit: pixel 74 | 75 | def to_matrix(self): 76 | """Transform to 3x3 K matrix. i. e.:: 77 | 78 | [[fx, 0, cx], 79 | [0, fy, cy], 80 | [0, 0, 1 ]] 81 | 82 | :return: 3x3 numpy array. 83 | """ 84 | fx = self.fx 85 | fy = self.fy 86 | cx = self.cx 87 | cy = self.cy 88 | return np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) 89 | 90 | def fov_x(self): 91 | return np.rad2deg(2 * np.arctan2(self.width, 2 * self.fx)) 92 | -------------------------------------------------------------------------------- /common/dist/cutoop-0.1.0-py3.10.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Omni6DPose/Omni6DPoseAPI/c1ed906eed2345259817b85bc9d852e302c4a730/common/dist/cutoop-0.1.0-py3.10.egg -------------------------------------------------------------------------------- /common/dist/cutoop-0.1.0-py3.8.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Omni6DPose/Omni6DPoseAPI/c1ed906eed2345259817b85bc9d852e302c4a730/common/dist/cutoop-0.1.0-py3.8.egg -------------------------------------------------------------------------------- /common/image_meta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data types for Image Metafile 3 | ============================= 4 | 5 | .. testsetup:: 6 | 7 | from cutoop.image_meta import ImageMetaData 8 | 9 | """ 10 | 11 | from dataclasses import dataclass 12 | import json 13 | 14 | from .data_types import CameraIntrinsicsBase, Pose 15 | 16 | 17 | @dataclass 18 | class ViewInfo(Pose): 19 | intrinsics: CameraIntrinsicsBase 20 | """Camera intrinsics""" 21 | scene_obj_path: str 22 | """Scene object mesh path""" 23 | 24 | background_image_path: str 25 | background_depth_path: str 26 | distances: "list[float]" 27 | kind: str 28 | 29 | def __post_init__(self): 30 | if not isinstance(self.intrinsics, CameraIntrinsicsBase): 31 | self.intrinsics = CameraIntrinsicsBase(**self.intrinsics) 32 | 33 | 34 | @dataclass 35 | class ObjectMetaInfo: 36 | oid: str 37 | """Object ID, which is used to index object metadata""" 38 | class_name: str 39 | """Class name of the object""" 40 | class_label: int 41 | """1-indexed class label of the object""" 42 | instance_path: str 43 | """Path to the model mesh file""" 44 | scale: "list[float]" 45 | """Scale from `object space` (not NOCS) to the `camera space`. 
46 | For the size of the object, refer to the object meta file.""" 47 | is_background: bool 48 | """Whether it is marked as a background object.""" 49 | 50 | # bounding box side len after scaling (deprecated) 51 | # should be equal to np.array(obj.meta.scale) * np.array( 52 | # objmeta.instance_dict[obj.meta.oid].dimensions 53 | # ) 54 | bbox_side_len: "list[float]" 55 | 56 | 57 | @dataclass 58 | class ObjectPoseInfo: 59 | mask_id: int 60 | """the value identifying this object in mask image""" 61 | meta: ObjectMetaInfo 62 | """object meta information.""" 63 | quaternion_wxyz: "list[float]" 64 | """object rotation in camera space""" 65 | translation: "list[float]" 66 | """object translation in camera space""" 67 | is_valid: bool 68 | """Whether the object meet requirements (faceup, reasonable real-world location)""" 69 | 70 | # object id in image (from 1 to the number of visible objects, deprecated) 71 | id: int 72 | # for data generation 73 | material: "list[str]" 74 | world_quaternion_wxyz: "list[float]" 75 | world_translation: "list[float]" 76 | 77 | def __post_init__(self): 78 | if not isinstance(self.meta, ObjectMetaInfo): 79 | self.meta = ObjectMetaInfo(**self.meta) 80 | 81 | def pose(self) -> Pose: 82 | """Construct :class:`Pose` relative to the camera coordinate.""" 83 | return Pose(quaternion=self.quaternion_wxyz, translation=self.translation) 84 | 85 | 86 | @dataclass 87 | class ImageMetaData: 88 | objects: "list[ObjectPoseInfo]" 89 | """A list of visiable objects""" 90 | camera: ViewInfo 91 | """Information of the scene""" 92 | scene_dataset: str 93 | """Dataset source of the scene""" 94 | 95 | # for data generation 96 | env_param: dict 97 | face_up: bool 98 | concentrated: bool 99 | comments: str 100 | runtime_seed: int 101 | baseline_dis: int 102 | emitter_dist_l: int 103 | 104 | def __post_init__(self): 105 | if isinstance(self.objects, dict): 106 | self.objects = [ 107 | ObjectPoseInfo(**x, mask_id=int(k.split("_")[0])) 108 | for k, x in self.objects.items() 109 | ] 110 | if not isinstance(self.camera, ViewInfo): 111 | self.camera = ViewInfo(**self.camera) 112 | 113 | @staticmethod 114 | def load_json(path: str) -> "ImageMetaData": 115 | """Load object meta data from json file 116 | 117 | 118 | .. 
doctest:: 119 | 120 | >>> meta = ImageMetaData.load_json("../../misc/sample_real/000000_meta.json") 121 | >>> meta.camera.intrinsics 122 | CameraIntrinsicsBase(fx=915.0556030273438, fy=914.1288452148438, cx=641.2314453125, cy=363.4847412109375, width=1280, height=720) 123 | 124 | """ 125 | 126 | with open(path, "r") as f: 127 | jsondata = json.load(f) 128 | 129 | return ImageMetaData(**jsondata) 130 | -------------------------------------------------------------------------------- /common/iou/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Bounding Box IoU 3 | ================ 4 | """ 5 | 6 | import numpy as np 7 | from .box import Box as _Box 8 | from .iou import IoU as _IoU 9 | 10 | 11 | def compute_3d_bbox_iou( 12 | rA_3x3: np.ndarray, 13 | tA_3: np.ndarray, 14 | sA_3: np.ndarray, 15 | rB_3x3: np.ndarray, 16 | tB_3: np.ndarray, 17 | sB_3: np.ndarray, 18 | ) -> float: 19 | """Compute the exact 3D IoU of two boxes using `scipy.spatial.ConvexHull `_""" 20 | 21 | try: 22 | boxA = _Box.from_transformation(rA_3x3, tA_3, sA_3) 23 | boxB = _Box.from_transformation(rB_3x3, tB_3, sB_3) 24 | 25 | return _IoU(boxA, boxB).iou() 26 | except KeyboardInterrupt: 27 | raise 28 | except: # silently ignore qhull error 29 | return 0 30 | -------------------------------------------------------------------------------- /common/iou/box.py: -------------------------------------------------------------------------------- 1 | """General 3D Bounding Box class. 2 | 3 | Origin: https://github.com/OasisYang/Wild6D/blob/main/lib/box.py 4 | """ 5 | 6 | import numpy as np 7 | from numpy.linalg import lstsq as optimizer 8 | from scipy.spatial.transform import Rotation as rotation_util 9 | 10 | EDGES = ( 11 | [1, 5], 12 | [2, 6], 13 | [3, 7], 14 | [4, 8], # lines along x-axis 15 | [1, 3], 16 | [5, 7], 17 | [2, 4], 18 | [6, 8], # lines along y-axis 19 | [1, 2], 20 | [3, 4], 21 | [5, 6], 22 | [7, 8], # lines along z-axis 23 | ) 24 | 25 | # The vertices are ordered according to the left-hand rule, so the normal 26 | # vector of each face will point inward the box. 27 | FACES = np.array( 28 | [ 29 | [5, 6, 8, 7], # +x on yz plane 30 | [1, 3, 4, 2], # -x on yz plane 31 | [3, 7, 8, 4], # +y on xz plane = top 32 | [1, 2, 6, 5], # -y on xz plane 33 | [2, 4, 8, 6], # +z on xy plane = front 34 | [1, 5, 7, 3], # -z on xy plane 35 | ] 36 | ) 37 | 38 | UNIT_BOX = np.asarray( 39 | [ 40 | [0.0, 0.0, 0.0], 41 | [-0.5, -0.5, -0.5], 42 | [-0.5, -0.5, 0.5], 43 | [-0.5, 0.5, -0.5], 44 | [-0.5, 0.5, 0.5], 45 | [0.5, -0.5, -0.5], 46 | [0.5, -0.5, 0.5], 47 | [0.5, 0.5, -0.5], 48 | [0.5, 0.5, 0.5], 49 | ] 50 | ) 51 | 52 | NUM_KEYPOINTS = 9 53 | FRONT_FACE_ID = 4 54 | TOP_FACE_ID = 2 55 | 56 | 57 | class Box(object): 58 | """General 3D Oriented Bounding Box.""" 59 | 60 | def __init__(self, vertices=None): 61 | if vertices is None: 62 | vertices = self.scaled_axis_aligned_vertices(np.array([1.0, 1.0, 1.0])) 63 | 64 | self._vertices = vertices 65 | self._rotation = None 66 | self._translation = None 67 | self._scale = None 68 | self._transformation = None 69 | self._volume = None 70 | 71 | @classmethod 72 | def from_transformation(cls, rotation, translation, scale): 73 | """Constructs an oriented bounding box from transformation and scale.""" 74 | if rotation.size != 3 and rotation.size != 9: 75 | raise ValueError( 76 | "Unsupported rotation, only 3x1 euler angles or 3x3 " 77 | + "rotation matrices are supported. 
" 78 | + rotation 79 | ) 80 | if rotation.size == 3: 81 | rotation = rotation_util.from_rotvec(rotation.tolist()).as_dcm() 82 | scaled_identity_box = cls.scaled_axis_aligned_vertices(scale) 83 | vertices = np.zeros((NUM_KEYPOINTS, 3)) 84 | for i in range(NUM_KEYPOINTS): 85 | vertices[i, :] = ( 86 | np.matmul(rotation, scaled_identity_box[i, :]) + translation.flatten() 87 | ) 88 | return cls(vertices=vertices) 89 | 90 | def __repr__(self): 91 | representation = "Box: " 92 | for i in range(NUM_KEYPOINTS): 93 | representation += "[{0}: {1}, {2}, {3}]".format( 94 | i, self.vertices[i, 0], self.vertices[i, 1], self.vertices[i, 2] 95 | ) 96 | return representation 97 | 98 | def __len__(self): 99 | return NUM_KEYPOINTS 100 | 101 | def __name__(self): 102 | return "Box" 103 | 104 | def apply_transformation(self, transformation): 105 | """Applies transformation on the box. 106 | 107 | Group multiplication is the same as rotation concatenation. Therefore return 108 | new box with SE3(R * R2, T + R * T2); Where R2 and T2 are existing rotation 109 | and translation. Note we do not change the scale. 110 | 111 | Args: 112 | transformation: a 4x4 transformation matrix. 113 | 114 | Returns: 115 | transformed box. 116 | """ 117 | if transformation.shape != (4, 4): 118 | raise ValueError("Transformation should be a 4x4 matrix.") 119 | 120 | new_rotation = np.matmul(transformation[:3, :3], self.rotation) 121 | new_translation = transformation[:3, 3] + ( 122 | np.matmul(transformation[:3, :3], self.translation) 123 | ) 124 | return Box.from_transformation(new_rotation, new_translation, self.scale) 125 | 126 | @classmethod 127 | def scaled_axis_aligned_vertices(cls, scale): 128 | """Returns an axis-aligned set of verticies for a box of the given scale. 129 | 130 | Args: 131 | scale: A 3*1 vector, specifiying the size of the box in x-y-z dimension. 132 | """ 133 | w = scale[0] / 2.0 134 | h = scale[1] / 2.0 135 | d = scale[2] / 2.0 136 | 137 | # Define the local coordinate system, w.r.t. the center of the box 138 | aabb = np.array( 139 | [ 140 | [0.0, 0.0, 0.0], 141 | [-w, -h, -d], 142 | [-w, -h, +d], 143 | [-w, +h, -d], 144 | [-w, +h, +d], 145 | [+w, -h, -d], 146 | [+w, -h, +d], 147 | [+w, +h, -d], 148 | [+w, +h, +d], 149 | ] 150 | ) 151 | return aabb 152 | 153 | @classmethod 154 | def fit(cls, vertices): 155 | """Estimates a box 9-dof parameters from the given vertices. 156 | 157 | Directly computes the scale of the box, then solves for orientation and 158 | translation. 159 | 160 | Args: 161 | vertices: A 9*3 array of points. Points are arranged as 1 + 8 (center 162 | keypoint + 8 box vertices) matrix. 163 | 164 | Returns: 165 | orientation: 3*3 rotation matrix. 166 | translation: 3*1 translation vector. 167 | scale: 3*1 scale vector. 168 | """ 169 | orientation = np.identity(3) 170 | translation = np.zeros((3, 1)) 171 | scale = np.zeros(3) 172 | 173 | # The scale would remain invariant under rotation and translation. 174 | # We can safely estimate the scale from the oriented box. 
175 | for axis in range(3): 176 | for edge_id in range(4): 177 | # The edges are stored in quadruples according to each axis 178 | begin, end = EDGES[axis * 4 + edge_id] 179 | scale[axis] += np.linalg.norm(vertices[begin, :] - vertices[end, :]) 180 | scale[axis] /= 4.0 181 | 182 | x = cls.scaled_axis_aligned_vertices(scale) 183 | system = np.concatenate((x, np.ones((NUM_KEYPOINTS, 1))), axis=1) 184 | solution, _, _, _ = optimizer(system, vertices, rcond=None) 185 | orientation = solution[:3, :3].T 186 | translation = solution[3, :3] 187 | return orientation, translation, scale 188 | 189 | def inside(self, point): 190 | """Tests whether a given point is inside the box. 191 | 192 | Brings the 3D point into the local coordinate of the box. In the local 193 | coordinate, the looks like an axis-aligned bounding box. Next checks if 194 | the box contains the point. 195 | Args: 196 | point: A 3*1 numpy vector. 197 | 198 | Returns: 199 | True if the point is inside the box, False otherwise. 200 | """ 201 | inv_trans = np.linalg.inv(self.transformation) 202 | scale = self.scale 203 | point_w = np.matmul(inv_trans[:3, :3], point) + inv_trans[:3, 3] 204 | for i in range(3): 205 | if abs(point_w[i]) > scale[i] / 2.0: 206 | return False 207 | return True 208 | 209 | def sample(self): 210 | """Samples a 3D point uniformly inside this box.""" 211 | point = np.random.uniform(-0.5, 0.5, 3) * self.scale 212 | point = np.matmul(self.rotation, point) + self.translation 213 | return point 214 | 215 | @property 216 | def vertices(self): 217 | return self._vertices 218 | 219 | @property 220 | def rotation(self): 221 | if self._rotation is None: 222 | self._rotation, self._translation, self._scale = self.fit(self._vertices) 223 | return self._rotation 224 | 225 | @property 226 | def translation(self): 227 | if self._translation is None: 228 | self._rotation, self._translation, self._scale = self.fit(self._vertices) 229 | return self._translation 230 | 231 | @property 232 | def scale(self): 233 | if self._scale is None: 234 | self._rotation, self._translation, self._scale = self.fit(self._vertices) 235 | return self._scale 236 | 237 | @property 238 | def volume(self): 239 | """Compute the volume of the parallelpiped or the box. 240 | 241 | For the boxes, this is equivalent to np.prod(self.scale). However for 242 | parallelpiped, this is more involved. Viewing the box as a linear function 243 | we can estimate the volume using a determinant. 
This is equivalent to 244 | sp.ConvexHull(self._vertices).volume 245 | 246 | Returns: 247 | volume (float) 248 | """ 249 | if self._volume is None: 250 | i = self._vertices[2, :] - self._vertices[1, :] 251 | j = self._vertices[3, :] - self._vertices[1, :] 252 | k = self._vertices[5, :] - self._vertices[1, :] 253 | sys = np.array([i, j, k]) 254 | self._volume = abs(np.linalg.det(sys)) 255 | return self._volume 256 | 257 | @property 258 | def transformation(self): 259 | if self._rotation is None: 260 | self._rotation, self._translation, self._scale = self.fit(self._vertices) 261 | if self._transformation is None: 262 | self._transformation = np.identity(4) 263 | self._transformation[:3, :3] = self._rotation 264 | self._transformation[:3, 3] = self._translation 265 | return self._transformation 266 | 267 | def get_ground_plane(self, gravity_axis=1): 268 | """Get ground plane under the box.""" 269 | 270 | gravity = np.zeros(3) 271 | gravity[gravity_axis] = 1 272 | 273 | def get_face_normal(face, center): 274 | """Get a normal vector to the given face of the box.""" 275 | v1 = self.vertices[face[0], :] - center 276 | v2 = self.vertices[face[1], :] - center 277 | normal = np.cross(v1, v2) 278 | return normal 279 | 280 | def get_face_center(face): 281 | """Get the center point of the face of the box.""" 282 | center = np.zeros(3) 283 | for vertex in face: 284 | center += self.vertices[vertex, :] 285 | center /= len(face) 286 | return center 287 | 288 | ground_plane_id = 0 289 | ground_plane_error = 10.0 290 | 291 | # The ground plane is defined as a plane aligned with gravity. 292 | # gravity is the (0, 1, 0) vector in the world coordinate system. 293 | for i in [0, 2, 4]: 294 | face = FACES[i, :] 295 | center = get_face_center(face) 296 | normal = get_face_normal(face, center) 297 | w = np.cross(gravity, normal) 298 | w_sq_norm = np.linalg.norm(w) 299 | if w_sq_norm < ground_plane_error: 300 | ground_plane_error = w_sq_norm 301 | ground_plane_id = i 302 | 303 | face = FACES[ground_plane_id, :] 304 | center = get_face_center(face) 305 | normal = get_face_normal(face, center) 306 | 307 | # For each face, we also have a parallel face that it's normal is also 308 | # aligned with gravity vector. We pick the face with lower height (y-value). 309 | # The parallel to face 0 is 1, face 2 is 3, and face 4 is 5. 310 | parallel_face_id = ground_plane_id + 1 311 | parallel_face = FACES[parallel_face_id] 312 | parallel_face_center = get_face_center(parallel_face) 313 | parallel_face_normal = get_face_normal(parallel_face, parallel_face_center) 314 | if parallel_face_center[gravity_axis] < center[gravity_axis]: 315 | center = parallel_face_center 316 | normal = parallel_face_normal 317 | return center, normal 318 | -------------------------------------------------------------------------------- /common/iou/iou.py: -------------------------------------------------------------------------------- 1 | """The Intersection Over Union (IoU) for 3D oriented bounding boxes. 
2 | 3 | Origin: https://github.com/OasisYang/Wild6D/blob/main/lib/iou.py 4 | """ 5 | 6 | import numpy as np 7 | import scipy.spatial as sp 8 | 9 | from .box import FACES, NUM_KEYPOINTS 10 | 11 | _PLANE_THICKNESS_EPSILON = 0.000001 12 | _POINT_IN_FRONT_OF_PLANE = 1 13 | _POINT_ON_PLANE = 0 14 | _POINT_BEHIND_PLANE = -1 15 | 16 | 17 | class IoU(object): 18 | """General Intersection Over Union cost for Oriented 3D bounding boxes.""" 19 | 20 | def __init__(self, box1, box2): 21 | self._box1 = box1 22 | self._box2 = box2 23 | self._intersection_points = [] 24 | 25 | def iou(self): 26 | """Computes the exact IoU using Sutherland-Hodgman algorithm.""" 27 | self._intersection_points = [] 28 | self._compute_intersection_points(self._box1, self._box2) 29 | self._compute_intersection_points(self._box2, self._box1) 30 | if self._intersection_points: 31 | intersection_volume = sp.ConvexHull(self._intersection_points).volume 32 | box1_volume = self._box1.volume 33 | box2_volume = self._box2.volume 34 | union_volume = box1_volume + box2_volume - intersection_volume 35 | return intersection_volume / union_volume 36 | else: 37 | return 0.0 38 | 39 | def iou_sampling(self, num_samples=10000): 40 | """Computes intersection over union by sampling points. 41 | 42 | Generate n samples inside each box and check if those samples are inside 43 | the other box. Each box has a different volume, therefore the number o 44 | samples in box1 is estimating a different volume than box2. To address 45 | this issue, we normalize the iou estimation based on the ratio of the 46 | volume of the two boxes. 47 | 48 | Args: 49 | num_samples: Number of generated samples in each box 50 | 51 | Returns: 52 | IoU Estimate (float) 53 | """ 54 | p1 = [self._box1.sample() for _ in range(num_samples)] 55 | p2 = [self._box2.sample() for _ in range(num_samples)] 56 | box1_volume = self._box1.volume 57 | box2_volume = self._box2.volume 58 | box1_intersection_estimate = 0 59 | box2_intersection_estimate = 0 60 | for point in p1: 61 | if self._box2.inside(point): 62 | box1_intersection_estimate += 1 63 | for point in p2: 64 | if self._box1.inside(point): 65 | box2_intersection_estimate += 1 66 | # We are counting the volume of intersection twice. 
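# Each hit count divided by num_samples estimates V_intersection / V_box for
# the box the samples were drawn from, so multiplying by that box's volume
# gives num_samples * V_intersection. The two estimates are averaged below;
# the common num_samples factor also appears in the union term and cancels
# in the final ratio.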
67 | intersection_volume_estimate = ( 68 | box1_volume * box1_intersection_estimate 69 | + box2_volume * box2_intersection_estimate 70 | ) / 2.0 71 | union_volume_estimate = ( 72 | box1_volume * num_samples + box2_volume * num_samples 73 | ) - intersection_volume_estimate 74 | iou_estimate = intersection_volume_estimate / union_volume_estimate 75 | return iou_estimate 76 | 77 | def _compute_intersection_points(self, box_src, box_template): 78 | """Computes the intersection of two boxes.""" 79 | # Transform the source box to be axis-aligned 80 | inv_transform = np.linalg.inv(box_src.transformation) 81 | box_src_axis_aligned = box_src.apply_transformation(inv_transform) 82 | template_in_src_coord = box_template.apply_transformation(inv_transform) 83 | for face in range(len(FACES)): 84 | indices = FACES[face, :] 85 | poly = [template_in_src_coord.vertices[indices[i], :] for i in range(4)] 86 | clip = self.intersect_box_poly(box_src_axis_aligned, poly) 87 | for point in clip: 88 | # Transform the intersection point back to the world coordinate 89 | point_w = np.matmul(box_src.rotation, point) + box_src.translation 90 | self._intersection_points.append(point_w) 91 | 92 | for point_id in range(NUM_KEYPOINTS): 93 | v = template_in_src_coord.vertices[point_id, :] 94 | if box_src_axis_aligned.inside(v): 95 | point_w = np.matmul(box_src.rotation, v) + box_src.translation 96 | self._intersection_points.append(point_w) 97 | 98 | def intersect_box_poly(self, box, poly): 99 | """Clips the polygon against the faces of the axis-aligned box.""" 100 | for axis in range(3): 101 | poly = self._clip_poly(poly, box.vertices[1, :], 1.0, axis) 102 | poly = self._clip_poly(poly, box.vertices[8, :], -1.0, axis) 103 | return poly 104 | 105 | def _clip_poly(self, poly, plane, normal, axis): 106 | """Clips the polygon with the plane using the Sutherland-Hodgman algorithm. 107 | 108 | See en.wikipedia.org/wiki/Sutherland-Hodgman_algorithm for the overview of 109 | the Sutherland-Hodgman algorithm. Here we adopted a robust implementation 110 | from "Real-Time Collision Detection", by Christer Ericson, page 370. 111 | 112 | Args: 113 | poly: List of 3D vertices defining the polygon. 114 | plane: The 3D vertices of the (2D) axis-aligned plane. 115 | normal: normal 116 | axis: A tuple defining a 2D axis. 117 | 118 | Returns: 119 | List of 3D vertices of the clipped polygon. 120 | """ 121 | # The vertices of the clipped polygon are stored in the result list. 122 | result = [] 123 | if len(poly) <= 1: 124 | return result 125 | 126 | # polygon is fully located on clipping plane 127 | poly_in_plane = True 128 | 129 | # Test all the edges in the polygon against the clipping plane. 
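# Sutherland-Hodgman clipping: walk the polygon edge by edge
# (prev_poly_point -> current_poly_point) and classify both endpoints against
# the plane. Vertices on the kept (front) side are emitted, edges crossing
# the plane contribute their intersection point, and vertices lying exactly
# on the plane are emitted only once to avoid duplicates.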
130 | for i, current_poly_point in enumerate(poly): 131 | prev_poly_point = poly[(i + len(poly) - 1) % len(poly)] 132 | d1 = self._classify_point_to_plane(prev_poly_point, plane, normal, axis) 133 | d2 = self._classify_point_to_plane(current_poly_point, plane, normal, axis) 134 | if d2 == _POINT_BEHIND_PLANE: 135 | poly_in_plane = False 136 | if d1 == _POINT_IN_FRONT_OF_PLANE: 137 | intersection = self._intersect( 138 | plane, prev_poly_point, current_poly_point, axis 139 | ) 140 | result.append(intersection) 141 | elif d1 == _POINT_ON_PLANE: 142 | if not result or (not np.array_equal(result[-1], prev_poly_point)): 143 | result.append(prev_poly_point) 144 | elif d2 == _POINT_IN_FRONT_OF_PLANE: 145 | poly_in_plane = False 146 | if d1 == _POINT_BEHIND_PLANE: 147 | intersection = self._intersect( 148 | plane, prev_poly_point, current_poly_point, axis 149 | ) 150 | result.append(intersection) 151 | elif d1 == _POINT_ON_PLANE: 152 | if not result or (not np.array_equal(result[-1], prev_poly_point)): 153 | result.append(prev_poly_point) 154 | 155 | result.append(current_poly_point) 156 | else: 157 | if d1 != _POINT_ON_PLANE: 158 | result.append(current_poly_point) 159 | 160 | if poly_in_plane: 161 | return poly 162 | else: 163 | return result 164 | 165 | def _intersect(self, plane, prev_point, current_point, axis): 166 | """Computes the intersection of a line with an axis-aligned plane. 167 | 168 | Args: 169 | plane: Formulated as two 3D points on the plane. 170 | prev_point: The point on the edge of the line. 171 | current_point: The other end of the line. 172 | axis: A tuple defining a 2D axis. 173 | 174 | Returns: 175 | A 3D point intersection of the poly edge with the plane. 176 | """ 177 | alpha = (current_point[axis] - plane[axis]) / ( 178 | current_point[axis] - prev_point[axis] 179 | ) 180 | # Compute the intersecting points using linear interpolation (lerp) 181 | intersection_point = alpha * prev_point + (1.0 - alpha) * current_point 182 | return intersection_point 183 | 184 | def _inside(self, plane, point, axis): 185 | """Check whether a given point is on a 2D plane.""" 186 | # Cross products to determine the side of the plane the point lie. 187 | x, y = axis 188 | u = plane[0] - point 189 | v = plane[1] - point 190 | 191 | a = u[x] * v[y] 192 | b = u[y] * v[x] 193 | return a >= b 194 | 195 | def _classify_point_to_plane(self, point, plane, normal, axis): 196 | """Classify position of a point w.r.t the given plane. 197 | 198 | See Real-Time Collision Detection, by Christer Ericson, page 364. 199 | 200 | Args: 201 | point: 3x1 vector indicating the point 202 | plane: 3x1 vector indicating a point on the plane 203 | normal: scalar (+1, or -1) indicating the normal to the vector 204 | axis: scalar (0, 1, or 2) indicating the xyz axis 205 | 206 | Returns: 207 | Side: which side of the plane the point is located. 208 | """ 209 | signed_distance = normal * (point[axis] - plane[axis]) 210 | if signed_distance > _PLANE_THICKNESS_EPSILON: 211 | return _POINT_IN_FRONT_OF_PLANE 212 | elif signed_distance < -_PLANE_THICKNESS_EPSILON: 213 | return _POINT_BEHIND_PLANE 214 | else: 215 | return _POINT_ON_PLANE 216 | 217 | @property 218 | def intersection_points(self): 219 | return self._intersection_points 220 | -------------------------------------------------------------------------------- /common/log.py: -------------------------------------------------------------------------------- 1 | """ 2 | Logging 3 | ======= 4 | 5 | In principle, we recommend Python's official `logging` package for logging. 
6 | This module contains utilities for logging setup. 7 | """ 8 | 9 | import logging 10 | import json 11 | import sys 12 | 13 | try: 14 | from typing import Any, Literal 15 | except: 16 | pass 17 | 18 | 19 | class _CustomFormatter(logging.Formatter): 20 | grey = "\x1b[38;20m" 21 | green = "\x1b[32;20m" 22 | yellow = "\x1b[33;20m" 23 | red = "\x1b[31;20m" 24 | bold_red = "\x1b[31;1m" 25 | reset = "\x1b[0m" 26 | FORMAT_SHORT = " %(asctime)s [%(name)s] %(message)s" 27 | FORMAT_LONG = " %(asctime)s [%(name)s] %(message)s (%(filename)s:%(lineno)d)" 28 | FILE_FORMAT = ( 29 | "%(levelname)s %(asctime)s [%(name)s] %(message)s (%(filename)s:%(lineno)d)" 30 | ) 31 | 32 | LEVEL_FORMATS = { 33 | logging.DEBUG: ">> " + grey + "%(levelname)s" + reset + FORMAT_LONG, 34 | logging.INFO: green + "%(levelname)s" + reset + FORMAT_SHORT, 35 | logging.WARNING: yellow + "%(levelname)s" + reset + FORMAT_LONG, 36 | logging.ERROR: red + "%(levelname)s" + reset + FORMAT_LONG, 37 | logging.CRITICAL: bold_red + "%(levelname)s" + reset + FORMAT_LONG, 38 | } 39 | 40 | mode: str 41 | 42 | def __init__(self, mode: 'Literal["terminal", "file"]' = "terminal") -> None: 43 | self.mode = mode 44 | super().__init__() 45 | 46 | def format(self, record): 47 | if self.mode == "terminal": 48 | level_fmt = self.LEVEL_FORMATS.get(record.levelno) 49 | formatter = logging.Formatter(level_fmt) 50 | else: 51 | formatter = logging.Formatter(self.FILE_FORMAT) 52 | 53 | return formatter.format(record) 54 | 55 | 56 | class _JsonFormatter(logging.Formatter): 57 | """ 58 | Formatter that outputs JSON strings after parsing the LogRecord. 59 | 60 | :param fmt_dict: Key: logging format attribute pairs. Defaults to ``{"message": "message"}``. 61 | :param time_format: time.strftime() format string. Default: "%Y-%m-%dT%H:%M:%S" 62 | :param msec_format: Microsecond formatting. Appended at the end. Default: "%s.%03dZ" 63 | """ 64 | 65 | def __init__( 66 | self, 67 | fmt_dict: dict = { 68 | "level": "levelname", 69 | "message": "message", 70 | "loggerName": "name", 71 | "processName": "processName", 72 | "processID": "process", 73 | "threadName": "threadName", 74 | "threadID": "thread", 75 | "timestamp": "asctime", 76 | }, 77 | time_format: str = "%Y-%m-%dT%H:%M:%S", 78 | msec_format: str = "%s.%03dZ", 79 | ): 80 | self.fmt_dict = fmt_dict if fmt_dict is not None else {"message": "message"} 81 | self.default_time_format = time_format 82 | self.default_msec_format = msec_format 83 | self.datefmt = None 84 | 85 | def usesTime(self) -> bool: 86 | """ 87 | Overwritten to look for the attribute in the format dict values instead of the fmt string. 88 | """ 89 | return "asctime" in self.fmt_dict.values() 90 | 91 | def formatMessage(self, record) -> dict: 92 | """ 93 | Overwritten to return a dictionary of the relevant LogRecord attributes instead of a string. 94 | KeyError is raised if an unknown attribute is provided in the fmt_dict. 95 | """ 96 | return { 97 | fmt_key: record.__dict__[fmt_val] 98 | for fmt_key, fmt_val in self.fmt_dict.items() 99 | } 100 | 101 | def format(self, record) -> str: 102 | """ 103 | Mostly the same as the parent's class method, the difference being that a dict is manipulated and dumped as JSON 104 | instead of a string. 
105 | """ 106 | record.message = record.getMessage() 107 | 108 | if self.usesTime(): 109 | record.asctime = self.formatTime(record, self.datefmt) 110 | 111 | message_dict = self.formatMessage(record) 112 | 113 | if record.exc_info: 114 | # Cache the traceback text to avoid converting it multiple times 115 | # (it's constant anyway) 116 | if not record.exc_text: 117 | record.exc_text = self.formatException(record.exc_info) 118 | 119 | if record.exc_text: 120 | message_dict["exc_info"] = record.exc_text 121 | 122 | if record.stack_info: 123 | message_dict["stack_info"] = self.formatStack(record.stack_info) 124 | 125 | return json.dumps(message_dict, default=str) 126 | 127 | 128 | def setup_logging( 129 | log_mode: str, debug=False, logfile: "str | None" = None, json=True, delay=True 130 | ): 131 | """Setup a simple logging for Python's official ``logging`` package. 132 | 133 | :param log_mode: ``file`` for only logging to file, `terminal` for only logging to terminal, otherwise both are taken. 134 | :param debug: whether set loglevel to DEBUG (default is INFO) 135 | :param logfile: specify logging filename 136 | :param json: whether output json format log, whose file name is ``logfile + ".jl"``. 137 | :param delay: See `FileHandler `_ 138 | 139 | .. highlight:: python 140 | 141 | Usage:: 142 | 143 | # only logging to terminal, enabling debug level logging. 144 | setup_logging("terminal", debug=True) 145 | 146 | # only logging to runtime.log file and runtime.log.jl (json log) 147 | setup_logging("file", logfile="runtime.log") 148 | 149 | # logging to terminal and file without json 150 | setup_logging("both", logfile="runtime.log", json=false) 151 | 152 | """ 153 | while len(logging.root.handlers) > 0: 154 | logging.root.removeHandler(logging.root.handlers[0]) 155 | 156 | level = logging.INFO 157 | if debug: 158 | level = logging.DEBUG 159 | logging.root.setLevel(level=level) 160 | 161 | if log_mode != "file": 162 | handler = logging.StreamHandler(sys.stderr) 163 | handler.setLevel(level) 164 | handler.setFormatter(_CustomFormatter("terminal")) 165 | logging.root.addHandler(handler) 166 | 167 | if log_mode != "terminal": 168 | assert logfile is not None 169 | handler = logging.FileHandler(logfile, delay=delay) 170 | handler.setLevel(level) 171 | handler.setFormatter(_CustomFormatter("file")) 172 | logging.root.addHandler(handler) 173 | if json: 174 | handler = logging.FileHandler(logfile + ".jl", delay=delay) 175 | handler.setLevel(level) 176 | handler.setFormatter(_JsonFormatter()) 177 | logging.root.addHandler(handler) 178 | -------------------------------------------------------------------------------- /common/obj_meta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data types for Object Metafile 3 | ============================== 4 | """ 5 | 6 | from dataclasses import dataclass 7 | from .rotation import SymLabel 8 | import json 9 | 10 | 11 | @dataclass 12 | class ObjectTag: 13 | datatype: str 14 | """Which split does this object belongs to (`train` or `test`)""" 15 | sceneChanger: bool 16 | """whether this object has a relatively large size that can be placed objects on""" 17 | symmetry: SymLabel 18 | """Symmetry label.""" 19 | 20 | # only for Omni6DPose generation 21 | materialOptions: "list[str]" 22 | 23 | # only for Omni6DPose generation 24 | upAxis: "list[str]" 25 | 26 | def __post_init__(self): 27 | if not isinstance(self.symmetry, SymLabel): 28 | self.symmetry = SymLabel(**self.symmetry) 29 | 30 | 31 | @dataclass 32 | class 
ObjectInfo: 33 | object_id: str 34 | """Global unique object id.""" 35 | 36 | source: str 37 | """Source dataset of this object. 38 | 39 | - `phocal`: the `PhoCaL `_ dataset. 40 | - `omniobject3d`: the `OmniObject3D `_ dataset. 41 | - `google_scan`: the `Google Scanned Objects `_ dataset. 42 | - `real`: supplementary scanned objects collected from real-world. 43 | """ 44 | 45 | # name of this object (deprecated, use object_id instead) 46 | name: str 47 | 48 | obj_path: str 49 | """path to the object mesh file, relative to the source dataset path""" 50 | 51 | tag: ObjectTag 52 | """See :class:`ObjectTag`""" 53 | 54 | class_label: int 55 | """class ID of this object""" 56 | 57 | class_name: str 58 | """class name of this object""" 59 | 60 | dimensions: "list[float]" 61 | """Bounding box size under object space (not NOCS)""" 62 | 63 | def __post_init__(self): 64 | if not isinstance(self.tag, ObjectTag): 65 | self.tag = ObjectTag(**self.tag) 66 | 67 | 68 | @dataclass 69 | class ClassListItem: 70 | name: str 71 | """Name of this class""" 72 | 73 | label: int 74 | """ID of this class""" 75 | 76 | instance_ids: "list[str]" 77 | """List of the `object_id` of the objects belonging to this class""" 78 | 79 | stat: dict 80 | 81 | 82 | @dataclass 83 | class ObjectMetaData: 84 | class_list: "list[ClassListItem]" 85 | """The complete list of class""" 86 | 87 | instance_dict: "dict[str, ObjectInfo]" 88 | """Objects indexed by `object_id`""" 89 | 90 | def __post_init__(self): 91 | if len(self.class_list) > 0 and not isinstance( 92 | self.class_list[0], ClassListItem 93 | ): 94 | self.class_list = [ClassListItem(**x) for x in self.class_list] 95 | if len(self.instance_dict) > 0 and not isinstance( 96 | next(iter(self.instance_dict.values())), ObjectInfo 97 | ): 98 | self.instance_dict = { 99 | k: ObjectInfo(**v) for k, v in self.instance_dict.items() 100 | } 101 | 102 | @staticmethod 103 | def load_json(path: str) -> "ObjectMetaData": 104 | """Load object meta data from json file""" 105 | 106 | with open(path, "r") as f: 107 | jsondata = json.load(f) 108 | 109 | return ObjectMetaData(**jsondata) 110 | -------------------------------------------------------------------------------- /common/rotation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Rotation and Symmetry 3 | ===================== 4 | 5 | You can add ``CUTOOP_STRICT_MODE=1`` to environment to enable a strict value assertion 6 | before calculating rotation error. 7 | 8 | SymLabel 9 | -------- 10 | 11 | .. autoclass:: SymLabel 12 | :show-inheritance: 13 | :members: 14 | :special-members: __str__ 15 | 16 | Rotation Manipulation 17 | --------------------- 18 | 19 | .. autofunction:: rot_canonical_sym 20 | 21 | .. autofunction:: rots_of_sym 22 | 23 | Rotation Error Computation 24 | -------------------------- 25 | 26 | .. autofunction:: rot_diff_axis 27 | 28 | .. autofunction:: rot_diff_sym 29 | 30 | .. autofunction:: rot_diff_theta 31 | 32 | .. autofunction:: rot_diff_theta_pointwise 33 | 34 | Prelude Rotations 35 | ----------------- 36 | 37 | .. autodata:: STANDARD_SYMMETRY 38 | 39 | .. autodata:: cube_group 40 | 41 | .. autodata:: cube_flip_group 42 | 43 | .. autodata:: qtr 44 | 45 | .. autodata:: ax 46 | 47 | .. autodata:: rot90 48 | 49 | .. 
autodata:: rot180 50 | 51 | """ 52 | 53 | try: # just for type hints 54 | from typing import Literal 55 | except: 56 | pass 57 | 58 | import numpy as np 59 | from numpy import ndarray 60 | from scipy.spatial.transform import Rotation as _Rotation 61 | from dataclasses import dataclass 62 | import copy 63 | import os 64 | 65 | 66 | @dataclass 67 | class SymLabel: 68 | """ 69 | Symmetry labels for real-world objects. 70 | 71 | Axis rotation details: 72 | 73 | - `any`: arbitrary rotation around this axis is ok 74 | - `half`: rotate 180 degrees along this axis (central symmetry) 75 | - `quarter`: rotate 90 degrees along this axis (like square) 76 | 77 | .. doctest:: 78 | 79 | >>> from cutoop.rotation import SymLabel 80 | >>> sym = SymLabel(any=False, x='any', y='none', z='none') 81 | >>> str(sym) 82 | 'x-cone' 83 | >>> sym = SymLabel(any=False, x='any', y='any', z='none') # two any produces 'any' 84 | >>> str(sym) 85 | 'any' 86 | >>> sym = SymLabel(any=False, x='half', y='half', z='half') 87 | >>> str(sym) 88 | 'box' 89 | 90 | """ 91 | 92 | any: bool # sphere 93 | """Whether arbitrary rotation is allowed""" 94 | x: "Literal['none', 'any', 'half', 'quarter']" 95 | """axis rotation for x""" 96 | y: "Literal['none', 'any', 'half', 'quarter']" 97 | """axis rotation for y""" 98 | z: "Literal['none', 'any', 'half', 'quarter']" 99 | """axis rotation for z""" 100 | 101 | def get_only(self, tag: 'Literal["any", "half", "quarter"]'): 102 | """Get the only axis marked with the tag. If multiple or none is find, return ``None``.""" 103 | ret: 'list[Literal["x", "y", "z"]]' = [] 104 | if self.x == tag: 105 | ret.append("x") 106 | if self.y == tag: 107 | ret.append("y") 108 | if self.z == tag: 109 | ret.append("z") 110 | if len(ret) != 1: 111 | return None 112 | return ret[0] 113 | 114 | @staticmethod 115 | def from_str(s: str) -> "SymLabel": 116 | """Construct symmetry from string. 117 | 118 | .. note:: See also :obj:`STANDARD_SYMMETRY`. 119 | """ 120 | if s in STANDARD_SYMMETRY: 121 | return copy.deepcopy(STANDARD_SYMMETRY[s]) 122 | else: 123 | raise ValueError(f"invalid symmetry: {s}") 124 | 125 | def __str__(self) -> str: 126 | """ 127 | For human readability, rotations are divided into the following types (names): 128 | 129 | - ``any``: arbitrary rotation is ok; 130 | - ``cube``: the same symmetry as a cube; 131 | - ``box``: the same symmetry as a box (flipping along x, y, and z axis); 132 | - ``none``: no symmetry is provided; 133 | - ``{x,y,z}-flip``: flip along a single axis; 134 | - ``{x,y,z}-square-tube``: the same symmetry as a square tube alone the axis; 135 | - ``{x,y,z}-square-pyramid``: the same symmetry as a pyramid alone the axis; 136 | - ``{x,y,z}-cylinder``: the same symmetry as a cylinder the axis; 137 | - ``{x,y,z}-cone``: the same symmetry as a cone the axis. 
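
For example (these values match :obj:`STANDARD_SYMMETRY`)::

    SymLabel(any=False, x='half', y='any', z='half')      # str(...) == 'y-cylinder'
    SymLabel(any=False, x='none', y='none', z='quarter')  # str(...) == 'z-square-pyramid'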
138 | 139 | """ 140 | c_any = (self.x == "any") + (self.y == "any") + (self.z == "any") 141 | c_quarter = ( 142 | (self.x == "quarter") + (self.y == "quarter") + (self.z == "quarter") 143 | ) 144 | c_half = (self.x == "half") + (self.y == "half") + (self.z == "half") 145 | 146 | if self.any or c_any > 1 or (c_any > 0 and c_quarter > 0): # any rotation is ok 147 | return "any" 148 | 149 | if c_any == 0: 150 | if c_quarter > 1: 151 | return "cube" # cube group 152 | elif c_quarter == 0: 153 | if c_half > 1: 154 | return "box" # cube_flip_group 155 | else: # one half or none 156 | axis = self.get_only("half") 157 | return f"{axis}-flip" if axis is not None else "none" 158 | else: # c_quarter == 1 159 | axis = self.get_only("quarter") 160 | if c_half > 0: 161 | return f"{axis}-square-tube" 162 | else: 163 | return f"{axis}-square-pyramid" 164 | else: 165 | assert c_any == 1 and c_quarter == 0 166 | axis = self.get_only("any") 167 | if c_half > 0: 168 | return f"{axis}-cylinder" 169 | else: 170 | return f"{axis}-cone" 171 | 172 | 173 | STANDARD_SYMMETRY = { 174 | "any": SymLabel(any=True, x="any", y="any", z="any"), 175 | "cube": SymLabel(any=False, x="quarter", y="quarter", z="quarter"), 176 | "box": SymLabel(any=False, x="half", y="half", z="half"), 177 | "none": SymLabel(any=False, x="none", y="none", z="none"), 178 | "x-flip": SymLabel(any=False, x="half", y="none", z="none"), 179 | "y-flip": SymLabel(any=False, x="none", y="half", z="none"), 180 | "z-flip": SymLabel(any=False, x="none", y="none", z="half"), 181 | "x-square-tube": SymLabel(any=False, x="quarter", y="half", z="half"), 182 | "y-square-tube": SymLabel(any=False, x="half", y="quarter", z="half"), 183 | "z-square-tube": SymLabel(any=False, x="half", y="half", z="quarter"), 184 | "x-square-pyramid": SymLabel(any=False, x="quarter", y="none", z="none"), 185 | "y-square-pyramid": SymLabel(any=False, x="none", y="quarter", z="none"), 186 | "z-square-pyramid": SymLabel(any=False, x="none", y="none", z="quarter"), 187 | "x-cylinder": SymLabel(any=False, x="any", y="half", z="half"), 188 | "y-cylinder": SymLabel(any=False, x="half", y="any", z="half"), 189 | "z-cylinder": SymLabel(any=False, x="half", y="half", z="any"), 190 | "x-cone": SymLabel(any=False, x="any", y="none", z="none"), 191 | "y-cone": SymLabel(any=False, x="none", y="any", z="none"), 192 | "z-cone": SymLabel(any=False, x="none", y="none", z="any"), 193 | } 194 | """ 195 | All standard symmetries. 196 | 197 | .. doctest:: 198 | 199 | >>> for name, sym in cutoop.rotation.STANDARD_SYMMETRY.items(): 200 | ... 
assert str(sym) == name, f"name: {name}, sym: {repr(sym)}"
201 | 
202 | """
203 | 
204 | ax = {
205 |     "x": np.array([1, 0, 0]),
206 |     "y": np.array([0, 1, 0]),
207 |     "z": np.array([0, 0, 1]),
208 | }
209 | """Normalized axis vectors."""
210 | 
211 | rot90 = {
212 |     "x": _Rotation.from_rotvec(np.pi / 2 * ax["x"]).as_matrix(),
213 |     "y": _Rotation.from_rotvec(np.pi / 2 * ax["y"]).as_matrix(),
214 |     "z": _Rotation.from_rotvec(np.pi / 2 * ax["z"]).as_matrix(),
215 | }
216 | """90-degree rotation along each axis."""
217 | 
218 | rot180 = {
219 |     "x": _Rotation.from_rotvec(np.pi * ax["x"]).as_matrix(),
220 |     "y": _Rotation.from_rotvec(np.pi * ax["y"]).as_matrix(),
221 |     "z": _Rotation.from_rotvec(np.pi * ax["z"]).as_matrix(),
222 | }
223 | """180-degree rotation (flip) along each axis."""
224 | 
225 | qtr = {
226 |     "x": [np.eye(3), rot90["x"], rot180["x"], rot90["x"].T],
227 |     "y": [np.eye(3), rot90["y"], rot180["y"], rot90["y"].T],
228 |     "z": [np.eye(3), rot90["z"], rot180["z"], rot90["z"].T],
229 | }
230 | """All rotations composed of 90-degree rotations along each axis."""
231 | 
232 | cube_flip_group: "list[ndarray]" = [np.eye(3), rot180["x"], rot180["y"], rot180["z"]]
233 | """
234 | All 4 rotations of a box, as 3x3 matrices:
235 | the identity plus the 180-degree rotations around the x, y, and z axes (an abelian group).
236 | 
237 | :meta hide-value:
238 | """
239 | 
240 | cube_group: "list[ndarray]" = [
241 |     s @ g @ h
242 |     for s in [np.eye(3), rot90["x"]]
243 |     for g in [rot90["x"] @ rot90["y"], rot90["x"] @ rot90["z"], np.eye(3)]
244 |     for h in cube_flip_group
245 | ]
246 | """
247 | All 24 rotations of a cube (Sym(4)).
248 | 
249 | The correctness of this implementation lies in:
250 | 
251 | 1. ``cube_flip_group`` is a normal subgroup of the alternating group Alt(4)
252 | 2. the alternating group Alt(4) is a normal subgroup of Sym(4)
253 | 
254 | Thus we can construct Sym(4) by enumerating all cosets of Alt(4),
255 | which in turn is constructed by enumerating all cosets of ``cube_flip_group``.
256 | 
257 | One can check that all 24 elements are distinct by
258 | 
259 | .. doctest::
260 | 
261 |     >>> from cutoop.rotation import cube_group
262 |     >>> for i in range(24):
263 |     ...     for j in range(24):
264 |     ...         if i < j:
265 |     ...             diff = cube_group[i] @ cube_group[j].T
266 |     ...             assert np.linalg.norm(diff - np.eye(3)) > 0.1
267 | 
268 | :meta hide-value:
269 | """
270 | 
271 | 
272 | def rot_diff_theta(rA_3x3: ndarray, rB_Nx3x3: ndarray) -> ndarray:
273 |     """
274 |     Compute the difference angle between one rotation and a series of rotations.
275 | 
276 |     Note that since `arccos` is quite steep around 0 (where the cosine is near 1),
277 |     some numerical error (around 1e-4) is unavoidable.
278 | 
279 |     :return: theta (unit: radian) array of length N.
280 |     """
281 |     R = rA_3x3 @ rB_Nx3x3.reshape(-1, 3, 3).transpose(0, 2, 1)
282 |     val = (np.trace(R, axis1=1, axis2=2) - 1) / 2
283 | 
284 |     if int(os.environ.get("CUTOOP_STRICT_MODE", "0")) == 1:
285 |         assert np.all(np.abs(val) < 1 + 1e-5), f"invalid rotation matrix, cos = {val}"
286 |     theta = np.arccos(np.clip(val, -1, 1))
287 |     return theta
288 | 
289 | 
290 | def rot_diff_theta_pointwise(rA_Nx3x3: ndarray, rB_Nx3x3: ndarray) -> ndarray:
291 |     """
292 |     Compute the difference angle of two sequences of rotations, pointwise.
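
    A small sanity check (illustrative, using only numpy and scipy, which this module already depends on):

    .. doctest::

        >>> import numpy as np
        >>> from scipy.spatial.transform import Rotation
        >>> from cutoop.rotation import rot_diff_theta_pointwise
        >>> rA = Rotation.from_rotvec([0, 0, 0.3]).as_matrix()[None]  # 0.3 rad about z
        >>> rB = np.eye(3)[None]
        >>> bool(np.isclose(rot_diff_theta_pointwise(rA, rB)[0], 0.3))
        True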
293 | 
294 |     :return: theta (unit: radian) array of length N
295 |     """
296 |     R = np.einsum("ijk,ilk->ijl", rA_Nx3x3, rB_Nx3x3)  # rA @ rB.T
297 |     val = (np.trace(R, axis1=1, axis2=2) - 1) / 2
298 | 
299 |     if int(os.environ.get("CUTOOP_STRICT_MODE", "0")) == 1:
300 |         assert np.all(np.abs(val) < 1 + 1e-5), f"invalid rotation matrix, cos = {val}"
301 |     theta = np.arccos(np.clip(val, -1, 1))
302 |     return theta
303 | 
304 | 
305 | def rot_diff_axis(rA_3x3: ndarray, rB_Nx3x3: ndarray, axis: ndarray) -> ndarray:
306 |     """Compute the difference angle while ignoring rotation around the given axis.
307 | 
308 |     :param axis: One of :obj:`ax`.
309 | 
310 |     """
311 |     axis = axis.reshape(3)
312 |     y1 = rA_3x3.reshape(3, 3) @ axis
313 |     y2 = rB_Nx3x3.reshape(-1, 3, 3) @ axis
314 |     val = y2.dot(y1) / (np.linalg.norm(y1) * np.linalg.norm(y2, axis=1))
315 | 
316 |     if int(os.environ.get("CUTOOP_STRICT_MODE", "0")) == 1:
317 |         assert np.all(np.abs(val) < 1 + 1e-5), f"invalid rotation matrix, cos = {val}"
318 |     theta = np.arccos(np.clip(val, -1, 1))
319 |     return theta
320 | 
321 | 
322 | def rot_diff_sym(rA_3x3: ndarray, rB_3x3: ndarray, sym: SymLabel) -> float:
323 |     """Compute the difference angle (rotation error) with regard to symmetry.
324 | 
325 |     For continuous symmetries this function uses an analytic method to calculate
326 |     the difference angle, which is more accurate than :func:`rot_canonical_sym`.
327 | 
328 |     :return: the difference angle.
329 |     """
330 | 
331 |     rA_3x3 = rA_3x3.reshape(3, 3)
332 |     rB_3x3 = rB_3x3.reshape(3, 3)
333 | 
334 |     c_any = (sym.x == "any") + (sym.y == "any") + (sym.z == "any")
335 |     symname = str(sym)
336 | 
337 |     if symname == "any":  # any rotation is ok
338 |         return 0
339 |     elif c_any == 0:
340 |         _, theta = rot_canonical_sym(rA_3x3, rB_3x3, sym, return_theta=True)
341 |         return theta
342 |     else:  # c_any == 1 and c_quarter == 0
343 |         assert c_any == 1
344 |         axis = sym.get_only("any")
345 |         vec = ax[axis]
346 |         rB1 = rB_3x3[None, ...]
347 |         if symname.endswith("cylinder"):
348 |             half_axis = "x" if axis == "y" else "y"  # any other axis is ok
349 |             t1 = rot_diff_axis(rA_3x3 @ rot180[half_axis], rB1, vec.reshape(3))[0]
350 |             t2 = rot_diff_axis(rA_3x3, rB1, vec.reshape(3))[0]
351 |             theta = min(t1, t2)
352 |         else:
353 |             theta = rot_diff_axis(rA_3x3, rB1, vec.reshape(3))[0]
354 | 
355 |         return theta
356 | 
357 | 
358 | def rot_canonical_sym(
359 |     rA_3x3: ndarray, rB_3x3: ndarray, sym: SymLabel, split=100, return_theta=False
360 | ) -> "ndarray | tuple[ndarray, float]":
361 |     """Find the optimal rotation ``rot`` that minimizes ``theta(rA, rB @ rot)``.
362 | 
363 |     :param rA_3x3: often the ground truth rotation.
364 |     :param rB_3x3: often the predicted rotation.
365 |     :param sym: symmetry label.
366 |     :param split: see :func:`rots_of_sym`.
367 |     :param return_theta: if enabled, a tuple of the ``rot`` and its theta will both
368 |         be returned.
369 | 
370 |     :returns: the optimal ``rot``
371 |     """
372 | 
373 |     if str(sym) == "any":
374 |         rot = rB_3x3.T @ rA_3x3
375 |         return (rot, 0.0) if return_theta else rot
376 | 
377 |     rots = rots_of_sym(sym, split=split)
378 |     rBs = rB_3x3 @ rots
379 |     thetas = rot_diff_theta(rA_3x3, rBs)
380 |     index = np.argmin(thetas)
381 |     if return_theta:
382 |         return rots[index], thetas[index]
383 |     else:
384 |         return rots[index]
385 | 
386 | 
387 | def rots_of_sym(sym: SymLabel, split=20) -> ndarray:
388 |     """Get the list of rotations in the group corresponding to the sym label.
389 | 
390 |     :param split: Set the snap of rotation to ``2 * pi / split`` for continuous
391 |         symmetry.
392 | 
393 |     :return: ndarray of shape [?, 3, 3] containing a set of rotation matrices.
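
    For instance, the group sizes follow directly from the construction below:

    .. doctest::

        >>> from cutoop.rotation import SymLabel, rots_of_sym
        >>> rots_of_sym(SymLabel.from_str("box")).shape
        (4, 3, 3)
        >>> rots_of_sym(SymLabel.from_str("z-cylinder"), split=36).shape
        (72, 3, 3)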
    """
394 |     snap = 2 * np.pi / split
395 |     symname = str(sym)
396 | 
397 |     if symname == "any":
398 |         rots = []
399 |         for sx in range(split):
400 |             for sy in range(split):
401 |                 for sz in range(split):
402 |                     r = (
403 |                         _Rotation.from_rotvec(ax["x"] * snap * sx)
404 |                         * _Rotation.from_rotvec(ax["y"] * snap * sy)
405 |                         * _Rotation.from_rotvec(ax["z"] * snap * sz)
406 |                     )
407 |                     rots.append(r.as_matrix())
408 |         return np.array(rots)  # note: this brute-force enumeration is very inefficient
409 |     elif symname == "cube":
410 |         return np.array(cube_group)
411 |     elif symname == "box":
412 |         return np.array(cube_flip_group)
413 |     elif symname.endswith("flip"):
414 |         return np.array([np.eye(3), rot180[symname[:1]]])
415 |     elif symname == "none":
416 |         return np.array([np.eye(3)])
417 |     elif symname.endswith("square-tube"):
418 |         return np.array([g @ h for g in qtr[symname[:1]][:2] for h in cube_flip_group])
419 |     elif symname.endswith("square-pyramid"):
420 |         return np.array(qtr[symname[:1]])
421 |     else:
422 |         axis = symname[:1]
423 |         vec = ax[axis]  # norm(vec) == 1
424 |         rots = np.array(
425 |             [_Rotation.from_rotvec(vec * snap * s).as_matrix() for s in range(split)]
426 |         )
427 |         if symname.endswith("cylinder"):
428 |             half_axis = "x" if axis == "y" else "y"  # any other axis is ok
429 |             rots = np.concatenate([rots, rots @ rot180[half_axis]], axis=0)
430 |         # otherwise cone
431 |         return rots
432 | 
--------------------------------------------------------------------------------
/common/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = cutoop
3 | version = 0.1.0
4 | author = Weiyao Huang
5 | author_email = jy.cat@qq.com
6 | description = Common UTilities for Omni6DPose
7 | long_description = file: README.rst, CHANGELOG.rst, LICENSE.rst
8 | long_description_content_type = text/x-rst
9 | keywords = pose_estimation
10 | license = MIT
11 | classifiers =
12 |     Programming Language :: Python :: 3
13 | 
14 | [options]
15 | python_requires = >=3.7
16 | install_requires =
17 |     importlib-metadata; python_version<"3.10"
18 |     numpy>=1.21
19 |     ipdb>=0.13.9
20 |     matplotlib>=3.5
21 |     opencv_python>=4.8.0.76
22 |     Pillow>=8.3
23 |     rich>=13.7.0
24 |     scipy>=1.7.3
25 |     tqdm>=4.65.2
26 |     typing_extensions>=4.7.1
27 | 
28 | package_dir=
29 |     cutoop=.
30 | 
31 | [build-system]
32 | requires = ["setuptools"]
33 | build-backend = "setuptools.build_meta"
--------------------------------------------------------------------------------
/common/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | if __name__ == "__main__":
4 |     setup()
5 | 
--------------------------------------------------------------------------------
/common/transform.py:
--------------------------------------------------------------------------------
1 | """
2 | 3D/2D Affine Transformation
3 | ===========================
4 | """
5 | 
6 | import numpy as np
7 | from numpy import ndarray
8 | 
9 | from .data_types import CameraIntrinsicsBase
10 | 
11 | 
12 | def toAffine(scale_N: ndarray, rot_Nx3x3: ndarray, tr_Nx3: ndarray):
13 |     """Compose scales, rotations and translations into affine transformations of shape [N, 4, 4]."""
14 |     if scale_N.shape[0] == 0:
15 |         return np.zeros((0, 4, 4))
16 |     rot_Nx3x3 = rot_Nx3x3.reshape(-1, 3, 3) * scale_N.reshape(-1, 1, 1)
17 |     tr_Nx3 = tr_Nx3.reshape(-1, 3)
18 |     res = np.repeat(np.eye(4)[np.newaxis, ...], rot_Nx3x3.shape[0], axis=0)
19 |     res[:, :3, :3] = rot_Nx3x3
20 |     res[:, :3, 3] = tr_Nx3
21 |     return res
22 | 
23 | 
24 | def sRTdestruct(rts_Nx4x4: ndarray) -> "tuple[ndarray, ndarray, ndarray]":
25 |     """Destructure multiple 4x4 affine transformations into scales, rotations and translations.
26 | 
27 |     :return: scales, rotations and translations of shape [N], [N, 3, 3], [N, 3] respectively
28 |     """
29 |     if rts_Nx4x4.shape[0] == 0:
30 |         return np.zeros(0), np.zeros((0, 3, 3)), np.zeros((0, 3))
31 |     scales = np.cbrt(np.linalg.det(rts_Nx4x4[:, :3, :3]))
32 |     return (
33 |         scales,
34 |         (rts_Nx4x4[:, :3, :3] / scales.reshape(-1, 1, 1)),
35 |         rts_Nx4x4[:, :3, 3],
36 |     )
37 | 
38 | 
39 | def depth2xyz(depth_img: ndarray, intrinsics: CameraIntrinsicsBase):
40 |     """Transform ``depth_img`` pixel values to 3D coordinates in cv space, using camera intrinsics.
41 | 
42 |     About the different camera spaces:
43 | 
44 |     - cv space: x goes right, y goes down, and you look along the positive z axis
45 |     - blender camera space: x goes right, y goes up, and you look along the negative z axis
46 | 
47 |     :param depth_img: 2D matrix with shape (H, W)
48 |     :returns: coordinates of each pixel with shape (H, W, 3)
49 |     """
50 | 
51 |     # scale camera parameters
52 |     h, w = depth_img.shape
53 |     scale_x = w / intrinsics.width
54 |     scale_y = h / intrinsics.height
55 |     fx = intrinsics.fx * scale_x
56 |     fy = intrinsics.fy * scale_y
57 |     x_offset = intrinsics.cx * scale_x
58 |     y_offset = intrinsics.cy * scale_y
59 | 
60 |     indices = np.indices((h, w), dtype=np.float32).transpose(1, 2, 0)
61 |     z_e = depth_img
62 |     x_e = (indices[..., 1] - x_offset) * z_e / fx
63 |     y_e = (indices[..., 0] - y_offset) * z_e / fy
64 |     xyz_img = np.stack([x_e, y_e, z_e], axis=-1)  # Shape: [H x W x 3]
65 |     return xyz_img
66 | 
67 | 
68 | def pixel2xyz(h: int, w: int, pixel: ndarray, intrinsics: CameraIntrinsicsBase):
69 |     """
70 |     Transform `(pixel[0], pixel[1])` to a normalized 3D direction vector in cv space, using camera intrinsics.
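
    A minimal sketch (illustrative; it assumes ``CameraIntrinsicsBase`` can be constructed
    directly from ``fx``/``fy``/``cx``/``cy``/``width``/``height`` keyword arguments, which is
    not shown in this file):

    .. testcode::

        import numpy as np
        from cutoop.data_types import CameraIntrinsicsBase
        from cutoop.transform import pixel2xyz

        # hypothetical 640x480 pinhole camera
        intr = CameraIntrinsicsBase(fx=600, fy=600, cx=320, cy=240, width=640, height=480)
        # the ray through the principal point is the optical axis
        ray = pixel2xyz(480, 640, np.array([240, 320]), intr)
        assert np.allclose(ray, [0, 0, 1])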
71 | 72 | :param h: height of the actual image 73 | :param w: width of the actual image 74 | """ 75 | 76 | # scale camera parameters 77 | scale_x = w / intrinsics.width 78 | scale_y = h / intrinsics.height 79 | fx = intrinsics.fx * scale_x 80 | fy = intrinsics.fy * scale_y 81 | x_offset = intrinsics.cx * scale_x 82 | y_offset = intrinsics.cy * scale_y 83 | 84 | x = (pixel[1] - x_offset) / fx 85 | y = (pixel[0] - y_offset) / fy 86 | vec = np.array([x, y, 1]) 87 | return vec / np.linalg.norm(vec) 88 | 89 | 90 | def transform_coordinates_3d(coordinates: ndarray, sRT: ndarray): 91 | """Apply 3D affine transformation to pointcloud. 92 | 93 | :param coordinates: ndarray of shape [3, N] 94 | :param sRT: ndarray of shape [4, 4] 95 | 96 | :returns: new pointcloud of shape [3, N] 97 | """ 98 | assert coordinates.shape[0] == 3 99 | coordinates = np.vstack( 100 | [coordinates, np.ones((1, coordinates.shape[1]), dtype=np.float32)] 101 | ) 102 | new_coordinates = sRT @ coordinates 103 | new_coordinates = new_coordinates[:3, :] / new_coordinates[3, :] 104 | return new_coordinates 105 | 106 | 107 | def calculate_2d_projections(coordinates_3d: ndarray, intrinsics_K: ndarray): 108 | """ 109 | :param coordinates_3d: [3, N] 110 | :param intrinsics_K: K matrix [3, 3] (the return value of :meth:`.data_types.CameraIntrinsicsBase.to_matrix`) 111 | 112 | :returns: projected_coordinates: [N, 2] 113 | """ 114 | projected_coordinates = intrinsics_K @ coordinates_3d 115 | projected_coordinates = projected_coordinates[:2, :] / projected_coordinates[2, :] 116 | projected_coordinates = projected_coordinates.transpose() 117 | projected_coordinates = np.array(projected_coordinates, dtype=np.int32) 118 | 119 | return projected_coordinates 120 | -------------------------------------------------------------------------------- /common/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper Functions 3 | ================ 4 | 5 | Common Helpers 6 | -------------- 7 | 8 | .. autofunction:: mock_time 9 | 10 | .. autofunction:: pool_map 11 | 12 | .. autofunction:: save_pctxt 13 | 14 | Visualization Helpers 15 | --------------------- 16 | 17 | .. autofunction:: draw_bboxes 18 | 19 | .. autofunction:: draw_text 20 | 21 | .. autofunction:: draw_object_label 22 | 23 | .. autofunction:: draw_3d_bbox 24 | 25 | .. 
autofunction:: draw_pose_axes 26 | 27 | """ 28 | 29 | from contextlib import contextmanager 30 | import logging 31 | import time 32 | from typing import Callable, TypeVar 33 | from tqdm import tqdm 34 | import multiprocessing.pool 35 | import cv2 36 | import numpy as np 37 | 38 | from .bbox import create_3d_bbox 39 | from .transform import ( 40 | calculate_2d_projections, 41 | transform_coordinates_3d, 42 | ) 43 | from .data_types import CameraIntrinsicsBase 44 | 45 | # used for drawing 46 | _bbox_coef = np.array( 47 | [ 48 | [1, 1, 1], 49 | [1, 1, -1], 50 | [-1, 1, 1], 51 | [-1, 1, -1], 52 | [1, -1, 1], 53 | [1, -1, -1], 54 | [-1, -1, 1], 55 | [-1, -1, -1], 56 | ] 57 | ) 58 | 59 | 60 | def _draw_seg( 61 | img: np.ndarray, 62 | start: np.ndarray, 63 | end: np.ndarray, 64 | start_color: np.ndarray, 65 | end_color: "np.ndarray | None" = None, 66 | thickness_coef=1, 67 | ): 68 | if end_color is None: 69 | end_color = start_color 70 | 71 | # automatically adjust line thickness according to image resolution 72 | thickness = round(img.shape[1] / 320 * thickness_coef) 73 | steps = 10 74 | step = (end - start) / steps 75 | s_step = (end_color - start_color) / steps 76 | for i in range(steps): 77 | x, y = start + (i * step), start + ((i + 1) * step) 78 | sx = start_color + ((i + 0.5) * s_step) 79 | x = tuple(x.astype(int)) 80 | y = tuple(y.astype(int)) 81 | sx = tuple(sx.astype(int)) 82 | # set_trace() 83 | img = cv2.line( 84 | np.ascontiguousarray(img.copy()), 85 | x, 86 | y, 87 | (int(sx[0]), int(sx[1]), int(sx[2])), 88 | thickness, 89 | ) 90 | 91 | return img 92 | 93 | 94 | def draw_bboxes(img, projected_bbox_Nx2, transformed_bbox_Nx3, color=None, thickness=1): 95 | """ 96 | :param color: can be a 3-element array/tuple. 97 | """ 98 | if color is None: 99 | colors = (_bbox_coef * 255 + 255) / 2 100 | else: 101 | colors = np.tile(np.array(color).reshape(3), [8, 1]) 102 | 103 | projected_bbox_Nx2 = np.int32(projected_bbox_Nx2).reshape(-1, 2) 104 | lines = [ 105 | [4, 5], 106 | [5, 7], 107 | [6, 4], 108 | [7, 6], 109 | [0, 4], 110 | [1, 5], 111 | [2, 6], 112 | [3, 7], 113 | [0, 1], 114 | [1, 3], 115 | [2, 0], 116 | [3, 2], 117 | ] 118 | # sort by distance 119 | lines.sort( 120 | reverse=True, 121 | key=lambda x: ( 122 | (transformed_bbox_Nx3[x[0]] + transformed_bbox_Nx3[x[1]]) ** 2 123 | ).sum(), 124 | ) 125 | for i, j in lines: 126 | img = _draw_seg( 127 | img, 128 | projected_bbox_Nx2[i], 129 | projected_bbox_Nx2[j], 130 | colors[i], 131 | colors[j], 132 | thickness_coef=thickness, 133 | ) 134 | 135 | return img 136 | 137 | 138 | def draw_text(img: np.ndarray, text: str, pos: np.ndarray) -> np.ndarray: 139 | """draw black text with red border""" 140 | text_args = { 141 | "fontFace": cv2.FONT_HERSHEY_SIMPLEX, 142 | "fontScale": 0.4 * img.shape[1] / 640, 143 | "lineType": cv2.LINE_AA, 144 | } 145 | img = cv2.putText( 146 | img, str(text), org=pos, color=(255, 0, 0), thickness=6, **text_args 147 | ) 148 | img = cv2.putText( 149 | img, str(text), org=pos, color=(0, 0, 0), thickness=1, **text_args 150 | ) 151 | return img 152 | 153 | 154 | def draw_3d_bbox( 155 | img: np.ndarray, 156 | intrinsics: CameraIntrinsicsBase, 157 | sRT_4x4: np.ndarray, 158 | bbox_side_len: "list[float]", 159 | color=None, 160 | thickness=1, 161 | ) -> np.ndarray: 162 | """Visualize predicted 3D bounding box. 163 | 164 | :param color: See :func:`draw_bboxes`. 165 | 166 | :return: The image with drawn bounding box. 
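
    A usage sketch (illustrative; it assumes ``CameraIntrinsicsBase`` accepts
    ``fx``/``fy``/``cx``/``cy``/``width``/``height`` keyword arguments):

    .. testcode::

        import numpy as np
        from cutoop.data_types import CameraIntrinsicsBase
        from cutoop.utils import draw_3d_bbox

        img = np.zeros((480, 640, 3), dtype=np.uint8)
        intr = CameraIntrinsicsBase(fx=600, fy=600, cx=320, cy=240, width=640, height=480)
        sRT = np.eye(4)
        sRT[2, 3] = 1.0  # place the object one meter in front of the camera
        out = draw_3d_bbox(img, intr, sRT, [0.1, 0.2, 0.1], color=(0, 255, 0))
        assert out.shape == img.shape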
167 | """ 168 | 169 | # set_trace() 170 | matK = intrinsics.to_matrix() 171 | h, w = img.shape[0], img.shape[1] 172 | scale_y = h / intrinsics.height 173 | scale_x = w / intrinsics.width 174 | scale_2d = np.array([scale_x, scale_y]) 175 | 176 | bbox_3d = create_3d_bbox(bbox_side_len) 177 | transformed_bbox_Nx3 = transform_coordinates_3d(bbox_3d, sRT_4x4) 178 | projected_bbox = calculate_2d_projections(transformed_bbox_Nx3, matK) 179 | projected_bbox = np.array(projected_bbox * scale_2d[None, ...], dtype=np.int32) 180 | img = draw_bboxes( 181 | img, projected_bbox, transformed_bbox_Nx3.T, color=color, thickness=thickness 182 | ) 183 | 184 | return img 185 | 186 | 187 | def draw_pose_axes( 188 | img: np.ndarray, 189 | intrinsics: CameraIntrinsicsBase, 190 | sRT_4x4: np.ndarray, 191 | length: float, 192 | thickness=1, 193 | ) -> np.ndarray: 194 | """Visualize predicted pose. The _XYZ_ axes are colored by _RGB_ respectively. 195 | 196 | :return: The image with drawn pose axes. 197 | """ 198 | 199 | # set_trace() 200 | matK = intrinsics.to_matrix() 201 | h, w = img.shape[0], img.shape[1] 202 | scale_y = h / intrinsics.height 203 | scale_x = w / intrinsics.width 204 | scale_2d = np.array([scale_x, scale_y]) 205 | 206 | axes_ends = transform_coordinates_3d( 207 | np.concatenate( 208 | [np.zeros((3, 1)), np.diag([length, length, length]) / 2], axis=1 209 | ), 210 | sRT_4x4, 211 | ) 212 | origin, ax, ay, az = np.array( 213 | calculate_2d_projections(axes_ends, matK) * scale_2d[None, ...], 214 | dtype=np.int32, 215 | ) 216 | img = _draw_seg(img, origin, ax, np.array([255, 0, 0]), thickness_coef=thickness) 217 | img = _draw_seg(img, origin, ay, np.array([0, 255, 0]), thickness_coef=thickness) 218 | img = _draw_seg(img, origin, az, np.array([0, 0, 255]), thickness_coef=thickness) 219 | 220 | return img 221 | 222 | 223 | def draw_object_label( 224 | img: np.ndarray, intrinsics: CameraIntrinsicsBase, sRT_4x4: np.ndarray, label: str 225 | ) -> np.ndarray: 226 | """draw label text at the center of the object. 227 | 228 | :return: The image with drawn object label. 229 | """ 230 | 231 | matK = intrinsics.to_matrix() 232 | h, w = img.shape[0], img.shape[1] 233 | scale_y = h / intrinsics.height 234 | scale_x = w / intrinsics.width 235 | scale_2d = np.array([scale_x, scale_y]) 236 | 237 | pos3d = sRT_4x4[:3, 3].reshape(3, 1) 238 | pos = np.array(calculate_2d_projections(pos3d, matK)[0] * scale_2d, dtype=np.int32) 239 | 240 | img = draw_text(img, label, pos) 241 | 242 | return img 243 | 244 | 245 | @contextmanager 246 | def mock_time( 247 | fmt: str, logger: "logging.Logger | None" = None, use_print=False, debug=False 248 | ): 249 | """Record time usage. 250 | 251 | :param fmt: info print format 252 | :param use_print: use `print` instead of `logger.info` to display information 253 | :param logger: specify logger. Defaults to the ``'timer' logger of root``. 254 | :param debug: logging using debug level instead of info level. 255 | 256 | .. testsetup:: 257 | 258 | from cutoop.utils import mock_time 259 | 260 | Usage: 261 | 262 | .. 
testcode:: 263 | 264 | with mock_time("action took %.4fs"): 265 | # some heavy operation 266 | pass 267 | 268 | """ 269 | if logger == None: 270 | logger = logging.root.getChild("timer") 271 | 272 | start = time.perf_counter() 273 | yield 274 | stop = time.perf_counter() 275 | 276 | if use_print: 277 | print(fmt % (stop - start)) 278 | elif debug: 279 | logger.debug(fmt, stop - start) 280 | else: 281 | logger.info(fmt, stop - start) 282 | 283 | 284 | def save_pctxt( 285 | file: str, 286 | pts: np.ndarray, 287 | rgb: "np.ndarray | None" = None, 288 | normal: "np.ndarray | None" = None, 289 | ): 290 | """Save point cloud array to file. 291 | 292 | :param pts: point cloud array with shape (n, 3) 293 | :param file: dest file name 294 | :param rgb: optional rgb data with shape (n, 3), **conflicting with normal**. 295 | The result format would be ``X Y Z R G B`` for one line. 296 | :param normal: optional normal data with shape (n, 3), **conflicting with rgb**. 297 | The result format would be ``X Y Z NX NY NZ`` for one line. 298 | """ 299 | x, y, z = pts.T 300 | if rgb is not None: 301 | r, g, b = rgb.T 302 | with open(file, "w") as f: 303 | for i in range(x.shape[0]): 304 | f.write(f"{x[i]} {y[i]} {z[i]} {r[i]} {g[i]} {b[i]}\n") 305 | elif normal is not None: 306 | r, g, b = normal.T 307 | with open(file, "w") as f: 308 | for i in range(x.shape[0]): 309 | f.write(f"{x[i]} {y[i]} {z[i]} {r[i]} {g[i]} {b[i]}\n") 310 | else: 311 | np.savetxt(file, pts, fmt="%.6f") 312 | 313 | 314 | _R = TypeVar("_R") 315 | 316 | 317 | def pool_map( 318 | func: Callable[..., _R], items: list, processes=8, use_tqdm=True, desc="pool map" 319 | ) -> "list[_R]": 320 | """Thread pooling version of `map`. Equivalent to ``map(func, items)``, except that 321 | the argument should be a list (implementing ``__len__``) instead of an iterator. 322 | 323 | :param processes: number of threads. 324 | :param use_tqdm: whether to display a tqdm bar. 325 | :param desc: set the title of tqdm progress bar if ``use_tqdm`` is set. 326 | 327 | .. doctest:: 328 | 329 | >>> def square(x): 330 | ... return x * x 331 | >>> cutoop.utils.pool_map(square, list(range(10))) 332 | [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] 333 | 334 | """ 335 | 336 | def run(pair): 337 | id, arg = pair 338 | return id, func(arg) 339 | 340 | with multiprocessing.pool.ThreadPool(processes=processes) as pool: 341 | mapper = pool.imap_unordered(run, enumerate(items)) 342 | ret = [None] * len(items) 343 | if use_tqdm: 344 | mapper = tqdm(mapper, total=len(items), desc=f"{desc} (proc={processes})") 345 | for id, r in mapper: 346 | ret[id] = r 347 | return ret 348 | --------------------------------------------------------------------------------
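A closing sketch of the non-visualization helpers above (illustrative only; the output path is hypothetical):

    import numpy as np
    from cutoop.utils import mock_time, save_pctxt

    pts = np.random.rand(100, 3)
    rgb = (np.random.rand(100, 3) * 255).astype(int)
    with mock_time("saved point cloud in %.4fs", use_print=True):
        save_pctxt("/tmp/cloud.txt", pts, rgb=rgb)  # one "X Y Z R G B" line per point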