├── .editorconfig ├── .gitignore ├── LICENSE ├── README.md ├── assets ├── ho-cap-demo-all-cameras.gif ├── image_label_viewer.png ├── sequence_3d_viewer.gif ├── sequence_pose_viewer.png ├── sequence_renderer_color.png ├── sequence_renderer_mask.png └── vis_labels.png ├── config ├── .gitignore ├── benchmarks │ └── benchmark_downloader.py ├── hocap_benchmarks.yaml ├── hocap_hpe.json ├── hocap_info.yaml ├── hocap_odet.json ├── hocap_ope.json ├── hocap_recordings.yaml └── mano_info.yaml ├── datasets └── .gitignore ├── examples ├── evaluate_hand_pose.py ├── evaluate_object_detection.py ├── evaluate_object_pose.py ├── image_label_viewer.py ├── sequence_3d_viewer.py ├── sequence_pose_viewer.py └── sequence_renderer.py ├── hocap_toolkit ├── benchmarks │ ├── __init__.py │ └── groundtruth_generator.py ├── factory │ ├── __init__.py │ └── dataset_factory.py ├── layers │ ├── __init__.py │ ├── mano_group_layer.py │ ├── mano_layer.py │ ├── object_group_layer.py │ └── object_layer.py ├── loaders │ ├── __init__.py │ └── sequence_loader.py ├── renderers │ ├── __init__.py │ ├── renderer_pyrd.py │ └── sequence_renderer.py └── utils │ ├── __init__.py │ ├── color_info.py │ ├── common_imports.py │ ├── cv_utils.py │ ├── io.py │ ├── mano_info.py │ ├── misc.py │ └── transforms.py ├── pyproject.toml ├── results └── .gitignore └── tools ├── hocap_dataset_split.py └── hocap_downloader.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # .editorconfig 2 | # Check http://editorconfig.org for more information 3 | # This file is for unifying the coding style for different editors and IDEs 4 | 5 | # top-most EditorConfig file 6 | root = true 7 | 8 | # Unix-style newlines with a newline ending every file 9 | [*] 10 | end_of_line = lf 11 | insert_final_newline = true 12 | trim_trailing_whitespace = true 13 | indent_style = space 14 | charset = utf-8 15 | 16 | [*.py] 17 | indent_size = 4 18 | 19 | [Makefile] 20 | indent_style = tab 21 | 22 | [*.md] 23 | trim_trailing_whitespace = false 24 | tab_width = 2 25 | indent_size = 2 26 | 27 | [*.{json,yml,yaml,xml,sh,launch}] 28 | indent_size = 2 29 | tab_width = 2 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.zip 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | *.egg-info/ 13 | 14 | # IDEs and editors 15 | .idea 16 | .eclipse 17 | .vscode 18 | 19 | # Mac 20 | .DS_Store 21 | 22 | # Environments 23 | .env 24 | .venv 25 | env/ 26 | venv/ 27 | ENV/ 28 | env.bak/ 29 | venv.bak/ 30 | 31 | # Jupyter Notebook 32 | .ipynb_checkpoints 33 | 34 | # Others 35 | .*_history 36 | build.log 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HOCap Toolkit 2 | 3 | [![Python 3.10](https://img.shields.io/badge/Python-3.10-3776AB.svg)](https://www.python.org/downloads/release/python-31015/) [![PyTorch 2.3.1](https://img.shields.io/badge/PyTorch-2.3.1-EE4C2C.svg)](https://pytorch.org/) [![CUDA 11.8](https://img.shields.io/badge/CUDA-11.8-76B900.svg)](https://developer.nvidia.com/cuda-toolkit/) [![ROS Melodic](https://img.shields.io/badge/ROS-Melodic-22314E.svg)](http://wiki.ros.org/melodic/) ![GPLv3.0 License](https://img.shields.io/badge/License-GPL--3.0-3DA639.svg) 4 
| 5 | The HOCap Toolkit is a Python package that provides evaluation and visualization tools for the HO-Cap dataset. 6 | 7 | --- 8 | 9 | **HO-Cap: A Capture System and Dataset for 3D Reconstruction and Pose Tracking of Hand-Object Interaction** 10 | 11 | Jikai Wang, Qifan Zhang, Yu-Wei Chao, Bowen Wen, Xiaohu Guo, Yu Xiang 12 | 13 | [ [arXiv](https://arxiv.org/abs/2406.06843) ] [ [Project page](https://irvlutd.github.io/HOCap/) ] 14 | 15 | ![hocap-demo-video](./assets/ho-cap-demo-all-cameras.gif) 16 | 17 | --- 18 | 19 | ## Contents 20 | 21 | - [HOCap Toolkit](#hocap-toolkit) 22 | - [Contents](#contents) 23 | - [News](#news) 24 | - [BibTeX Citation](#bibtex-citation) 25 | - [License](#license) 26 | - [Installation](#installation) 27 | - [Download the HOCap Dataset](#download-the-hocap-dataset) 28 | - [Labels in the HOCap Dataset](#labels-in-the-hocap-dataset) 29 | - [Loading Dataset and Visualizing Samples](#loading-dataset-and-visualizing-samples) 30 | - [Evaluation](#evaluation) 31 | - [Hand Pose Estimation Evaluation](#hand-pose-estimation-evaluation) 32 | - [Object Pose Estimation Evaluation](#object-pose-estimation-evaluation) 33 | - [Object Detection Evaluation](#object-detection-evaluation) 34 | - [HOCap Dataset Split for Training and Testing](#hocap-dataset-split-for-training-and-testing) 35 | 36 | ## News 37 | - :warning::warning: **2025-01-13**: We fixed the bug in image labels for "hand_joints_3d" and "hand_joints_2d". Please **re-download** the [labels](https://utdallas.box.com/s/ayd4st2wo588z2yqbuxalptxnz2qxlj5) and **regenerate** the HPE split dataset. 38 | - **2025-01-13**: The code for image label visualization is added! Please check [here](#loading-dataset-and-visualizing-samples) (item 4). 39 | - **2024-12-15**: The training codes and datasets for YOLO11 and RT-DETR are added! Please check [here](#training-yolo11-and-rt-detr-for-object-detection). 40 | - **2024-12-15**: The Object Collection dataset is added! Please check the [project page](https://irvlutd.github.io/HOCap/) for more details. 42 | - **2024-12-14**: The HO-Cap dataset is updated! Please check the [project page](https://irvlutd.github.io/HOCap/) for more details. 43 | - **2024-06-24**: The HO-Cap dataset is released! Please check the [project page](https://irvlutd.github.io/HOCap/) for more details. 44 | 45 | ## BibTeX Citation 46 | 47 | If HO-Cap helps your research, please consider citing the following: 48 | 49 | ``` 50 | @misc{wang2024hocapcapturedataset3d, 51 | title={HO-Cap: A Capture System and Dataset for 3D Reconstruction and Pose Tracking of Hand-Object Interaction}, 52 | author={Jikai Wang and Qifan Zhang and Yu-Wei Chao and Bowen Wen and Xiaohu Guo and Yu Xiang}, 53 | year={2024}, 54 | eprint={2406.06843}, 55 | archivePrefix={arXiv}, 56 | primaryClass={cs.CV}, 57 | url={https://arxiv.org/abs/2406.06843}, 58 | } 59 | ``` 60 | 61 | ## License 62 | 63 | HOCap Toolkit is released under the [GNU General Public License v3.0](./LICENSE). 64 | 65 | ## Installation 66 | 67 | This code is tested with [Python 3.10](https://docs.python.org/3.10) and [CUDA 11.8](https://developer.nvidia.com/cuda-11-8-0-download-archive) on [Ubuntu 20.04](https://releases.ubuntu.com/focal/). **Make sure CUDA 11.8 is installed on your system before running the code.** 68 | 69 | 1. Clone the HO-Cap repository from GitHub.
70 | 71 | ```bash 72 | git clone https://github.com/IRVLUTD/HO-Cap.git 73 | ``` 74 | 75 | 2. Change the current directory to the cloned repository. 76 | 77 | ```bash 78 | cd HO-Cap 79 | ``` 80 | 81 | 3. Create the conda environment. 82 | 83 | ```bash 84 | conda create -n hocap-toolkit python=3.10 85 | ``` 86 | 87 | 4. Activate the conda environment. 88 | 89 | ```bash 90 | conda activate hocap-toolkit 91 | ``` 92 | 93 | 5. Install PyTorch and torchvision. 94 | 95 | ```bash 96 | python -m pip install torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cu118 --no-cache-dir 97 | ``` 98 | 99 | 6. Install the hocap-toolkit package. 100 | 101 | ```bash 102 | python -m pip install -e . 103 | ``` 104 | 105 | 7. Download the MANO models and code (`mano_v1_2.zip`) from the [MANO website](https://mano.is.tue.mpg.de) and place the extracted `.pkl` files under the `config/mano_models` directory. The directory should look like this: 106 | 107 | ``` 108 | ./config/mano_models 109 | ├── MANO_LEFT.pkl 110 | └── MANO_RIGHT.pkl 111 | ``` 112 | 113 | ## Download the HOCap Dataset 114 | 115 | 1. Run the code below to download the whole dataset: 116 | 117 | ```bash 118 | python tools/hocap_downloader.py --subject_id all 119 | ``` 120 | 121 | 2. Or download the dataset for a specific subject only: 122 | 123 | ```bash 124 | python tools/hocap_downloader.py --subject_id subject_1 125 | ``` 126 | 127 | 3. The downloaded `.zip` files will be extracted to the `./datasets` directory, which should then look like this: 128 | 129 | ```bash 130 | ./datasets 131 | ├── calibration 132 | ├── models 133 | ├── subject_1 134 | │   ├── 20231025_165502 135 | │   │   ├── 037522251142 136 | │   │   │   ├── color_000000.jpg 137 | │   │   │   ├── depth_000000.png 138 | │   │   │   ├── label_000000.npz 139 | │   │   │   └── ... 140 | │   │   ├── 043422252387 141 | │   │   ├── ... 142 | │   │   ├── hololens_kv5h72 143 | │   │   ├── meta.yaml 144 | │   │   ├── poses_m.npy 145 | │   │   ├── poses_o.npy 146 | │   │   └── poses_pv.npy 147 | │   ├── 20231025_165502 148 | │   └── ... 149 | ├── ... 150 | └── subject_9 151 | ``` 152 | 153 | ## Labels in the HOCap Dataset 154 | 155 | The HOCap dataset provides the following labels: 156 | 157 | - 3D hand keypoints 158 | - 2D hand keypoints 159 | - hand bounding boxes 160 | - hand sides 161 | - hand MANO poses 162 | - object 6D poses 163 | - segmentation masks 164 | 165 | ![vis_labels](./assets/vis_labels.png) 166 | 167 | ## Loading Dataset and Visualizing Samples 168 | 169 | 1. The example below shows how to visualize the pose annotations of one frame: 170 | 171 | ```bash 172 | python examples/sequence_pose_viewer.py 173 | ``` 174 | 175 | ![sequence_pose_viewer](./assets/sequence_pose_viewer.png) 176 | 177 | 2. The example below shows how to visualize a sequence in the interactive 3D viewer: 178 | 179 | ```bash 180 | python examples/sequence_3d_viewer.py 181 | ``` 182 | 183 | ![sequence_3d_viewer](./assets/sequence_3d_viewer.gif) 184 | 185 | The 3D viewer provides the following functionalities: 186 | 187 | - `Background`: change the background color. 188 | - `Point Size`: change the point size. 189 | - `Show Skybox`: display/hide the skybox. 190 | - `Show Axes`: display/hide the world coordinate axes. 191 | - `Crop Points`: crop the points outside the table area. 192 | - `Point Clouds`: display/hide the point clouds. 193 | - `Hand Mesh`: display/hide the hand mesh. 194 | - `Object Mesh`: display/hide the object mesh. 195 | - `Frame Slider`: change the frame index.
196 | - `Reset`: reset the camera view and the frame index. 197 | - `Pause/Play`: pause/play the sequence. 198 | - `Exit`: close the viewer. 199 | - `Help Tab`: show the help information. 200 | 201 | 3. The example below shows how to render a sequence offline: 202 | 203 | ```bash 204 | python examples/sequence_renderer.py 205 | ``` 206 | 207 | This will render the color image and segmentation map for all frames in the sequence. The rendered images will be saved in the `/renders/` directory. 208 | 209 | ![sequence_renderer_color](./assets/sequence_renderer_color.png) 210 | ![sequence_renderer_mask](./assets/sequence_renderer_mask.png) 211 | 212 | 4. The example below shows how to visualize the image labels: 213 | 214 | ```bash 215 | python examples/image_label_viewer.py 216 | ``` 217 | 218 | ![image_label_viewer](./assets/image_label_viewer.png) 219 | 220 | ## Evaluation 221 | 222 | HO-Cap provides benchmark evaluations for three tasks: 223 | 224 | - **Hand Pose Estimation (HPE)** (A2J-Transformer[^1] and HaMeR[^2]) 225 | - **Object Pose Estimation (OPE)** (MegaPose[^3] and FoundationPose[^4]) 226 | - **Object Detection (ODET)** (CNOS[^5], GroundingDINO[^6], YOLO11[^7], and RT-DETR[^8]) 227 | 228 | Run the code below to download the example evaluation results: 229 | 230 | ```bash 231 | python config/benchmarks/benchmark_downloader.py 232 | ``` 233 | 234 | If your evaluation results are saved in the same format, the evaluation scripts below can be used to score them. 235 | 236 | ### Hand Pose Estimation Evaluation 237 | 238 | - Evaluate the hand pose estimation performance: 239 | 240 | ```bash 241 | python examples/evaluate_hand_pose.py 242 | ``` 243 | 244 |
245 | You should see the following output: 246 | 247 | ``` 248 | PCK (0.05) PCK (0.10) PCK (0.15) PCK (0.20) MPJPE (mm) 249 | 45.319048 81.247619 91.357143 95.080952 25.657379 250 | ``` 251 | 252 |
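For reference, the PCK and MPJPE numbers above are computed roughly as in the sketch below. This is a simplified stand-in for the logic in `examples/evaluate_hand_pose.py` (the input arrays are random placeholders, and the real script additionally skips missing keypoints):

```python
# Simplified HPE metrics: bbox-normalized PCK and MPJPE (placeholder inputs).
import numpy as np

N, J = 100, 21                     # number of samples, hand joints per sample
pred_3d = np.random.rand(N, J, 3)  # predicted 3D joints (mm)
gt_3d = np.random.rand(N, J, 3)    # ground-truth 3D joints (mm)
pred_2d = np.random.rand(N, J, 2)  # predicted 2D joints (pixels)
gt_2d = np.random.rand(N, J, 2)    # ground-truth 2D joints (pixels)
bboxes = np.array([[0, 0, 640, 480]] * N, dtype=np.float32)  # hand bboxes (x1, y1, x2, y2)

# MPJPE: Euclidean distance per joint, averaged over joints and samples.
mpjpe = np.linalg.norm(pred_3d - gt_3d, axis=2).mean()

# PCK: fraction of joints whose 2D error, normalized by the bbox size, is below a threshold.
box_wh = np.stack([bboxes[:, 2] - bboxes[:, 0], bboxes[:, 3] - bboxes[:, 1]], axis=1)
dists = np.linalg.norm((pred_2d - gt_2d) / box_wh[:, None, :], axis=-1)
for thr in (0.05, 0.10, 0.15, 0.20):
    print(f"PCK ({thr:.2f}): {(dists < thr).mean() * 100:.3f}")
print(f"MPJPE (mm): {mpjpe:.3f}")
```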
253 | 254 | ### Object Pose Estimation Evaluation 255 | 256 | - Evaluate the novel object pose estimation performance: 257 | 258 | ```bash 259 | python examples/evaluate_object_pose.py 260 | ``` 261 | 262 |
263 | You should see the following output: 264 | 265 | ``` 266 | Object_ID ADD-S_err (cm) ADD_err (cm) ADD-S_AUC (%) ADD_AUC (%) 267 | |-------------- |-------------- |-------------- |-------------- |-------------- | 268 | G01_1 0.42 0.72 95.79 92.82 269 | G01_2 0.37 0.69 96.39 93.38 270 | G01_3 0.45 0.82 95.72 92.08 271 | G01_4 0.61 2.73 94.14 74.19 272 | Average 0.46 1.24 95.43 88.04 273 | ``` 274 | 275 |
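The ADD/ADD-S errors in the table are, in essence, the quantities sketched below; this is a simplified version of the logic in `examples/evaluate_object_pose.py`, with random placeholder poses and model points (the AUC columns integrate these per-frame errors up to a 10 cm threshold):

```python
# Simplified ADD / ADD-S computation for one frame (placeholder inputs).
import numpy as np
from scipy.spatial import cKDTree

model_pts = np.random.rand(1000, 3)    # object model points (metres)
gt_pose = np.eye(4, dtype=np.float32)  # ground-truth object-to-camera pose
pred_pose = np.eye(4, dtype=np.float32)
pred_pose[:3, 3] += 0.002              # pretend 2 mm translation error

def apply_pose(pose, pts):
    """Transform Nx3 points by a 4x4 rigid pose."""
    return (pose[:3, :3] @ pts.T).T + pose[:3, 3]

pred_pts = apply_pose(pred_pose, model_pts)
gt_pts = apply_pose(gt_pose, model_pts)

# ADD: mean distance between corresponding model points.
add = np.linalg.norm(pred_pts - gt_pts, axis=1).mean()

# ADD-S: mean distance from each ground-truth point to its nearest predicted point.
adds = cKDTree(pred_pts).query(gt_pts, k=1)[0].mean()

print(f"ADD: {add * 100:.2f} cm, ADD-S: {adds * 100:.2f} cm")
```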
276 | 277 | ### Object Detection Evaluation 278 | 279 | - Evaluate the object detection performance: 280 | 281 | ```bash 282 | python examples/evaluate_object_detection.py 283 | ``` 284 | 285 |
286 | You should see the following output: (click to expand) 287 | 288 | ``` 289 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.016 290 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.023 291 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.018 292 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.002 293 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.018 294 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.014 295 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.036 296 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.036 297 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.036 298 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.005 299 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.037 300 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.017 301 | AP: 0.016 | AP_50: 0.023 | AP_75: 0.018 | AP_s: 0.002 | AP_m: 0.018 | AP_l: 0.014 302 | ``` 303 | 304 |
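The detection results are loaded with pycocotools' `COCO.loadRes()`, so any predictions saved in the standard COCO detection-results format can be scored the same way. A minimal sketch of that format is shown below (the file name, ids, boxes, and scores are made-up placeholders; the `image_id` and `category_id` values must match the ground-truth file `config/benchmarks/odet_gt.json`):

```python
# Write detections in the COCO results format accepted by COCO.loadRes().
import json

detections = [
    {
        "image_id": 0,                       # image id from the ground-truth COCO file
        "category_id": 1,                    # object class id from the ground-truth file
        "bbox": [100.0, 150.0, 80.0, 60.0],  # [x, y, width, height] in pixels
        "score": 0.92,                       # detection confidence
    },
    # ... one entry per detection ...
]

with open("results/my_odet_results.json", "w") as f:  # hypothetical output path
    json.dump(detections, f)
```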
305 | 306 | ## HOCap Dataset Split for Training and Testing 307 | 308 | The train/valid/test split is defined separately for each task (HPE, ODET, OPE) by the files `config/hocap_hpe.json`, `config/hocap_odet.json`, and `config/hocap_ope.json`. Each configuration file has the following structure: 309 | 310 | ```json 311 | { 312 | "train": [[0, 0, 0, 0], ...], 313 | "valid": [...], 314 | "test": [...] 315 | } 316 | ``` 317 | 318 | Each item is in the format `[subject_index, sequence_index, camera_index, frame_index]`. For example, `[0, 0, 0, 0]` refers to the `subject_1/20231022_190534/105322251564` folder and the frames `color_000000.jpg`/`depth_000000.png`. 319 | 320 | To save time, we provide pre-defined splits for each task; the split datasets can be downloaded [here](https://utdallas.box.com/s/dt19tcvhwitz223cjqa5riot6zcf6yba). 321 | 322 | Alternatively, run the code below to split the HOCap dataset manually; the split dataset will be saved in the `./datasets` directory. 323 | 324 | - Hand Pose Estimation (HPE) task: 325 | 326 | ```bash 327 | python tools/hocap_dataset_split.py --task hpe 328 | ``` 329 | 330 | - Object Pose Estimation (OPE) task: 331 | 332 | ```bash 333 | python tools/hocap_dataset_split.py --task ope 334 | ``` 335 | 336 | - Object Detection (ODET) task: 337 | - COCO annotation type: 338 | ```bash 339 | python tools/hocap_dataset_split.py --task odet --anno_type coco 340 | ``` 341 | - YOLO annotation type: 342 | ```bash 343 | python tools/hocap_dataset_split.py --task odet --anno_type yolo 344 | ``` 345 | 346 | [^1]: [A2J-Transformer: Anchor-to-Joint Transformer Network for 3D Interacting Hand Pose Estimation from a Single RGB Image](https://arxiv.org/abs/2304.03635) 347 | [^2]: [Reconstructing Hands in 3D with Transformers](https://arxiv.org/abs/2312.05251) 348 | [^3]: [MegaPose: 6D Pose Estimation of Novel Objects via Render & Compare](https://arxiv.org/abs/2212.06870) 349 | [^4]: [FoundationPose: Unified 6D Pose Estimation and Tracking of Novel Objects](https://arxiv.org/abs/2312.08344) 350 | [^5]: [CNOS: A Strong Baseline for CAD-based Novel Object Segmentation](http://arxiv.org/abs/2307.11067) 351 | [^6]: [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) 352 | [^7]: [YOLOv11: An Overview of the Key Architectural Enhancements](https://arxiv.org/html/2410.17725v1) 353 | [^8]: [DETRs Beat YOLOs on Real-time Object Detection](https://arxiv.org/abs/2304.08069) 354 | -------------------------------------------------------------------------------- /assets/ho-cap-demo-all-cameras.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IRVLUTD/HO-Cap/723ae6f8f5291f074ae309f179eb8556f67ffd19/assets/ho-cap-demo-all-cameras.gif -------------------------------------------------------------------------------- /assets/image_label_viewer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IRVLUTD/HO-Cap/723ae6f8f5291f074ae309f179eb8556f67ffd19/assets/image_label_viewer.png -------------------------------------------------------------------------------- /assets/sequence_3d_viewer.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IRVLUTD/HO-Cap/723ae6f8f5291f074ae309f179eb8556f67ffd19/assets/sequence_3d_viewer.gif -------------------------------------------------------------------------------- /assets/sequence_pose_viewer.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IRVLUTD/HO-Cap/723ae6f8f5291f074ae309f179eb8556f67ffd19/assets/sequence_pose_viewer.png -------------------------------------------------------------------------------- /assets/sequence_renderer_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IRVLUTD/HO-Cap/723ae6f8f5291f074ae309f179eb8556f67ffd19/assets/sequence_renderer_color.png -------------------------------------------------------------------------------- /assets/sequence_renderer_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IRVLUTD/HO-Cap/723ae6f8f5291f074ae309f179eb8556f67ffd19/assets/sequence_renderer_mask.png -------------------------------------------------------------------------------- /assets/vis_labels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IRVLUTD/HO-Cap/723ae6f8f5291f074ae309f179eb8556f67ffd19/assets/vis_labels.png -------------------------------------------------------------------------------- /config/.gitignore: -------------------------------------------------------------------------------- 1 | *.task 2 | *.ckpt 3 | *.pth 4 | *.pkl 5 | benchmarks/*[.json, .txt] -------------------------------------------------------------------------------- /config/benchmarks/benchmark_downloader.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from hocap_toolkit.utils import * 3 | 4 | PROJ_ROOT = Path(__file__).parent.parent.parent 5 | 6 | 7 | def download_box_file(box_link, output_file): 8 | output_path = Path(output_file) 9 | file_name = output_file.name 10 | 11 | resume_header = {} 12 | downloaded_size = 0 13 | 14 | with requests.get(box_link, headers=resume_header, stream=True) as response: 15 | # Check if the request was successful 16 | if response.status_code == 200: 17 | total_size = int(response.headers.get("content-length", 0)) 18 | else: 19 | print(f"Failed to retrieve file info. Status code: {response.status_code}") 20 | return 21 | 22 | if output_path.exists(): 23 | downloaded_size = output_path.stat().st_size 24 | # Check if there's a partial download and get its size 25 | resume_header = {"Range": f"bytes={downloaded_size}-"} 26 | 27 | # Check if the file is already fully downloaded 28 | if downloaded_size == total_size: 29 | tqdm.write(f" ** {file_name} is already downloaded.") 30 | return 31 | 32 | # Send a GET request with the range header if needed 33 | with requests.get(box_link, headers=resume_header, stream=True) as response: 34 | # Check if the request was successful 35 | if response.status_code in [200, 206]: 36 | # Initialize tqdm progress bar 37 | with tqdm( 38 | total=total_size, 39 | initial=downloaded_size, 40 | unit="B", 41 | unit_scale=True, 42 | ncols=80, 43 | ) as pbar: 44 | # Download the file in chunks 45 | with output_path.open("ab") as file: 46 | for chunk in response.iter_content( 47 | chunk_size=1024 * 1024 48 | ): # 1 MB chunks 49 | if chunk: 50 | file.write(chunk) 51 | pbar.update(len(chunk)) 52 | else: 53 | print(f"Failed to download file. 
Status code: {response.status_code}") 54 | 55 | 56 | if __name__ == "__main__": 57 | behchmark_data = read_data_from_yaml("config/hocap_benchmarks.yaml") 58 | 59 | for file_name, file_link in behchmark_data.items(): 60 | tqdm.write(f"- Downloading {file_name}...") 61 | if "demo" in file_name: 62 | save_path = PROJ_ROOT / "results" / f"{file_name}.json" 63 | else: 64 | save_path = PROJ_ROOT / "config" / "benchmarks" / f"{file_name}.json" 65 | download_box_file(file_link, save_path) 66 | -------------------------------------------------------------------------------- /config/hocap_benchmarks.yaml: -------------------------------------------------------------------------------- 1 | hpe_gt: https://utdallas.box.com/shared/static/bjt8jty6ngwjj76nbuj8tncpnfnjrp2i.json 2 | odet_gt_images: https://utdallas.box.com/shared/static/6hd03ii4fzpqgjq1d8jekrp94ohvfo8z.json 3 | odet_gt: https://utdallas.box.com/shared/static/iq005s6yc8g3ktc08wfi28vxu7lerime.json 4 | ope_gt: https://utdallas.box.com/shared/static/w3akltk94wnz5nj25r371rpqbemmkd53.json 5 | hpe_demo: https://utdallas.box.com/shared/static/evtuw2iyk4okkpuv23z4ur0r4you2w74.json 6 | odet_demo: https://utdallas.box.com/shared/static/024x6jcjgfhu0dum89shy5urxgy5fmps.json 7 | ope_demo: https://utdallas.box.com/shared/static/mg67undy02c0roeasxv3hnjalg9059n5.json 8 | -------------------------------------------------------------------------------- /config/hocap_info.yaml: -------------------------------------------------------------------------------- 1 | subject_ids: 2 | - subject_1 3 | - subject_2 4 | - subject_3 5 | - subject_4 6 | - subject_5 7 | - subject_6 8 | - subject_7 9 | - subject_8 10 | - subject_9 11 | object_classes: 12 | - G01_1 13 | - G01_2 14 | - G01_3 15 | - G01_4 16 | - G02_1 17 | - G02_2 18 | - G02_3 19 | - G02_4 20 | - G04_1 21 | - G04_2 22 | - G04_3 23 | - G04_4 24 | - G05_1 25 | - G05_2 26 | - G05_3 27 | - G05_4 28 | - G06_1 29 | - G06_2 30 | - G06_3 31 | - G06_4 32 | - G07_1 33 | - G07_2 34 | - G07_3 35 | - G07_4 36 | - G09_1 37 | - G09_2 38 | - G09_3 39 | - G09_4 40 | - G10_1 41 | - G10_2 42 | - G10_3 43 | - G10_4 44 | - G11_1 45 | - G11_2 46 | - G11_3 47 | - G11_4 48 | - G15_1 49 | - G15_2 50 | - G15_3 51 | - G15_4 52 | - G16_1 53 | - G16_2 54 | - G16_3 55 | - G16_4 56 | - G18_1 57 | - G18_2 58 | - G18_3 59 | - G18_4 60 | - G19_1 61 | - G19_2 62 | - G19_3 63 | - G19_4 64 | - G20_1 65 | - G20_2 66 | - G20_3 67 | - G20_4 68 | - G21_1 69 | - G21_2 70 | - G21_3 71 | - G21_4 72 | - G22_1 73 | - G22_2 74 | - G22_3 75 | - G22_4 76 | - RIGHT_HAND 77 | - LEFT_HAND 78 | object_descriptors: 79 | G01_1: fruit_snacks 80 | G01_2: water_softener_bottle 81 | G01_3: coconut_milk_carton 82 | G01_4: hammer 83 | G02_1: whole_milk_carton 84 | G02_2: cooked_ham 85 | G02_3: chocolate_drink_powder 86 | G02_4: sauce_bottle 87 | G04_1: flapjack_mix 88 | G04_2: chocolate_fudge 89 | G04_3: herring_fillets_can 90 | G04_4: crackers_box 91 | G05_1: vegetable_oil_spread 92 | G05_2: body_lotion 93 | G05_3: peanut_chocolate_box 94 | G05_4: sponge 95 | G06_1: dvd 96 | G06_2: pumpkin_creamer 97 | G06_3: yellow_mustard 98 | G06_4: dish_brush 99 | G07_1: game_controller 100 | G07_2: hot_cocoa_mix 101 | G07_3: dandruff_shampoo 102 | G07_4: toy_axe 103 | G09_1: candy_box 104 | G09_2: toy_car 105 | G09_3: toothpaste_box 106 | G09_4: chocolate_syrup_bottle 107 | G10_1: soup_mix 108 | G10_2: gel_toothpaste 109 | G10_3: chocolate_biscuit_sticks 110 | G10_4: body_wash 111 | G11_1: coconut_water 112 | G11_2: baby_powder 113 | G11_3: baking_soda 114 | G11_4: chocolate_bar 
115 | G15_1: mens_body_wash 116 | G15_2: dandelion_tea 117 | G15_3: cooking_spray 118 | G15_4: joy_controller 119 | G16_1: toilet_cleaner 120 | G16_2: laundry_detergent 121 | G16_3: small_coconut_water 122 | G16_4: fabric_softener_sheets 123 | G18_1: green_tea_latte_mix 124 | G18_2: projector_remote 125 | G18_3: right_shoe 126 | G18_4: left_shoe 127 | G19_1: electric_screwdriver 128 | G19_2: blue_spatula 129 | G19_3: deodorant 130 | G19_4: ping_pong_paddle 131 | G20_1: cappuccino_mix 132 | G20_2: mustard_bottle 133 | G20_3: toilet_cleaner 134 | G20_4: dog_toy_bone 135 | G21_1: moisturizing_lotion 136 | G21_2: playing_cards 137 | G21_3: pink_spatula 138 | G21_4: blue_brush 139 | G22_1: chocolate_bar 140 | G22_2: mayomust_sauce 141 | G22_3: soup_mix 142 | G22_4: gray_spatula 143 | RIGHT_HAND: right_hand 144 | LEFT_HAND: left_hand 145 | sequence_ids: 146 | - '20231022_190534' 147 | - '20231022_192832' 148 | - '20231022_193506' 149 | - '20231022_193630' 150 | - '20231022_193809' 151 | - '20231022_200657' 152 | - '20231022_201316' 153 | - '20231022_201449' 154 | - '20231022_201556' 155 | - '20231022_201942' 156 | - '20231022_202115' 157 | - '20231022_202617' 158 | - '20231022_203100' 159 | - '20231023_162803' 160 | - '20231023_163653' 161 | - '20231023_163929' 162 | - '20231023_164242' 163 | - '20231023_164741' 164 | - '20231023_170018' 165 | - '20231024_154531' 166 | - '20231024_154810' 167 | - '20231024_155008' 168 | - '20231024_161209' 169 | - '20231024_161306' 170 | - '20231024_161937' 171 | - '20231024_162028' 172 | - '20231024_162327' 173 | - '20231024_162409' 174 | - '20231024_162756' 175 | - '20231024_162842' 176 | - '20231024_180111' 177 | - '20231024_180651' 178 | - '20231024_180733' 179 | - '20231024_181413' 180 | - '20231025_110646' 181 | - '20231025_110808' 182 | - '20231025_111118' 183 | - '20231025_111357' 184 | - '20231025_112229' 185 | - '20231025_112332' 186 | - '20231025_112546' 187 | - '20231025_165502' 188 | - '20231025_165807' 189 | - '20231025_170105' 190 | - '20231025_170231' 191 | - '20231025_170650' 192 | - '20231025_170959' 193 | - '20231025_171117' 194 | - '20231026_162155' 195 | - '20231026_162248' 196 | - '20231026_163223' 197 | - '20231026_164131' 198 | - '20231026_164812' 199 | - '20231026_164958' 200 | - '20231027_112303' 201 | - '20231027_113202' 202 | - '20231027_113535' 203 | - '20231027_123403' 204 | - '20231027_123725' 205 | - '20231027_123814' 206 | - '20231027_124057' 207 | - '20231027_124926' 208 | - '20231027_125019' 209 | - '20231027_125315' 210 | device_serials: 211 | - '105322251564' 212 | - '043422252387' 213 | - '037522251142' 214 | - '105322251225' 215 | - '108222250342' 216 | - '117222250549' 217 | - '046122250168' 218 | - '115422250549' 219 | - hololens_kv5h72 220 | -------------------------------------------------------------------------------- /config/hocap_recordings.yaml: -------------------------------------------------------------------------------- 1 | models: https://utdallas.box.com/shared/static/con44iqej33weg9f3rpxof61eh3x2x21.zip 2 | calibration: https://utdallas.box.com/shared/static/nlp4c6vtd0n8o0entxlh1vxdpcdeh0h8.zip 3 | subject_1: https://utdallas.box.com/shared/static/w0voy9bixtxyclo52841xyamock2lxpt.zip 4 | subject_2: https://utdallas.box.com/shared/static/j498kxxrkvaf674tvmt4su4ad0bz9s9f.zip 5 | subject_3: https://utdallas.box.com/shared/static/shklq33yaoozh9gm681nxwnq0o3y0y1d.zip 6 | subject_4: https://utdallas.box.com/shared/static/dew68k7b3ya09t40818gpfxm95oa4yeq.zip 7 | subject_5: 
https://utdallas.box.com/shared/static/mutor2a09kudze1yw173gsfetsru7ces.zip 8 | subject_6: https://utdallas.box.com/shared/static/iyja7rdbjx2ksgjhmdu6mx3zqvaurdni.zip 9 | subject_7: https://utdallas.box.com/shared/static/4g5qyig6i4uz1rgrzkcu9n4mhdjs74m2.zip 10 | subject_8: https://utdallas.box.com/shared/static/khrb5guy8rdwnoqi4euk2w0mk5lslxkn.zip 11 | subject_9: https://utdallas.box.com/shared/static/3x5yitydmbmwolq9bty5dd2udu5v52fc.zip 12 | poses: https://utdallas.box.com/shared/static/2lofbp2yd005d8o213ns77mdrtxg8eep.zip 13 | labels: https://utdallas.box.com/shared/static/ayd4st2wo588z2yqbuxalptxnz2qxlj5.zip 14 | -------------------------------------------------------------------------------- /config/mano_info.yaml: -------------------------------------------------------------------------------- 1 | joint_names: 2 | - wrist 3 | - thumb_mcp 4 | - thumb_pip 5 | - thumb_dip 6 | - thumb_tip 7 | - index_mcp 8 | - index_pip 9 | - index_dip 10 | - index_tip 11 | - middle_mcp 12 | - middle_pip 13 | - middle_dip 14 | - middle_tip 15 | - ring_mcp 16 | - ring_pip 17 | - ring_dip 18 | - ring_tip 19 | - little_mcp 20 | - little_pip 21 | - little_dip 22 | - little_tip 23 | joint_connections: 24 | - - 0 25 | - 1 26 | - - 1 27 | - 2 28 | - - 2 29 | - 3 30 | - - 3 31 | - 4 32 | - - 0 33 | - 5 34 | - - 5 35 | - 6 36 | - - 6 37 | - 7 38 | - - 7 39 | - 8 40 | - - 0 41 | - 9 42 | - - 9 43 | - 10 44 | - - 10 45 | - 11 46 | - - 11 47 | - 12 48 | - - 0 49 | - 13 50 | - - 13 51 | - 14 52 | - - 14 53 | - 15 54 | - - 15 55 | - 16 56 | - - 0 57 | - 17 58 | - - 17 59 | - 18 60 | - - 18 61 | - 19 62 | - - 19 63 | - 20 64 | -------------------------------------------------------------------------------- /datasets/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /examples/evaluate_hand_pose.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from hocap_toolkit.utils import * 3 | 4 | PCK_THRESH = [0.05, 0.1, 0.15, 0.2] # Distance threshold for PCK calculation 5 | 6 | r_or_l = ["left", "right"] 7 | 8 | 9 | def calculate_mpjpe_3d(predicted, ground_truth): 10 | """ 11 | Calculate the Mean Per Joint Position Error (MPJPE) between predicted and ground truth 3D joint positions. 12 | 13 | Parameters: 14 | predicted (numpy.ndarray): The predicted 3D joint positions with shape (N, J, 3). 15 | ground_truth (numpy.ndarray): The ground truth 3D joint positions with shape (N, J, 3). 16 | 17 | Returns: 18 | float: The MPJPE value. 19 | """ 20 | # Calculate the Euclidean distance between the predicted and ground truth positions for each joint 21 | errors = np.linalg.norm(predicted - ground_truth, axis=2) 22 | 23 | # Calculate the mean distance across all joints for each sample 24 | sample_mpjpes = np.mean(errors, axis=1) 25 | 26 | # Calculate the mean MPJPE across all samples 27 | mpjpe = np.mean(sample_mpjpes) 28 | 29 | return mpjpe 30 | 31 | 32 | # def calculate_pck(predicted, ground_truth, bboxes, threshold, normalize): 33 | # """ 34 | # Calculate the Percentage of Correct Keypoints (PCK) for 2D hand pose estimation. 35 | 36 | # Parameters: 37 | # predicted (numpy.ndarray): The predicted 2D joint positions with shape (N, J, 2). 38 | # ground_truth (numpy.ndarray): The ground truth 2D joint positions with shape (N, J, 2). 39 | # bboxes (numpy.ndarray): Bounding boxes of the hands with shape (N, 4). 
40 | # threshold (float): The distance threshold within which a predicted keypoint is considered correct. 41 | # normalize (numpy.ndarray): Normalization factors for distances with shape (N, 2). 42 | 43 | # Returns: 44 | # float: The PCK value (percentage of correct keypoints). 45 | # """ 46 | # N, K, _ = predicted.shape 47 | 48 | # predicted = predicted.astype(np.float32) 49 | # ground_truth = ground_truth.astype(np.float32) 50 | # normalize = normalize.astype(np.float32) 51 | 52 | # box_s = np.zeros((N, 2), dtype=np.float32) 53 | # for i in range(N): 54 | # box_s[i, 0] = bboxes[i, 2] - bboxes[i, 0] 55 | # box_s[i, 1] = bboxes[i, 3] - bboxes[i, 1] 56 | 57 | # distances = np.linalg.norm((predicted - ground_truth) / box_s[:, None, :], axis=-1) 58 | 59 | # acc = np.array([acc_distance(d, threshold) for d in distances.T]) 60 | # valid_acc = acc[acc >= 0] 61 | # cnt = len(valid_acc) 62 | # avg_acc = valid_acc.mean() if cnt > 0 else 0 63 | 64 | # return avg_acc * 100 65 | 66 | 67 | def calculate_pck(predicted, ground_truth, bboxes, thresholds, normalize): 68 | """ 69 | Calculate the Percentage of Correct Keypoints (PCK) for 2D hand pose estimation. 70 | 71 | Parameters: 72 | predicted (numpy.ndarray): The predicted 2D joint positions with shape (N, J, 2). 73 | ground_truth (numpy.ndarray): The ground truth 2D joint positions with shape (N, J, 2). 74 | bboxes (numpy.ndarray): Bounding boxes of the hands with shape (N, 4). 75 | thresholds (list[float]): A list of distance thresholds within which a predicted keypoint is considered correct. 76 | normalize (numpy.ndarray): Normalization factors for distances with shape (N, 2). 77 | 78 | Returns: 79 | dict: A dictionary where the keys are thresholds and the values are the PCK values for each threshold. 80 | """ 81 | N, K, _ = predicted.shape 82 | 83 | predicted = predicted.astype(np.float32) 84 | ground_truth = ground_truth.astype(np.float32) 85 | normalize = normalize.astype(np.float32) 86 | 87 | box_s = np.zeros((N, 2), dtype=np.float32) 88 | for i in range(N): 89 | box_s[i, 0] = bboxes[i, 2] - bboxes[i, 0] 90 | box_s[i, 1] = bboxes[i, 3] - bboxes[i, 1] 91 | 92 | # Normalize the predicted and ground truth keypoints 93 | distances = np.linalg.norm((predicted - ground_truth) / box_s[:, None, :], axis=-1) 94 | 95 | pck_results = [] 96 | for threshold in thresholds: 97 | acc = np.array([acc_distance(d, threshold) for d in distances.T]) 98 | valid_acc = acc[acc >= 0] 99 | cnt = len(valid_acc) 100 | avg_acc = valid_acc.mean() if cnt > 0 else 0 101 | pck_results.append(avg_acc) 102 | 103 | return pck_results 104 | 105 | 106 | def acc_distance(distances, thr=0.5): 107 | """ 108 | Return the percentage below the distance threshold, while ignoring 109 | distances values with -1. 110 | 111 | Parameters: 112 | distances (np.ndarray[N, ]): The normalized distances. 113 | thr (float): Threshold of the distances. 114 | 115 | Returns: 116 | float: Percentage of distances below the threshold. 117 | If all target keypoints are missing, return -1. 
118 | """ 119 | distance_valid = distances != -1 120 | num_distance_valid = distance_valid.sum() 121 | if num_distance_valid > 0: 122 | return (distances[distance_valid] < thr).sum() / num_distance_valid 123 | return -1 124 | 125 | 126 | def get_hand_pose_evaluation(gt_file, pred_file): 127 | gt_result_file = Path(gt_file) 128 | pred_result_file = Path(pred_file) 129 | 130 | hand_json = read_data_from_json(gt_result_file) 131 | hamer_out_json = read_data_from_json(pred_result_file) 132 | 133 | all_pred_keypoints_3d = [] 134 | all_gt_keypoints_3d_full = [] 135 | all_gt_keypoints_2d_full = [] 136 | all_pred_keypoints_2d_full = [] 137 | all_gt_bboxes = [] 138 | 139 | for out_id, out_data in hamer_out_json.items(): 140 | if not out_data: 141 | continue 142 | gt_data = hand_json[out_id] 143 | is_right = np.array(out_data["is_right"], dtype=bool) 144 | pred_keypoints_2d_full = out_data["landmarks_2d"] 145 | pred_keypoints_3d = out_data["landmarks_3d"] 146 | 147 | gt_keypoints_2d_full = [] 148 | gt_bboxes = [] 149 | gt_keypoints_3d_full = [] 150 | pred_keypoints_3d_r_and_l = [] 151 | pred_keypoints_2d_r_and_l = [] 152 | 153 | for n in range(is_right.shape[0]): 154 | rl = r_or_l[int(is_right[n])] 155 | 156 | gt_s_keypoints_2d_full = gt_data["landmarks_2d"][rl] 157 | gt_keypoints_2d_full.append(np.array(gt_s_keypoints_2d_full)) 158 | gt_s_bboxes = np.array(gt_data["bbox"][rl]) 159 | gt_bboxes.append(gt_s_bboxes) 160 | 161 | gt_s_keypoints_3d_full = gt_data["landmarks_3d"][rl] 162 | gt_keypoints_3d_full.append(np.array(gt_s_keypoints_3d_full)) 163 | 164 | pred_keypoints_2d_r_and_l.append(np.array(pred_keypoints_2d_full[rl])) 165 | 166 | gt_keypoints_2d_full = np.stack(gt_keypoints_2d_full) 167 | gt_bboxes = np.stack(gt_bboxes) 168 | 169 | gt_keypoints_3d_full = np.stack(gt_keypoints_3d_full) * 1000 170 | 171 | for n in range(is_right.shape[0]): 172 | rl = r_or_l[int(is_right[n])] 173 | pred_keypoints_3d[rl] = np.array(pred_keypoints_3d[rl]) 174 | if not is_right[n]: 175 | pred_keypoints_3d[rl][:, 0] = -pred_keypoints_3d[rl][:, 0] 176 | 177 | align = pred_keypoints_3d[rl][0] - gt_keypoints_3d_full[n][0] 178 | gt_keypoints_3d_full[n] += align 179 | 180 | pred_keypoints_3d_r_and_l.append(pred_keypoints_3d[rl]) 181 | 182 | all_pred_keypoints_3d.append(np.stack(pred_keypoints_3d_r_and_l)) 183 | all_gt_keypoints_3d_full.append(gt_keypoints_3d_full) 184 | 185 | all_pred_keypoints_2d_full.append(np.stack(pred_keypoints_2d_r_and_l)) 186 | all_gt_keypoints_2d_full.append(gt_keypoints_2d_full) 187 | 188 | all_gt_bboxes.append(gt_bboxes) 189 | 190 | all_pred_keypoints_3d = np.concatenate(all_pred_keypoints_3d, axis=0) 191 | all_gt_keypoints_3d_full = np.concatenate(all_gt_keypoints_3d_full, axis=0) 192 | 193 | all_pred_keypoints_2d_full = np.concatenate(all_pred_keypoints_2d_full, axis=0) 194 | all_gt_keypoints_2d_full = np.concatenate(all_gt_keypoints_2d_full, axis=0) 195 | 196 | all_gt_bboxes = np.concatenate(all_gt_bboxes, axis=0) 197 | 198 | # Calculate PCK for each threshold in PCK_THRESH 199 | pcks = calculate_pck( 200 | all_pred_keypoints_2d_full, 201 | all_gt_keypoints_2d_full, 202 | all_gt_bboxes, 203 | PCK_THRESH, 204 | normalize=np.ones((len(all_pred_keypoints_2d_full), 2)), 205 | ) 206 | 207 | # Calculate MPJPE 208 | mpjpe = calculate_mpjpe_3d(all_pred_keypoints_3d, all_gt_keypoints_3d_full) 209 | 210 | # Prepare data for the DataFrame 211 | pd_data = {} 212 | for i, thresh in enumerate(PCK_THRESH): 213 | pd_data[f"PCK ({thresh:.2f})"] = pcks[i] * 100 214 | 215 | pd_data["MPJPE (mm)"] = mpjpe 216 | 
217 | # Convert the data to a DataFrame and print it 218 | df = pd.DataFrame([pd_data]) 219 | result_str = df.to_string(index=False) 220 | 221 | print(result_str) 222 | 223 | # save to txt 224 | save_txt_file = pred_result_file.parent / f"{pred_result_file.stem}_pck_mpjpe.txt" 225 | save_txt_file.write_text(result_str) 226 | tqdm.write(f" * Results saved to {save_txt_file}") 227 | 228 | 229 | if __name__ == "__main__": 230 | gt_file = "config/benchmarks/hpe_gt.json" 231 | pred_file = "results/hpe_demo.json" 232 | 233 | tqdm.write("- Evaluating Hand Pose Estimation results...") 234 | get_hand_pose_evaluation(gt_file, pred_file) 235 | tqdm.write("- Evaluation Done...") 236 | -------------------------------------------------------------------------------- /examples/evaluate_object_detection.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pycocotools.coco import COCO 3 | from pycocotools.cocoeval import COCOeval 4 | from hocap_toolkit.utils import * 5 | 6 | 7 | def evaluate_object_detection_results(gt_file, pred_file): 8 | coco_gt = COCO(str(gt_file)) 9 | coco_dt = coco_gt.loadRes(str(pred_file)) 10 | 11 | coco_eval = COCOeval(coco_gt, coco_dt, "bbox") 12 | coco_eval.evaluate() 13 | coco_eval.accumulate() 14 | coco_eval.summarize() 15 | 16 | ap_metrics = { 17 | "AP": coco_eval.stats[0], 18 | "AP50": coco_eval.stats[1], 19 | "AP75": coco_eval.stats[2], 20 | "APs": coco_eval.stats[3], 21 | "APm": coco_eval.stats[4], 22 | "APl": coco_eval.stats[5], 23 | "AR1": coco_eval.stats[6], 24 | "AR10": coco_eval.stats[7], 25 | "AR100": coco_eval.stats[8], 26 | "ARs": coco_eval.stats[9], 27 | "ARm": coco_eval.stats[10], 28 | "ARl": coco_eval.stats[11], 29 | } 30 | print( 31 | f"AP: {ap_metrics['AP']:.3f} | AP_50: {ap_metrics['AP50']:.3f} | AP_75: {ap_metrics['AP75']:.3f} | AP_s: {ap_metrics['APs']:.3f} | AP_m: {ap_metrics['APm']:.3f} | AP_l: {ap_metrics['APl']:.3f}" 32 | ) 33 | 34 | # Save to csv 35 | df = pd.DataFrame([ap_metrics]) 36 | save_csv_file = Path(pred_file).parent / f"{Path(pred_file).stem}_ap.csv" 37 | df.to_csv(save_csv_file, index=False) 38 | 39 | # Save to txt 40 | str_metrics = [ 41 | f" Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = {coco_eval.stats[0]*100:.2f}", 42 | f" Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = {coco_eval.stats[1]*100:.2f}", 43 | f" Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = {coco_eval.stats[2]*100:.2f}", 44 | f" Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = {coco_eval.stats[3]*100:.2f}", 45 | f" Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = {coco_eval.stats[4]*100:.2f}", 46 | f" Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = {coco_eval.stats[5]*100:.2f}", 47 | f" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = {coco_eval.stats[6]*100:.2f}", 48 | f" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = {coco_eval.stats[7]*100:.2f}", 49 | f" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = {coco_eval.stats[8]*100:.2f}", 50 | f" Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = {coco_eval.stats[9]*100:.2f}", 51 | f" Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = {coco_eval.stats[10]*100:.2f}", 52 | f" Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = {coco_eval.stats[11]*100:.2f}", 53 | ] 54 | str_metrics = "\n".join(str_metrics) 55 | save_txt_file = 
Path(pred_file).parent / f"{Path(pred_file).stem}_ap.txt" 56 | save_txt_file.write_text(str_metrics) 57 | print(f"AP metrics saved to '{save_csv_file}' and '{save_txt_file}'") 58 | 59 | 60 | if __name__ == "__main__": 61 | gt_file = "config/benchmarks/odet_gt.json" 62 | pred_file = "results/odet_demo.json" 63 | 64 | tqdm.write("- Evaluating Object Detection results...") 65 | evaluate_object_detection_results(gt_file, pred_file) 66 | tqdm.write("- Evaluation Done...") 67 | -------------------------------------------------------------------------------- /examples/evaluate_object_pose.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from scipy.spatial import cKDTree 3 | from hocap_toolkit.utils import * 4 | 5 | PROJ_ROOT = Path(__file__).parent.parent 6 | 7 | 8 | def to_homo(pts): 9 | """ 10 | @pts: (N,3 or 2) will homogeneliaze the last dimension 11 | """ 12 | assert len(pts.shape) == 2, f"pts.shape: {pts.shape}" 13 | homo = np.concatenate((pts, np.ones((pts.shape[0], 1))), axis=-1) 14 | return homo 15 | 16 | 17 | def add_err(pred, gt, model_pts): 18 | """ 19 | Average Distance of Model Points for objects with no indistinguishable views 20 | - by Hinterstoisser et al. (ACCV 2012). 21 | """ 22 | pred_pts = (pred @ to_homo(model_pts).T).T[:, :3] 23 | gt_pts = (gt @ to_homo(model_pts).T).T[:, :3] 24 | e = np.linalg.norm(pred_pts - gt_pts, axis=1).mean() 25 | return e 26 | 27 | 28 | def adi_err(pred, gt, model_pts): 29 | """ 30 | @pred: 4x4 mat 31 | @gt: 32 | @model: (N,3) 33 | """ 34 | pred_pts = (pred @ to_homo(model_pts).T).T[:, :3] 35 | gt_pts = (gt @ to_homo(model_pts).T).T[:, :3] 36 | nn_index = cKDTree(pred_pts) 37 | nn_dists, _ = nn_index.query(gt_pts, k=1, workers=-1) 38 | e = nn_dists.mean() 39 | return e 40 | 41 | 42 | def compute_auc(rec, max_val=0.1): 43 | """ 44 | Compute the Area Under Curve (AUC) for precision-recall curve up to a maximum recall value. 45 | 46 | This function calculates the AUC considering only the part of the precision-recall curve 47 | where the recall value is less than `max_val`. This is useful for scenarios where recall beyond 48 | a certain threshold is not relevant. 49 | 50 | Parameters: 51 | - rec (list or np.array): The recall values for different thresholds. 52 | - max_val (float): The maximum recall value to consider for AUC calculation. 53 | 54 | Returns: 55 | - float: The computed AUC value. 
56 | 57 | Reference: 58 | - https://github.com/wenbowen123/iros20-6d-pose-tracking/blob/main/eval_ycb.py 59 | """ 60 | if len(rec) == 0: 61 | return 0 62 | 63 | rec = np.sort(np.array(rec)) 64 | n = len(rec) 65 | 66 | # Compute precision values based on the recall array 67 | prec = np.arange(1, n + 1) / n 68 | 69 | # Filter recall and precision arrays to include only recall values less than `max_val` 70 | valid_indices = np.where(rec < max_val)[0] 71 | rec = rec[valid_indices] 72 | prec = prec[valid_indices] 73 | 74 | # Prepare modified recall and precision arrays for AUC calculation 75 | mrec = np.concatenate(([0], rec, [max_val])) 76 | mpre = np.concatenate(([0], prec, [prec[-1] if len(prec) > 0 else 0])) 77 | 78 | # Ensure precision is non-decreasing 79 | for i in range(1, len(mpre)): 80 | mpre[i] = max(mpre[i], mpre[i - 1]) 81 | 82 | # Calculate the differences in recall 83 | i = np.where(mrec[1:] != mrec[:-1])[0] + 1 84 | ap = np.sum((mrec[i] - mrec[i - 1]) * mpre[i]) 85 | 86 | return ap / max_val 87 | 88 | 89 | def get_object_pose_evaluation(gt_file, pred_file): 90 | gt_results_file = Path(gt_file) 91 | pred_results_file = Path(pred_file) 92 | 93 | gt_poses = read_data_from_json(gt_results_file) 94 | pred_poses = read_data_from_json(pred_results_file) 95 | object_ids = sorted(pred_poses.keys()) 96 | 97 | pd_data = { 98 | "Object_ID": [], 99 | "ADD-S_err (cm)": [], 100 | "ADD_err (cm)": [], 101 | "ADD-S_AUC (%)": [], 102 | "ADD_AUC (%)": [], 103 | } 104 | adi_errs = [] 105 | add_errs = [] 106 | 107 | for object_id in tqdm(object_ids, total=len(object_ids), ncols=60): 108 | if object_id not in gt_poses: 109 | continue 110 | 111 | object_mesh = trimesh.load( 112 | PROJ_ROOT / "datasets" / f"models/{object_id}/cleaned_mesh_10000.obj", 113 | process=False, 114 | ) 115 | vertices = object_mesh.vertices.astype(np.float32) 116 | 117 | adi_errs_obj = [] 118 | add_errs_obj = [] 119 | for key in sorted(pred_poses[object_id].keys()): 120 | if key not in gt_poses[object_id]: 121 | continue 122 | 123 | gt_ob_in_cam = np.array(gt_poses[object_id][key], dtype=np.float32) 124 | pred_ob_in_cam = np.array(pred_poses[object_id][key], dtype=np.float32) 125 | 126 | adi = adi_err(pred_ob_in_cam, gt_ob_in_cam, vertices.copy()) 127 | add = add_err(pred_ob_in_cam, gt_ob_in_cam, vertices.copy()) 128 | 129 | adi_errs_obj.append(adi) 130 | add_errs_obj.append(add) 131 | 132 | adi_errs.append(adi) 133 | add_errs.append(add) 134 | 135 | ADDS_ERR = np.mean(adi_errs_obj) * 100 136 | ADD_ERR = np.mean(add_errs_obj) * 100 137 | ADDS_AUC = compute_auc(adi_errs_obj, max_val=0.1) * 100 138 | ADD_AUC = compute_auc(add_errs_obj, max_val=0.1) * 100 139 | 140 | pd_data["Object_ID"].append(object_id) 141 | pd_data["ADD-S_err (cm)"].append(ADDS_ERR) 142 | pd_data["ADD_err (cm)"].append(ADD_ERR) 143 | pd_data["ADD-S_AUC (%)"].append(ADDS_AUC) 144 | pd_data["ADD_AUC (%)"].append(ADD_AUC) 145 | 146 | # Average 147 | ADDS_ERR = np.mean(adi_errs) * 100 148 | ADD_ERR = np.mean(add_errs) * 100 149 | ADDS_AUC = compute_auc(adi_errs, max_val=0.1) * 100 150 | ADD_AUC = compute_auc(add_errs, max_val=0.1) * 100 151 | pd_data["Object_ID"].append("Average") 152 | pd_data["ADD-S_err (cm)"].append(ADDS_ERR) 153 | pd_data["ADD_err (cm)"].append(ADD_ERR) 154 | pd_data["ADD-S_AUC (%)"].append(ADDS_AUC) 155 | pd_data["ADD_AUC (%)"].append(ADD_AUC) 156 | 157 | df = pd.DataFrame(pd_data) 158 | 159 | # Save to csv 160 | save_csv_file = pred_results_file.parent / f"{pred_results_file.stem}_add_adds.csv" 161 | df.to_csv(save_csv_file, index=False) 162 
| 163 | # Save to txt 164 | iStr = "{:>15} {:>15} {:>15} {:>15} {:>15}" 165 | result_str = [ 166 | iStr.format( 167 | "Object_ID", 168 | "ADD-S_err (cm)", 169 | "ADD_err (cm)", 170 | "ADD-S_AUC (%)", 171 | "ADD_AUC (%)", 172 | ), 173 | iStr.format( 174 | "|" + "-" * 14, 175 | "|" + "-" * 14, 176 | "|" + "-" * 14, 177 | "|" + "-" * 14, 178 | "|" + "-" * 14 + " |", 179 | ), 180 | ] 181 | for i in range(len(pd_data["Object_ID"])): 182 | result_str.append( 183 | iStr.format( 184 | pd_data["Object_ID"][i], 185 | f"{pd_data['ADD-S_err (cm)'][i]:.2f}", 186 | f"{pd_data['ADD_err (cm)'][i]:.2f}", 187 | f"{pd_data['ADD-S_AUC (%)'][i]:.2f}", 188 | f"{pd_data['ADD_AUC (%)'][i]:.2f}", 189 | ) 190 | ) 191 | result_str = "\n".join(result_str) 192 | save_txt_file = pred_results_file.parent / f"{pred_results_file.stem}_add_adds.txt" 193 | save_txt_file.write_text(result_str) 194 | tqdm.write(f" * Results saved to {save_csv_file}, {save_txt_file}") 195 | 196 | print(result_str) 197 | 198 | 199 | if __name__ == "__main__": 200 | gt_file = "config/benchmarks/ope_gt.json" 201 | pred_file = "results/ope_demo.json" 202 | 203 | tqdm.write(f"- Evaluating Object Pose Estimation results...") 204 | get_object_pose_evaluation(gt_file, pred_file) 205 | 206 | tqdm.write("- Evaluation Done...") 207 | -------------------------------------------------------------------------------- /examples/image_label_viewer.py: -------------------------------------------------------------------------------- 1 | """Example of visualizing hand and object poses of one frame in a sequence.""" 2 | 3 | import os 4 | 5 | os.environ["PYOPENGL_PLATFORM"] = "egl" # GPU-based offscreen rendering 6 | 7 | from hocap_toolkit.utils import * 8 | from hocap_toolkit.loaders import SequenceLoader 9 | from hocap_toolkit.renderers import OffscreenRenderer 10 | 11 | PROJ_ROOT = Path(__file__).parent.parent 12 | 13 | 14 | if __name__ == "__main__": 15 | sequence_folder = PROJ_ROOT / "datasets/HOCap/subject_2/20231022_201449" 16 | 17 | data_loader = SequenceLoader(str(sequence_folder), device="cuda") 18 | rs_serials = data_loader.rs_serials 19 | rs_height = data_loader.rs_height 20 | rs_width = data_loader.rs_width 21 | num_frames = data_loader.num_frames 22 | mano_sides = data_loader.mano_sides 23 | obj_meshes = [trimesh.load(p) for p in data_loader.object_textured_mesh_files] 24 | 25 | # Initialize renderer 26 | renderer = OffscreenRenderer(rs_width, rs_height) 27 | 28 | for frame_id in range(num_frames): 29 | for serial in rs_serials: 30 | image_color = data_loader.get_rgb_image(frame_id, serial) 31 | image_label = data_loader.get_image_label(frame_id, serial) 32 | 33 | if image_label: 34 | cam_K = image_label["cam_K"] 35 | obj_poses = image_label["obj_poses"] 36 | hand_joints_3d = image_label["hand_joints_3d"] 37 | hand_joints_2d = image_label["hand_joints_2d"] 38 | segmentation_mask = image_label["seg_mask"] 39 | obj_class_inds = image_label["obj_class_inds"].astype(int) 40 | obj_class_names = image_label["obj_class_names"].astype(str) 41 | 42 | # Render object poses 43 | render_color, render_depth = renderer.get_render_image( 44 | obj_meshes, obj_poses, cam_K 45 | ) 46 | image_pose = draw_image_overlay(image_color, render_color) 47 | 48 | # Draw hand joints 49 | image_handmarks = image_color.copy() 50 | for idx, marks in enumerate(hand_joints_2d): 51 | side = mano_sides[idx] 52 | image_handmarks = draw_hand_landmarks(image_handmarks, marks, side) 53 | 54 | # Draw segmentation visualization 55 | image_seg = np.zeros_like(image_color) 56 | for idx in 
np.unique(segmentation_mask): 57 | if idx == 0: # skip background 58 | continue 59 | image_seg[segmentation_mask == idx] = HO_CAP_SEG_COLOR[idx].rgb 60 | image_seg = draw_image_overlay(image_color, image_seg) 61 | 62 | labels_vis = draw_image_grid( 63 | [image_pose, image_handmarks, image_seg], 64 | ["ObjectPose", "Handmarks", "Segmentation"], 65 | ) 66 | 67 | # Display visualization 68 | plt.imshow(labels_vis) 69 | plt.title(f"{serial} - frame_{frame_id:06d}") 70 | plt.axis("off") 71 | plt.tight_layout() 72 | plt.show() 73 | plt.close() 74 | 75 | exit() 76 | -------------------------------------------------------------------------------- /examples/sequence_3d_viewer.py: -------------------------------------------------------------------------------- 1 | import open3d as o3d 2 | import open3d.core as o3c 3 | import open3d.visualization.gui as gui 4 | import open3d.visualization.rendering as rendering 5 | 6 | from time import sleep 7 | from torch.utils import dlpack 8 | from hocap_toolkit.utils import * 9 | from hocap_toolkit.loaders import SequenceLoader 10 | from hocap_toolkit.layers import MANOGroupLayer 11 | 12 | PROJ_ROOT = Path(__file__).parents[1] 13 | 14 | HELP_INFO = """ 15 | ============================= 16 | Keyboard commands: 17 | ============================= 18 | H: display control panel 19 | SPACE: pause 20 | Q: quit 21 | R: reset camera 22 | ============================= 23 | """ 24 | 25 | 26 | class SequenceViewer: 27 | def __init__(self, sequence_folder, device="cuda") -> None: 28 | self._data_folder = Path(sequence_folder) 29 | self._device = device 30 | self._logger = get_logger(self.__class__.__name__) 31 | 32 | self._loader = SequenceLoader(sequence_folder, device=device) 33 | self._num_frames = self._loader.num_frames 34 | self._rs_serials = self._loader.rs_serials 35 | self._rs_master = self._loader.rs_master 36 | self._master_id = self._rs_serials.index(self._rs_master) 37 | self._num_cameras = len(self._rs_serials) 38 | self._rs_height = self._loader.rs_height 39 | self._rs_width = self._loader.rs_width 40 | self._rs_Ks = self._loader.rs_Ks.cpu().numpy() 41 | self._rs_RTs = self._loader.rs_RTs.cpu().numpy() 42 | self._mano_sides = self._loader.mano_sides 43 | 44 | self._mano_group_layer = self._init_mano_group_layer() 45 | self._mano_verts = self._get_mano_verts() 46 | self._mano_faces = self._get_mano_faces() 47 | self._mano_colors = self._get_mano_colors() 48 | 49 | self._poses_o = self._load_poses_o() 50 | 51 | def run(self): 52 | self._is_done = False 53 | self._frame_id = -1 54 | 55 | # rendering settings 56 | self._bg_color = (0.0, 0.0, 0.0, 1.0) # black 57 | self._point_size = 1 58 | self._update_flag = ( 59 | rendering.Scene.UPDATE_POINTS_FLAG | rendering.Scene.UPDATE_COLORS_FLAG 60 | ) # update points and colors 61 | 62 | # control flags 63 | self._cropped = False # crop points 64 | self._is_paused = False # pause 65 | self._show_skybox = False # show skybox background 66 | self._show_axes = False # show axes frame 67 | self._show_pcds = True # show point clouds 68 | self._show_mano = False # show mano mesh 69 | self._show_object = False # show object mesh 70 | self._cam_id = self._rs_serials.index(self._rs_master) # camera view 71 | 72 | # materials 73 | self._mat_pcd = rendering.MaterialRecord() 74 | self._mat_pcd.shader = "defaultUnlit" 75 | self._mat_pcd.point_size = self._point_size 76 | self._mat_mesh = rendering.MaterialRecord() 77 | self._mat_mesh.shader = "defaultUnlit" 78 | self._mat_line = rendering.MaterialRecord() 79 | self._mat_line.shader = 
"unlitLine" 80 | 81 | # dummy geometry 82 | zeros = o3c.Tensor.zeros( 83 | (self._rs_width * self._rs_height * self._num_cameras, 3), dtype=o3c.float32 84 | ) 85 | self._pcd = o3d.t.geometry.PointCloud() 86 | self._pcd.point.positions = zeros 87 | self._pcd.point.colors = zeros 88 | self._pcd.point.normals = zeros 89 | 90 | mano_mesh = o3d.geometry.TriangleMesh() 91 | mano_mesh.vertices = o3d.utility.Vector3dVector(self._mano_verts[0].numpy()) 92 | mano_mesh.triangles = o3d.utility.Vector3iVector(self._mano_faces) 93 | mano_mesh.vertex_colors = o3d.utility.Vector3dVector(self._mano_colors) 94 | mano_mesh.compute_vertex_normals() 95 | mano_ls = o3d.geometry.LineSet.create_from_triangle_mesh(mano_mesh) 96 | mano_ls.paint_uniform_color((0.0, 0.0, 0.0)) # black 97 | self._mano_mesh = o3d.t.geometry.TriangleMesh.from_legacy(mano_mesh) 98 | self._mano_ls = o3d.t.geometry.LineSet.from_legacy(mano_ls) 99 | 100 | # init gui 101 | self._app = gui.Application.instance 102 | self._app.initialize() 103 | 104 | # create window 105 | self._window = self._create_window() 106 | 107 | # set callbacks 108 | self._window.set_on_layout(self._on_layout) 109 | self._window.set_on_key(self._on_key) 110 | self._window.set_on_close(self._on_close) 111 | 112 | # add initial dummy geometry 113 | self._widget3d.scene.add_geometry("pcd", self._pcd, self._mat_pcd) 114 | self._widget3d.scene.show_geometry("pcd", self._show_pcds) 115 | self._widget3d.scene.add_geometry("mano", self._mano_mesh, self._mat_mesh) 116 | self._widget3d.scene.add_geometry("mano_ls", self._mano_ls, self._mat_line) 117 | self._widget3d.scene.show_geometry("mano", self._show_mano) 118 | self._widget3d.scene.show_geometry("mano_ls", self._show_mano) 119 | for i, mesh_file in enumerate(self._loader.object_textured_mesh_files): 120 | self._widget3d.scene.add_model( 121 | f"object_{i}", o3d.io.read_triangle_model(mesh_file) 122 | ) 123 | self._widget3d.scene.show_geometry(f"object_{i}", self._show_object) 124 | 125 | # update camera 126 | self._reset_camera() 127 | 128 | # run 129 | self._app.run_in_thread(self.update) 130 | self._app.run() 131 | 132 | def _create_window(self, title="Sequence Viewer", width=1280, height=720): 133 | # create window 134 | window = self._app.create_window(title, width, height) 135 | 136 | ## add widget3d 137 | self._widget3d = gui.SceneWidget() 138 | self._widget3d.scene = rendering.Open3DScene(window.renderer) 139 | self._widget3d.scene.set_background(self._bg_color) 140 | self._widget3d.scene.scene.enable_sun_light(False) 141 | self._widget3d.scene.scene.enable_indirect_light(True) 142 | point_light_postions = [ 143 | np.array([0.5, 0.5, 1.0]).astype(np.float32), 144 | np.array([-0.5, 0.5, 1.0]).astype(np.float32), 145 | np.array([-0.5, -0.5, 1.0]).astype(np.float32), 146 | np.array([0.5, -0.5, 1.0]).astype(np.float32), 147 | np.array([0.5, -0.5, 0.0]).astype(np.float32), 148 | np.array([0.5, 0.5, 0.0]).astype(np.float32), 149 | np.array([-0.5, 0.5, 0.0]).astype(np.float32), 150 | np.array([-0.5, -0.5, 0.0]).astype(np.float32), 151 | ] 152 | for idx, pos in enumerate(point_light_postions): 153 | self._widget3d.scene.scene.add_point_light( 154 | name=f"light_{idx}", 155 | color=np.array([1.0, 1.0, 1.0]).astype(np.float32), 156 | position=pos, 157 | intensity=1e6, 158 | falloff=1e2, 159 | cast_shadows=False, 160 | ) 161 | 162 | view = self._widget3d.scene.view 163 | view.set_post_processing(False) 164 | window.add_child(self._widget3d) 165 | 166 | ## add settings panel 167 | em = window.theme.font_size 168 | margin = 0.25 
* em 169 | self._panel = gui.Vert(margin, gui.Margins(margin, margin, margin, margin)) 170 | 171 | ### render settings 172 | settings = gui.CollapsableVert( 173 | "Render Settings", margin, gui.Margins(margin, margin, margin, margin) 174 | ) 175 | settings.set_is_open(True) 176 | render_blk = gui.VGrid(2, margin) 177 | self._bg_color_edit = gui.ColorEdit() 178 | self._bg_color_edit.color_value = gui.Color(*self._bg_color) 179 | self._bg_color_edit.set_on_value_changed(self._on_bg_color) 180 | render_blk.add_child(gui.Label("Background Color")) 181 | render_blk.add_child(self._bg_color_edit) 182 | point_size = gui.Slider(gui.Slider.INT) 183 | point_size.double_value = self._point_size 184 | point_size.set_limits(1, 10) 185 | point_size.set_on_value_changed(self._on_point_size) 186 | render_blk.add_child(gui.Label("Point Size")) 187 | render_blk.add_child(point_size) 188 | chk_box = gui.Checkbox("Show Skybox") 189 | chk_box.checked = self._show_skybox 190 | chk_box.set_on_checked(self._on_skybox) 191 | render_blk.add_child(chk_box) 192 | chk_box = gui.Checkbox("Show Axes") 193 | chk_box.checked = self._show_axes 194 | chk_box.set_on_checked(self._on_axes) 195 | render_blk.add_child(chk_box) 196 | crop_box = gui.Checkbox("Crop Points") 197 | crop_box.checked = self._cropped 198 | crop_box.set_on_checked(self._on_crop) 199 | render_blk.add_child(crop_box) 200 | settings.add_child(render_blk) 201 | self._panel.add_child(settings) 202 | ### geometry settings 203 | settings = gui.CollapsableVert( 204 | "Geometry Settings", margin, gui.Margins(margin, margin, margin, margin) 205 | ) 206 | settings.set_is_open(True) 207 | geo_blk = gui.Vert(margin, gui.Margins(margin, margin, margin, margin)) 208 | chk_box = gui.Checkbox("Point Clouds") 209 | chk_box.checked = self._show_pcds 210 | chk_box.set_on_checked(self._on_pcds) 211 | geo_blk.add_child(chk_box) 212 | chk_box = gui.Checkbox("Hand Mesh") 213 | chk_box.enabled = True 214 | chk_box.checked = self._show_mano 215 | chk_box.set_on_checked(self._on_mano) 216 | geo_blk.add_child(chk_box) 217 | chk_box = gui.Checkbox("Object Mesh") 218 | chk_box.enabled = True 219 | chk_box.checked = self._show_object 220 | chk_box.set_on_checked(self._on_object) 221 | geo_blk.add_child(chk_box) 222 | settings.add_child(geo_blk) 223 | self._panel.add_child(settings) 224 | ### progress bar 225 | bar = gui.VGrid(3, margin) 226 | self._slider = gui.Slider(gui.Slider.INT) 227 | self._slider.set_limits(0, self._num_frames - 1) 228 | self._slider.set_on_value_changed(self._on_progress_slider) 229 | self._num_edit = gui.NumberEdit(gui.NumberEdit.INT) 230 | self._num_edit.set_limits(0, self._num_frames - 1) 231 | self._num_edit.set_on_value_changed(self._on_progress_slider) 232 | bar.add_child(gui.Label("Frame Slider")) 233 | bar.add_child(self._slider) 234 | bar.add_child(self._num_edit) 235 | self._panel.add_child(bar) 236 | ### reset button 237 | btns = gui.Horiz(margin, gui.Margins(margin, margin, margin, margin)) 238 | botton1 = gui.Button("Reset") 239 | botton1.set_on_clicked(self._on_reset) 240 | botton2 = gui.Button("Pause/Play") 241 | botton2.set_on_clicked(self._on_pause) 242 | botton3 = gui.Button("Exit") 243 | botton3.set_on_clicked(self._on_exit) 244 | btns.add_stretch() 245 | btns.add_child(botton1) 246 | btns.add_child(botton2) 247 | btns.add_child(botton3) 248 | btns.add_stretch() 249 | self._panel.add_child(btns) 250 | 251 | self._panel.add_stretch() 252 | #################### 253 | # add tab control 254 | self._tabs = gui.TabControl() 255 | help_tab = 
gui.Vert(margin, gui.Margins(margin, margin, margin, margin)) 256 | help_info = gui.VGrid(2, margin) 257 | help_info.add_child(gui.Label(HELP_INFO)) 258 | help_tab.add_child(help_info) 259 | self._tabs.add_tab("Settings", self._panel) 260 | self._tabs.add_tab("Help", help_tab) 261 | 262 | # add tabs 263 | window.add_child(self._tabs) 264 | 265 | return window 266 | 267 | def _on_layout(self, ctx): 268 | r = self._window.content_rect 269 | panel_size = self._tabs.calc_preferred_size(ctx, gui.Widget.Constraints()) 270 | if (r.width < self._rs_width + panel_size.width) or r.height < self._rs_height: 271 | self._window.size = gui.Size( 272 | self._rs_width + panel_size.width, self._rs_height 273 | ) 274 | self._width = r.width - panel_size.width 275 | self._height = r.height 276 | self._widget3d.frame = gui.Rect(0, 0, self._width, self._height) 277 | self._tabs.frame = gui.Rect( 278 | self._widget3d.frame.get_right(), 0, panel_size.width, self._height 279 | ) 280 | self._update_camera_K() 281 | 282 | def _on_close(self): 283 | self._is_done = True 284 | sleep(0.10) 285 | return True 286 | 287 | def _on_key(self, event): 288 | if event.key == gui.KeyName.Q: # quit 289 | if event.type == gui.KeyEvent.DOWN: 290 | self._window.close() 291 | return True 292 | 293 | if event.key == gui.KeyName.SPACE: # pause 294 | if event.type == gui.KeyEvent.DOWN: 295 | self._on_pause() 296 | return True 297 | 298 | if event.key == gui.KeyName.R: # reset camera 299 | if event.type == gui.KeyEvent.DOWN: 300 | self._reset_camera() 301 | return True 302 | 303 | return False 304 | 305 | def _on_exit(self): 306 | self._window.close() 307 | self._app.quit() 308 | 309 | def _on_pause(self): 310 | self._is_paused = not self._is_paused 311 | 312 | def _on_reset(self): 313 | self._cam_id = self._rs_serials.index(self._rs_master) 314 | self._reset_camera() 315 | self._frame_id = -1 316 | self._slider.int_value = 0 317 | self._num_edit.int_value = 0 318 | 319 | def _on_progress_slider(self, value): 320 | value = int(value) % self._num_frames 321 | self._frame_id = value 322 | self._num_edit.int_value = value 323 | 324 | def _on_bg_color(self, color): 325 | self._bg_color_edit.color_value = color 326 | self._widget3d.scene.set_background( 327 | [color.red, color.green, color.blue, color.alpha] 328 | ) 329 | 330 | def _on_skybox(self, checked): 331 | self._widget3d.scene.show_skybox(checked) 332 | 333 | def _on_axes(self, checked): 334 | self._widget3d.scene.show_axes(checked) 335 | 336 | def _on_crop(self, checked): 337 | self._cropped = checked 338 | 339 | def _on_pcds(self, checked): 340 | self._show_pcds = checked 341 | self._widget3d.scene.show_geometry("pcd", checked) 342 | 343 | def _on_mano(self, checked): 344 | self._show_mano = checked 345 | self._widget3d.scene.show_geometry("mano", checked) 346 | self._widget3d.scene.show_geometry("mano_ls", checked) 347 | 348 | def _on_object(self, checked): 349 | self._show_object = checked 350 | for i in range(len(self._loader.object_textured_mesh_files)): 351 | self._widget3d.scene.show_geometry(f"object_{i}", checked) 352 | 353 | def _on_point_size(self, value): 354 | self._mat_pcd.point_size = int(value) 355 | self._widget3d.scene.modify_geometry_material("pcd", self._mat_pcd) 356 | 357 | def _reset_camera(self): 358 | self._widget3d.scene.camera.look_at([0, 0, 0], [0, 0, 0.8], [0, -1, 0]) 359 | 360 | def _update_camera_K(self): 361 | def create_K_matrix(image_width, image_height, fov_degrees): 362 | # The principal point is at the center of the image. 
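            # Note: with cx = W / 2, the focal length computed below follows the pinhole
            # relation fx = (W / 2) / tan(FOV / 2); fy is set equal to fx (square pixels),
            # giving K = [[fx, 0, cx], [0, fy, cy], [0, 0, 1]].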
363 | cx = image_width / 2.0 364 | cy = image_height / 2.0 365 | # Compute the focal length from the field of view. 366 | fov_rad = np.deg2rad(fov_degrees) 367 | fx = fy = cx / np.tan(fov_rad / 2) 368 | # Create the intrinsic matrix. 369 | K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32) 370 | return K 371 | 372 | K = create_K_matrix(self._width, self._height, 90) 373 | self._widget3d.scene.camera.set_projection( 374 | K, 0.001, 1000.0, self._width, self._height 375 | ) 376 | 377 | def _update_camera_pose(self): 378 | def extrinsics_to_look_at(pose): 379 | R = pose[:3, :3] 380 | T = pose[:3, 3] 381 | # The camera's position (eye) is the negative rotation by R of T. 382 | eye = -np.matmul(R.T, T) 383 | # The center point is one unit down the z-axis in the camera's space, 384 | # then transformed to world space by the pose matrix. 385 | center = np.matmul(R.T, np.array([0, 0, 1])) + eye 386 | # The up vector is the y-axis in the camera's space, then transformed to world space by the rotation matrix. 387 | # This assumes that the y-axis is the down-direction in the camera's local space. 388 | up = np.matmul(R.T, np.array([0, -1, 0])) 389 | return center, eye, up 390 | 391 | extrinsics = self._rs_RTs[self._cam_id] 392 | center, eye, up = extrinsics_to_look_at(extrinsics) 393 | # self._widget3d.scene.camera.look_at(center, eye, up) 394 | self._widget3d.look_at(center, eye, up) 395 | 396 | def _init_mano_group_layer(self): 397 | betas = [self._loader.mano_beta.cpu().numpy() for _ in self._mano_sides] 398 | return MANOGroupLayer(self._mano_sides, betas).to(self._device) 399 | 400 | def _load_poses_m(self): 401 | poses = np.load(self._data_folder / "poses_m.npy").astype(np.float32) 402 | poses = np.concatenate( 403 | [poses[0 if side == "right" else 1] for side in self._mano_sides], axis=1 404 | ) 405 | poses = torch.from_numpy(poses).to(self._device) 406 | return poses 407 | 408 | def _load_poses_o(self): 409 | poses = np.load(self._data_folder / "poses_o.npy").astype(np.float32) 410 | poses = np.stack( 411 | [quat_to_mat(p) for p in poses], axis=1 412 | ) # (num_frames, num_objects, 4, 4) 413 | print(f"poses_o: {poses.shape}") 414 | return poses 415 | 416 | def _get_mano_verts(self): 417 | pose_file = self._data_folder / "poses_m.npy" 418 | poses = np.load(pose_file).astype(np.float32) 419 | poses = np.concatenate( 420 | [poses[0 if side == "right" else 1] for side in self._mano_sides], axis=1 421 | ) 422 | poses = torch.from_numpy(poses).to(self._device) 423 | verts, _ = self._mano_group_layer(poses) 424 | return verts.cpu() 425 | 426 | def _get_mano_faces(self): 427 | faces = [self._mano_group_layer.f.cpu().numpy()] 428 | for i, side in enumerate(self._mano_sides): 429 | faces.append(np.array(NEW_MANO_FACES[side]) + i * NUM_MANO_VERTS) 430 | faces = np.concatenate(faces, axis=0).astype(np.int64) 431 | return faces 432 | 433 | def _get_mano_colors(self): 434 | colors = np.stack( 435 | [ 436 | [HAND_COLORS[1 if side == "right" else 2].rgb_norm] * NUM_MANO_VERTS 437 | for side in self._mano_sides 438 | ] 439 | ).reshape(-1, 3) 440 | return colors 441 | 442 | def step(self): 443 | if not self._is_paused: 444 | self._frame_id = (self._frame_id + 1) % self._num_frames 445 | self._slider.int_value = self._frame_id 446 | self._num_edit.int_value = self._frame_id 447 | self._loader.step_by_frame_id(self._frame_id) 448 | 449 | def update(self): 450 | def update(): 451 | if self._show_pcds: 452 | points = self._loader.points 453 | colors = self._loader.colors 454 | masks = 
self._loader.masks 455 | if self._cropped: 456 | points[~masks] = 0.0 457 | colors[~masks] = 0.0 458 | self._pcd.point.positions = o3c.Tensor.from_dlpack( 459 | dlpack.to_dlpack(points.cpu().view((-1, 3))) 460 | ) 461 | self._pcd.point.colors = o3c.Tensor.from_dlpack( 462 | dlpack.to_dlpack(colors.cpu().view((-1, 3))) 463 | ) 464 | self._widget3d.scene.scene.update_geometry( 465 | "pcd", self._pcd, self._update_flag 466 | ) 467 | 468 | if self._show_mano: 469 | self._mano_mesh.vertex.positions = self._mano_ls.point.positions = ( 470 | o3c.Tensor.from_dlpack( 471 | dlpack.to_dlpack(self._mano_verts[self._frame_id]) 472 | ) 473 | ) 474 | self._widget3d.scene.remove_geometry("mano") 475 | self._widget3d.scene.add_geometry( 476 | "mano", self._mano_mesh, self._mat_mesh 477 | ) 478 | self._widget3d.scene.remove_geometry("mano_ls") 479 | self._widget3d.scene.add_geometry( 480 | "mano_ls", self._mano_ls, self._mat_line 481 | ) 482 | 483 | if self._show_object: 484 | for i, pose in enumerate(self._poses_o[self._frame_id]): 485 | self._widget3d.scene.set_geometry_transform(f"object_{i}", pose) 486 | 487 | while not self._is_done: 488 | sleep(0.067) 489 | if not self._is_done: 490 | self.step() 491 | self._app.post_to_main_thread(self._window, update) 492 | 493 | 494 | if __name__ == "__main__": 495 | sequence_folder = "datasets/subject_1/20231025_165502" 496 | device = "cuda" if torch.cuda.is_available() else "cpu" 497 | 498 | viewer = SequenceViewer(sequence_folder, device=device) 499 | viewer.run() 500 | -------------------------------------------------------------------------------- /examples/sequence_pose_viewer.py: -------------------------------------------------------------------------------- 1 | """Example of visualizing hand and object poses of one frame in a sequence.""" 2 | 3 | import os 4 | 5 | os.environ["PYOPENGL_PLATFORM"] = "egl" # GPU-based offscreen rendering 6 | 7 | from hocap_toolkit.utils import * 8 | from hocap_toolkit.renderers import SequenceRenderer 9 | 10 | PROJ_ROOT = Path(__file__).parent.parent 11 | 12 | if __name__ == "__main__": 13 | sequence_folder = PROJ_ROOT / "datasets/subject_1/20231025_165502" 14 | renderer = SequenceRenderer(sequence_folder, device="cuda") 15 | 16 | frame_id = 70 17 | 18 | # Render the scene and get the rendered images 19 | renderer.create_scene(frame_id) 20 | render_colors = renderer.get_render_colors() 21 | 22 | # Display the rendered images 23 | overlays = [ 24 | cv2.addWeighted( 25 | renderer.get_rgb_image(frame_id, serial), 0.4, render_color, 0.6, 0 26 | ) 27 | for serial, render_color in render_colors.items() 28 | ] 29 | 30 | draw_all_camera_images(overlays, list(render_colors.keys()), show_only=True) 31 | -------------------------------------------------------------------------------- /examples/sequence_renderer.py: -------------------------------------------------------------------------------- 1 | """Example of rendering a sequence.""" 2 | 3 | import os 4 | 5 | os.environ["PYOPENGL_PLATFORM"] = "egl" # GPU-based offscreen rendering 6 | 7 | from hocap_toolkit.utils import * 8 | from hocap_toolkit.renderers import SequenceRenderer 9 | 10 | 11 | if __name__ == "__main__": 12 | sequence_folder = "datasets/subject_1/20231025_165502" 13 | 14 | renderer = SequenceRenderer(sequence_folder, device="cuda") 15 | 16 | for frame_id in tqdm(range(renderer.num_frames), desc="Rendering", ncols=80): 17 | # Render the scene and get the rendered images 18 | renderer.create_scene(frame_id) 19 | render_colors = renderer.get_render_colors() 20 | 
render_masks = renderer.get_render_masks() 21 | overlays = { 22 | serial: cv2.addWeighted( 23 | renderer.get_rgb_image(frame_id, serial), 0.4, render_color, 0.6, 0 24 | ) 25 | for serial, render_color in render_colors.items() 26 | } 27 | 28 | # Save the rendered images 29 | for serial in render_colors: 30 | save_folder = Path(sequence_folder) / "renders" / serial 31 | save_folder.mkdir(parents=True, exist_ok=True) 32 | write_rgb_image(save_folder / f"vis_{frame_id:06d}.png", overlays[serial]) 33 | write_rgb_image( 34 | save_folder / f"color_{frame_id:06d}.png", render_colors[serial] 35 | ) 36 | write_mask_image( 37 | save_folder / f"seg_{frame_id:06d}.png", render_masks[serial] 38 | ) 39 | -------------------------------------------------------------------------------- /hocap_toolkit/benchmarks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IRVLUTD/HO-Cap/723ae6f8f5291f074ae309f179eb8556f67ffd19/hocap_toolkit/benchmarks/__init__.py -------------------------------------------------------------------------------- /hocap_toolkit/benchmarks/groundtruth_generator.py: -------------------------------------------------------------------------------- 1 | from ..utils import * 2 | from ..utils.common import * 3 | from ..loaders import SequenceLoader 4 | 5 | 6 | class BenchmarkGTGenerator: 7 | def __init__(self): 8 | self._data_root = PROJ_ROOT / "data" 9 | 10 | def generate_hand_pose_gt(self): 11 | keys_file = ( 12 | self._data_root / "data/benchmarks/hand_pose_benchmark_gt_demo_keys.json" 13 | ) 14 | keys = read_data_from_json(keys_file) 15 | -------------------------------------------------------------------------------- /hocap_toolkit/factory/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset_factory import HOCapFactory 2 | -------------------------------------------------------------------------------- /hocap_toolkit/factory/dataset_factory.py: -------------------------------------------------------------------------------- 1 | import pycocotools.mask as mask_util 2 | from hocap_toolkit.utils import * 3 | 4 | PROJ_ROOT = Path(__file__).parents[2] 5 | HOCAP_DATASET_ROOT = PROJ_ROOT / "datasets" 6 | 7 | HOCAP_INFO = read_data_from_yaml(PROJ_ROOT / "config/hocap_info.yaml") 8 | 9 | # The train/valid/test split is defined separately for each task (HPE, ODET, OPE) 10 | # - The split is defined as a list of items 11 | # - Each item is a list in the format [subject_index, sequence_index, camera_index, frame_index] 12 | # - For example, [0, 0, 0, 0] refers "subject_1/20231022_190534/105322251564" folder and frame "color_000000.jpg" & "depth_000000.png" 13 | HPE_CONFIG = read_data_from_json(PROJ_ROOT / "config/hocap_hpe.json") 14 | ODET_CONFIG = read_data_from_json(PROJ_ROOT / "config/hocap_odet.json") 15 | OPE_CONFIG = read_data_from_json(PROJ_ROOT / "config/hocap_ope.json") 16 | 17 | COCO_CATEGORIES = [ 18 | { 19 | "id": i + 1, 20 | "name": obj_class, 21 | "supercategory": "object", 22 | } 23 | for i, obj_class in enumerate(HOCAP_INFO["object_classes"]) 24 | if "HAND" not in obj_class 25 | ] 26 | 27 | YOLO_CLASSES = [ 28 | obj_class for obj_class in HOCAP_INFO["object_classes"] if "HAND" not in obj_class 29 | ] 30 | 31 | 32 | class HOCapFactory: 33 | def __init__(self) -> None: 34 | self._logger = get_logger(__class__.__name__) 35 | 36 | self._calib_dir = HOCAP_DATASET_ROOT / "calibration" 37 | self._models_dir = HOCAP_DATASET_ROOT / "models" 38 | self._rs_width 
= 640 39 | self._rs_height = 480 40 | self._mano_betas = [ 41 | self._read_mano_beta(sub_id) for sub_id in HOCAP_INFO["subject_ids"] 42 | ] 43 | self._rs_RTs = self._load_rs_cam_RTs() 44 | self._rs_RTs_inv = [np.linalg.inv(RT) for RT in self._rs_RTs] 45 | 46 | def _world_mano_pose_to_camera(self, mano_pose, cam_RT_inv): 47 | if np.all(mano_pose == -1): 48 | return mano_pose 49 | 50 | pose_c = mano_pose.copy() 51 | rvt_w = np.concatenate([pose_c[:3], pose_c[-3:]], axis=0) 52 | mat_w = rvt_to_mat(rvt_w) 53 | mat_c = cam_RT_inv @ mat_w 54 | rvt_c = mat_to_rvt(mat_c) 55 | pose_c[:3] = rvt_c[:3] 56 | pose_c[-3:] = rvt_c[-3:] 57 | return pose_c 58 | 59 | def _read_mano_beta(self, sub_id): 60 | file_path = self._calib_dir / "mano" / f"{sub_id}.yaml" 61 | mano_data = read_data_from_yaml(file_path) 62 | mano_betas = np.array(mano_data["betas"]).astype(np.float32) 63 | return mano_betas 64 | 65 | def _load_pose_m(self, sub_id, seq_id): 66 | file_path = HOCAP_DATASET_ROOT / sub_id / seq_id / "poses_m.npy" 67 | poses_m = np.load(file_path).astype(np.float32) 68 | return poses_m 69 | 70 | def _load_rs_cam_RTs(self): 71 | def create_mat(values): 72 | return np.array( 73 | [values[0:4], values[4:8], values[8:12], [0, 0, 0, 1]], dtype=np.float32 74 | ) 75 | 76 | file_path = self._calib_dir / f"extrinsics/extrinsics_20231014.yaml" 77 | extrinsics = read_data_from_yaml(file_path)["extrinsics"] 78 | tag_1 = create_mat(extrinsics["tag_1"]) 79 | tag_1_inv = np.linalg.inv(tag_1) 80 | rs_RTs_master = [ 81 | create_mat(extrinsics[serial]) 82 | for serial in HOCAP_INFO["device_serials"][:-1] # Exclude the hololens 83 | ] 84 | rs_RTs_world = [tag_1_inv @ RT for RT in rs_RTs_master] 85 | return rs_RTs_world 86 | 87 | def _get_obj_model_path(self, obj_id): 88 | mesh_file = self._models_dir / obj_id / "textured_mesh.obj" 89 | texture_file = self._models_dir / obj_id / "textured_mesh_0.png" 90 | material_file = self._models_dir / obj_id / "textured_mesh.mtl" 91 | return (mesh_file, texture_file, material_file) 92 | 93 | def _load_object_vertices(self): 94 | object_vertices = {} 95 | for obj_id in HOCAP_INFO["object_classes"]: 96 | if "HAND" in obj_id: 97 | continue # Exclude hands 98 | mesh_file, _, _ = self._get_obj_model_path(obj_id) 99 | mesh = trimesh.load(mesh_file) 100 | object_vertices[obj_id] = mesh.vertices.astype(np.float32) 101 | return object_vertices 102 | 103 | def _calculate_model_info(self, mesh): 104 | # Diameter (approximate) as the max distance between any two vertices 105 | diameter = mesh.bounding_sphere.primitive.radius * 2 106 | # Bounding box dimensions 107 | min_bounds, max_bounds = mesh.bounds 108 | size = max_bounds - min_bounds 109 | return { 110 | "diameter": float(diameter), 111 | "min_x": float(min_bounds[0]), 112 | "min_y": float(min_bounds[1]), 113 | "min_z": float(min_bounds[2]), 114 | "size_x": float(size[0]), 115 | "size_y": float(size[1]), 116 | "size_z": float(size[2]), 117 | } 118 | 119 | def _calculate_projected_bbox(self, cam_K, obj_pose, object_vertices): 120 | """Calculate the 2D bounding box of the projected 3D object mesh.""" 121 | # Transform vertices to camera space 122 | object_vertices_homogeneous = np.hstack( 123 | (object_vertices, np.ones((object_vertices.shape[0], 1))) 124 | ) 125 | vertices_cam = (obj_pose @ object_vertices_homogeneous.T).T[:, :3] 126 | 127 | # Project vertices into 2D 128 | vertices_2d = (cam_K @ vertices_cam.T).T 129 | vertices_2d = ( 130 | vertices_2d[:, :2] / vertices_2d[:, 2:3] 131 | ) # Normalize by depth to get 2D coordinates 132 | 133 | # 
Get min/max x and y for the bounding box 134 | x_min, y_min = np.min(vertices_2d, axis=0) 135 | x_max, y_max = np.max(vertices_2d, axis=0) 136 | 137 | # Return bbox as [x_min, y_min, width, height] 138 | bbox = [int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)] 139 | return bbox 140 | 141 | def _binary_mask_to_rle(self, mask): 142 | """ 143 | Convert binary mask to COCO RLE format using pycocotools. 144 | """ 145 | # Ensure mask is uint8 146 | binary_mask = mask.astype(np.uint8) 147 | 148 | rle = mask_util.encode(np.asfortranarray(binary_mask)) 149 | rle["counts"] = rle["counts"].decode("utf-8") # Convert to string (COCO format) 150 | return rle 151 | 152 | def create_odet_dataset(self, dataset_type): 153 | if dataset_type not in ["coco", "yolo"]: 154 | msg = f"Invalid dataset type: {dataset_type}, choose from 'coco' or 'yolo'" 155 | self._logger.error(msg) 156 | raise ValueError(msg) 157 | 158 | self._logger.info( 159 | f">>>>>>>>>> Creating HOCap Object Detection Dataset ({dataset_type})..." 160 | ) 161 | output_dir = HOCAP_DATASET_ROOT / f"hocap_odet_{dataset_type}" 162 | make_clean_folder(output_dir) 163 | 164 | if dataset_type == "yolo": 165 | yolo_classes = [ 166 | obj_c for obj_c in HOCAP_INFO["object_classes"] if "HAND" not in obj_c 167 | ] 168 | dataset_info = { 169 | "train": "../train/images", 170 | "val": "../valid/images", 171 | "test": "../test/images", 172 | "nc": len(yolo_classes), 173 | "names": yolo_classes, 174 | } 175 | write_data_to_yaml(output_dir / "data.yaml", dataset_info) 176 | 177 | for split, split_data in ODET_CONFIG.items(): 178 | self._logger.info(f"Extracting {split} data...") 179 | 180 | save_image_dir = output_dir / split / "images" 181 | make_clean_folder(save_image_dir) 182 | 183 | save_label_dir = output_dir / split / "labels" 184 | make_clean_folder(save_label_dir) 185 | 186 | tqbar = tqdm(total=len(split_data), ncols=100) 187 | for image_idx, (sub_idx, seq_idx, cam_idx, frame_idx) in enumerate( 188 | split_data 189 | ): 190 | sub_id = HOCAP_INFO["subject_ids"][sub_idx] 191 | seq_id = HOCAP_INFO["sequence_ids"][seq_idx] 192 | cam_id = HOCAP_INFO["device_serials"][cam_idx] 193 | 194 | # Copy image 195 | src_img_path = ( 196 | HOCAP_DATASET_ROOT 197 | / f"{sub_id}/{seq_id}/{cam_id}/color_{frame_idx:06d}.jpg" 198 | ) 199 | save_img_name = f"sub{sub_idx:02d}_seq{seq_idx:02d}_cam{cam_idx:02d}_frame{frame_idx:06d}.jpg" 200 | shutil.copy(src_img_path, save_image_dir / save_img_name) 201 | 202 | # Generate yolo annotations 203 | yolo_annotations = [] 204 | label_data = np.load( 205 | src_img_path.parent / f"label_{frame_idx:06d}.npz" 206 | ) 207 | seg_mask = label_data["seg_mask"] 208 | obj_class_inds = label_data["obj_class_inds"] 209 | obj_class_names = label_data["obj_class_names"] 210 | for idx, mask_i in enumerate(np.unique(seg_mask)): 211 | if mask_i == 0: # Background 212 | continue 213 | mask = seg_mask == mask_i 214 | if mask.sum() < 10: 215 | continue # Ignore tiny/noisy masks 216 | 217 | x, y, w, h = cv2.boundingRect(mask.astype(np.uint8)) 218 | category_id = obj_class_inds[idx - 1].item() 219 | cx = (x + w / 2) / self._rs_width 220 | cy = (y + h / 2) / self._rs_height 221 | w /= self._rs_width 222 | h /= self._rs_height 223 | 224 | yolo_annotations.append( 225 | f"{category_id} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}" 226 | ) 227 | 228 | # Save yolo annotations 229 | save_label_name = f"sub{sub_idx:02d}_seq{seq_idx:02d}_cam{cam_idx:02d}_frame{frame_idx:06d}.txt" 230 | (save_label_dir / save_label_name).write_text( 231 | 
"\n".join(yolo_annotations) 232 | ) 233 | 234 | tqbar.update(1) 235 | tqbar.close() 236 | 237 | elif dataset_type == "coco": 238 | save_anno_dir = output_dir / "annotations" 239 | make_clean_folder(save_anno_dir) 240 | 241 | for split, split_data in ODET_CONFIG.items(): 242 | self._logger.info(f"Extracting {split} data...") 243 | save_image_dir = output_dir / split 244 | make_clean_folder(save_image_dir) 245 | 246 | save_anno_path = save_anno_dir / f"instances_{split}HOCap.json" 247 | 248 | annotations = [] 249 | images = [] 250 | tqbar = tqdm(total=len(split_data), ncols=100) 251 | for image_idx, (sub_idx, seq_idx, cam_idx, frame_idx) in enumerate( 252 | split_data 253 | ): 254 | sub_id = HOCAP_INFO["subject_ids"][sub_idx] 255 | seq_id = HOCAP_INFO["sequence_ids"][seq_idx] 256 | cam_id = HOCAP_INFO["device_serials"][cam_idx] 257 | 258 | src_img_path = ( 259 | HOCAP_DATASET_ROOT 260 | / f"{sub_id}/{seq_id}/{cam_id}/color_{frame_idx:06d}.jpg" 261 | ) 262 | 263 | # Copy image 264 | save_img_name = f"sub{sub_idx:02d}_seq{seq_idx:02d}_cam{cam_idx:02d}_frame{frame_idx:06d}.jpg" 265 | shutil.copy(src_img_path, save_image_dir / save_img_name) 266 | 267 | # Generate COCO annotations 268 | images.append( 269 | { 270 | "id": image_idx, 271 | "file_name": save_img_name, 272 | "height": self._rs_height, 273 | "width": self._rs_width, 274 | } 275 | ) 276 | 277 | label_data = np.load( 278 | src_img_path.parent / f"label_{frame_idx:06d}.npz" 279 | ) 280 | seg_mask = label_data["seg_mask"] 281 | obj_class_inds = label_data["obj_class_inds"] 282 | 283 | for idx, mask_i in enumerate(np.unique(seg_mask)): 284 | if mask_i == 0: 285 | continue # Background 286 | mask = (seg_mask == mask_i).astype(np.uint8) 287 | area = np.sum(mask).item() 288 | if area < 10: 289 | continue # Ignore tiny/noisy masks 290 | 291 | x, y, w, h = cv2.boundingRect(mask) 292 | category_id = ( 293 | obj_class_inds[idx - 1].item() + 1 294 | ) # COCO category id starts from 1 295 | 296 | annotations.append( 297 | { 298 | "id": len(annotations), 299 | "image_id": image_idx, 300 | "category_id": category_id, 301 | "bbox": [x, y, w, h], 302 | "area": area, 303 | "iscrowd": 0, 304 | "segmentation": self._binary_mask_to_rle(mask), 305 | } 306 | ) 307 | tqbar.update(1) 308 | tqbar.close() 309 | 310 | coco_data = { 311 | "images": images, 312 | "annotations": annotations, 313 | "categories": COCO_CATEGORIES, 314 | } 315 | with open(save_anno_path, "w") as f: 316 | json.dump(coco_data, f) 317 | 318 | def create_hpe_dataset(self): 319 | self._logger.info(">>>>>>>>>> Creating HOCap Hand Pose Estimation Dataset...") 320 | output_dir = HOCAP_DATASET_ROOT / "hocap_hpe" 321 | make_clean_folder(output_dir) 322 | 323 | for split, split_data in HPE_CONFIG.items(): 324 | self._logger.info(f"Extracting {split} data...") 325 | 326 | # Create directories 327 | save_image_dir = output_dir / split / "images" 328 | save_image_dir.mkdir(parents=True, exist_ok=True) 329 | save_label_dir = output_dir / split / "labels" 330 | save_label_dir.mkdir(parents=True, exist_ok=True) 331 | 332 | tqbar = tqdm(total=len(split_data), ncols=100) 333 | for image_idx, (sub_idx, seq_idx, cam_idx, frame_idx) in enumerate( 334 | split_data 335 | ): 336 | sub_id = HOCAP_INFO["subject_ids"][sub_idx] 337 | seq_id = HOCAP_INFO["sequence_ids"][seq_idx] 338 | cam_id = HOCAP_INFO["device_serials"][cam_idx] 339 | 340 | # Copy image 341 | src_img_path = ( 342 | HOCAP_DATASET_ROOT 343 | / f"{sub_id}/{seq_id}/{cam_id}/color_{frame_idx:06d}.jpg" 344 | ) 345 | save_img_name = 
f"sub{sub_idx:02d}_seq{seq_idx:02d}_cam{cam_idx:02d}_frame{frame_idx:06d}.jpg" 346 | shutil.copy(src_img_path, save_image_dir / save_img_name) 347 | 348 | # Generate hand pose annotations 349 | label_data = np.load(src_img_path.parent / f"label_{frame_idx:06d}.npz") 350 | cam_K = label_data["cam_K"] 351 | hand_joints_2d = label_data["hand_joints_2d"] 352 | hand_joints_3d = label_data["hand_joints_3d"] 353 | mano_shape = self._mano_betas[sub_idx] 354 | mano_poses_w = self._load_pose_m(sub_id, seq_id)[:, frame_idx] 355 | cam_RT_inv = self._rs_RTs_inv[cam_idx] 356 | mano_poses = np.stack( 357 | [ 358 | self._world_mano_pose_to_camera(p, cam_RT_inv) 359 | for p in mano_poses_w 360 | ], 361 | axis=0, 362 | ) 363 | 364 | save_label_name = f"sub{sub_idx:02d}_seq{seq_idx:02d}_cam{cam_idx:02d}_frame{frame_idx:06d}.npz" 365 | np.savez_compressed( 366 | save_label_dir / save_label_name, 367 | cam_K=cam_K, 368 | keypoints_2d=hand_joints_2d, 369 | keypoints_3d=hand_joints_3d, 370 | mano_betas=mano_shape, 371 | mano_poses=mano_poses, 372 | ) 373 | tqbar.update(1) 374 | tqbar.close() 375 | 376 | def create_ope_dataset(self): 377 | self._logger.info(">>>>>>>>>> Creating HOCap Object Pose Estimation Dataset...") 378 | output_dir = HOCAP_DATASET_ROOT / "hocap_ope" 379 | make_clean_folder(output_dir) 380 | 381 | object_vertices = self._load_object_vertices() 382 | 383 | for split, split_data in OPE_CONFIG.items(): 384 | self._logger.info(f"Extracting {split} data...") 385 | split_dir = output_dir / split 386 | split_dir.mkdir(parents=True, exist_ok=True) 387 | 388 | tqbar = tqdm(total=len(split_data), ncols=100) 389 | gt_data = {} 390 | for image_idx, (sub_idx, seq_idx, cam_idx, frame_idx) in enumerate( 391 | split_data 392 | ): 393 | sub_id = HOCAP_INFO["subject_ids"][sub_idx] 394 | seq_id = HOCAP_INFO["sequence_ids"][seq_idx] 395 | cam_id = HOCAP_INFO["device_serials"][cam_idx] 396 | 397 | save_rgb_dir = split_dir / f"{seq_idx:02d}_{cam_idx:02d}" / "rgb" 398 | save_mask_dir = split_dir / f"{seq_idx:02d}_{cam_idx:02d}" / "mask" 399 | save_depth_dir = split_dir / f"{seq_idx:02d}_{cam_idx:02d}" / "depth" 400 | save_rgb_dir.mkdir(parents=True, exist_ok=True) 401 | save_mask_dir.mkdir(parents=True, exist_ok=True) 402 | save_depth_dir.mkdir(parents=True, exist_ok=True) 403 | 404 | src_rgb_path = ( 405 | HOCAP_DATASET_ROOT 406 | / f"{sub_id}/{seq_id}/{cam_id}/color_{frame_idx:06d}.jpg" 407 | ) 408 | src_depth_path = ( 409 | HOCAP_DATASET_ROOT 410 | / f"{sub_id}/{seq_id}/{cam_id}/depth_{frame_idx:06d}.png" 411 | ) 412 | label_path = ( 413 | HOCAP_DATASET_ROOT 414 | / f"{sub_id}/{seq_id}/{cam_id}/label_{frame_idx:06d}.npz" 415 | ) 416 | 417 | # Generate gt data 418 | gt_info = [] 419 | label_data = np.load(label_path) 420 | cam_K = label_data["cam_K"] 421 | obj_poses = label_data["obj_poses"] 422 | seg_mask = label_data["seg_mask"] 423 | obj_class_inds = label_data["obj_class_inds"] 424 | obj_class_names = label_data["obj_class_names"] 425 | obj_seg_mask = np.zeros_like(seg_mask) 426 | 427 | for idx, mask_id in enumerate(np.unique(seg_mask)): 428 | if mask_id == 0: # Background 429 | continue 430 | obj_idx = ( 431 | obj_class_inds[idx - 1].item() + 1 432 | ) # BOP format starts from 1 433 | obj_name = obj_class_names[idx - 1] 434 | 435 | if "HAND" in obj_name: # Exclude hands 436 | continue 437 | 438 | obj_pose = obj_poses[idx - 1] 439 | # update obj_seg_mask 440 | obj_seg_mask[seg_mask == mask_id] = obj_idx 441 | 442 | gt_entry = { 443 | "cam_R_m2c": obj_pose[:3, :3].tolist(), 444 | "cam_t_m2c": obj_pose[:3, 
3].tolist(), 445 | "obj_id": obj_idx, 446 | "cam_K": cam_K.tolist(), 447 | "obj_bb": self._calculate_projected_bbox( 448 | cam_K, obj_pose, object_vertices[obj_name] 449 | ), 450 | } 451 | gt_info.append(gt_entry) 452 | 453 | # Save gt data 454 | shutil.copy( 455 | src_rgb_path, save_rgb_dir / f"{sub_idx:02d}_{frame_idx:06d}.jpg" 456 | ) 457 | shutil.copy( 458 | src_depth_path, 459 | save_depth_dir / f"{sub_idx:02d}_{frame_idx:06d}.png", 460 | ) 461 | write_mask_image( 462 | save_mask_dir / f"{sub_idx:02d}_{frame_idx:06d}.png", obj_seg_mask 463 | ) 464 | 465 | # Add gt.yaml 466 | if f"{seq_idx:02d}_{cam_idx:02d}" not in gt_data: 467 | gt_data[f"{seq_idx:02d}_{cam_idx:02d}"] = {} 468 | gt_data[f"{seq_idx:02d}_{cam_idx:02d}"][ 469 | f"{sub_idx:02d}_{frame_idx:06d}" 470 | ] = gt_info 471 | 472 | tqbar.update(1) 473 | tqbar.close() 474 | 475 | # Save gt.yaml 476 | self._logger.info(f"Saving gt.yaml for {split} split...") 477 | for key in gt_data.keys(): 478 | write_data_to_yaml(split_dir / f"{key}/gt.yaml", gt_data[key]) 479 | 480 | # Copy object models 481 | self._logger.info("Generating object models...") 482 | save_model_dir = output_dir / "models" 483 | save_model_dir.mkdir(parents=True, exist_ok=True) 484 | 485 | model_info = {} 486 | for obj_idx, obj_id in enumerate(HOCAP_INFO["object_classes"]): 487 | if "HAND" in obj_id: 488 | continue # Exclude hands 489 | mesh_file, _, _ = self._get_obj_model_path(obj_id) 490 | # Convert obj to ply 491 | mesh = trimesh.load(mesh_file) 492 | mesh.export(save_model_dir / f"{obj_id}.ply") 493 | model_info[obj_idx] = self._calculate_model_info(mesh) 494 | write_data_to_json(save_model_dir / "model_info.json", model_info) 495 | -------------------------------------------------------------------------------- /hocap_toolkit/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .mano_layer import MANOLayer 2 | from .mano_group_layer import MANOGroupLayer 3 | from .object_layer import ObjectLayer 4 | from .object_group_layer import ObjectGroupLayer 5 | 6 | 7 | __all__ = ["MANOLayer", "MANOGroupLayer", "ObjectLayer", "ObjectGroupLayer"] 8 | -------------------------------------------------------------------------------- /hocap_toolkit/layers/mano_group_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.nn import Module, ModuleList 4 | from .mano_layer import MANOLayer 5 | 6 | 7 | class MANOGroupLayer(Module): 8 | """Wrapper layer to hold a group of MANOLayers.""" 9 | 10 | def __init__(self, sides: list[str], betas: list[np.ndarray]): 11 | """ 12 | Constructor. 13 | 14 | Args: 15 | sides (list[str]): A list of MANO sides. 'right' or 'left'. 16 | betas (list[np.ndarray]): A list of numpy arrays of shape [10] containing the betas. 
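        Example (illustrative sketch only; the zero betas and pose are placeholders):
            betas = [np.zeros(10, dtype=np.float32) for _ in ("right", "left")]
            layer = MANOGroupLayer(["right", "left"], betas)
            pose = torch.zeros((1, 51 * 2))  # per hand: 48 pose (3 global + 45 PCA) + 3 trans
            verts, joints = layer(pose)  # verts: [1, 2*778, 3], joints: [1, 2*21, 3]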
17 | """ 18 | super(MANOGroupLayer, self).__init__() 19 | 20 | self._sides = sides 21 | self._betas = betas 22 | self._num_obj = len(self._sides) 23 | 24 | self._layers = ModuleList( 25 | [MANOLayer(s, b) for s, b in zip(self._sides, self._betas)] 26 | ) 27 | 28 | # Register buffer for faces 29 | f = torch.cat([self._layers[i].f + 778 * i for i in range(self._num_obj)]) 30 | self.register_buffer("f", f) 31 | 32 | # Register buffer for root translation 33 | r = torch.cat([l.root_trans for l in self._layers]) 34 | self.register_buffer("root_trans", r) 35 | 36 | def forward( 37 | self, p: torch.Tensor, inds: list[int] = None 38 | ) -> tuple[torch.Tensor, torch.Tensor]: 39 | """ 40 | Forward function. 41 | 42 | Args: 43 | p (torch.Tensor): A tensor of shape [B, D] containing the pose vectors. 44 | inds (list[int], optional): A list of sub-layer indices. Default is None. 45 | 46 | Returns: 47 | tuple[torch.Tensor, torch.Tensor]: 48 | v: A tensor of shape [B, N, 3] containing the vertices. 49 | j: A tensor of shape [B, J, 3] containing the joints. 50 | """ 51 | if inds is None: 52 | inds = range(self._num_obj) 53 | v = [torch.zeros((p.size(0), 0, 3), dtype=torch.float32, device=self.f.device)] 54 | j = [torch.zeros((p.size(0), 0, 3), dtype=torch.float32, device=self.f.device)] 55 | p, t = self.pose2pt(p) 56 | for i in inds: 57 | y = self._layers[i](p[:, i], t[:, i]) 58 | v.append(y[0]) 59 | j.append(y[1]) 60 | v = torch.cat(v, dim=1) 61 | j = torch.cat(j, dim=1) 62 | return v, j 63 | 64 | def pose2pt(self, pose: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: 65 | """ 66 | Extracts pose and trans from pose vectors. 67 | 68 | Args: 69 | pose (torch.Tensor): A tensor of shape [B, D] containing the pose vectors. 70 | 71 | Returns: 72 | tuple[torch.Tensor, torch.Tensor]: 73 | p: A tensor of shape [B, O, 48] containing the pose. 74 | t: A tensor of shape [B, O, 3] containing the trans. 75 | """ 76 | p = torch.stack( 77 | [pose[:, 51 * i : 51 * i + 48] for i in range(self._num_obj)], dim=1 78 | ) 79 | t = torch.stack( 80 | [pose[:, 51 * i + 48 : 51 * i + 51] for i in range(self._num_obj)], dim=1 81 | ) 82 | return p, t 83 | 84 | def get_f_from_inds(self, inds: list[int]) -> tuple[torch.Tensor, torch.Tensor]: 85 | """ 86 | Gets faces from sub-layer indices. 87 | 88 | Args: 89 | inds (list[int]): A list of sub-layer indices. 90 | 91 | Returns: 92 | tuple[torch.Tensor, torch.Tensor]: 93 | f: A tensor of shape [F, 3] containing the faces. 94 | m: A tensor of shape [F] containing the face to index mapping. 
95 | """ 96 | f = [torch.zeros((0, 3), dtype=self.f.dtype, device=self.f.device)] 97 | m = [torch.zeros((0,), dtype=torch.int64, device=self.f.device)] 98 | for i, x in enumerate(inds): 99 | f.append(self._layers[x].f + 778 * i) 100 | m.append( 101 | x 102 | * torch.ones( 103 | self._layers[x].f.size(0), dtype=torch.int64, device=self.f.device 104 | ) 105 | ) 106 | f = torch.cat(f) 107 | m = torch.cat(m) 108 | return f, m 109 | 110 | @property 111 | def num_obj(self) -> int: 112 | """Return the number of objects.""" 113 | return self._num_obj 114 | -------------------------------------------------------------------------------- /hocap_toolkit/layers/mano_layer.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import numpy as np 3 | import torch 4 | from torch.nn import Module 5 | from manopth.manolayer import ManoLayer 6 | 7 | PROJ_ROOT = Path(__file__).parent.parent.parent 8 | 9 | 10 | class MANOLayer(Module): 11 | """Wrapper layer for manopth ManoLayer.""" 12 | 13 | def __init__(self, side: str, betas: np.ndarray): 14 | """ 15 | Constructor for MANOLayer. 16 | 17 | Args: 18 | side (str): MANO hand type. 'right' or 'left'. 19 | betas (np.ndarray): A numpy array of shape [10] containing the betas. 20 | """ 21 | super(MANOLayer, self).__init__() 22 | 23 | self._side = side 24 | self._betas = betas 25 | 26 | self._mano_layer = ManoLayer( 27 | side=side, 28 | mano_root=PROJ_ROOT / "config/mano_models", 29 | flat_hand_mean=False, 30 | ncomps=45, 31 | use_pca=True, 32 | ) 33 | 34 | # Register buffer for betas 35 | b = torch.from_numpy(betas).unsqueeze(0).float() 36 | self.register_buffer("b", b) 37 | 38 | # Register buffer for faces 39 | self.register_buffer("f", self._mano_layer.th_faces) 40 | 41 | # Register buffer for root translation 42 | v = ( 43 | torch.matmul(self._mano_layer.th_shapedirs, self.b.transpose(0, 1)).permute( 44 | 2, 0, 1 45 | ) 46 | + self._mano_layer.th_v_template 47 | ) 48 | r = torch.matmul(self._mano_layer.th_J_regressor[0], v) 49 | self.register_buffer("root_trans", r) 50 | 51 | def forward( 52 | self, p: torch.Tensor, t: torch.Tensor 53 | ) -> tuple[torch.Tensor, torch.Tensor]: 54 | """ 55 | Forward function. 56 | 57 | Args: 58 | p (torch.Tensor): A tensor of shape [B, 48] containing the pose. 59 | t (torch.Tensor): A tensor of shape [B, 3] containing the translation. 60 | 61 | Returns: 62 | tuple[torch.Tensor, torch.Tensor]: 63 | v: A tensor of shape [B, 778, 3] containing the vertices. 64 | j: A tensor of shape [B, 21, 3] containing the joints. 65 | """ 66 | v, j = self._mano_layer(p, self.b.expand(p.size(0), -1), t) 67 | 68 | # Convert to meters. 
69 | v /= 1000.0 70 | j /= 1000.0 71 | return v, j 72 | 73 | @property 74 | def th_hands_mean(self) -> torch.Tensor: 75 | """Return the hand mean tensor.""" 76 | return self._mano_layer.th_hands_mean 77 | 78 | @property 79 | def th_selected_comps(self) -> torch.Tensor: 80 | """Return the selected components tensor.""" 81 | return self._mano_layer.th_selected_comps 82 | 83 | @property 84 | def th_v_template(self) -> torch.Tensor: 85 | """Return the vertex template tensor.""" 86 | return self._mano_layer.th_v_template 87 | 88 | @property 89 | def side(self) -> str: 90 | """Return the side of the hand.""" 91 | return self._side 92 | 93 | @property 94 | def num_verts(self) -> int: 95 | """Return the number of vertices.""" 96 | return 778 97 | -------------------------------------------------------------------------------- /hocap_toolkit/layers/object_group_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Module, ModuleList 3 | from .object_layer import ObjectLayer 4 | import numpy as np 5 | 6 | 7 | class ObjectGroupLayer(Module): 8 | """Wrapper layer to hold a group of ObjectLayers.""" 9 | 10 | def __init__( 11 | self, 12 | verts: list[np.ndarray], 13 | faces: list[np.ndarray], 14 | normals: list[np.ndarray], 15 | ): 16 | """ 17 | Constructor. 18 | 19 | Args: 20 | verts (list[np.ndarray]): A list of numpy arrays of shape [N, 3] containing the vertices. 21 | faces (list[np.ndarray]): A list of numpy arrays of shape [N, 3] containing the faces. 22 | normals (list[np.ndarray]): A list of numpy arrays of shape [N, 3] containing the normals. 23 | """ 24 | super(ObjectGroupLayer, self).__init__() 25 | 26 | self._layers = ModuleList( 27 | [ObjectLayer(v, f, n) for v, f, n in zip(verts, faces, normals)] 28 | ) 29 | self._num_obj = len(verts) 30 | self._num_verts = [v.shape[0] for v in verts] 31 | 32 | # Initialize faces with offsets 33 | f = [] 34 | offset = 0 35 | for i in range(self._num_obj): 36 | if i > 0: 37 | offset += self._layers[i - 1].v.size(1) 38 | f.append(self._layers[i].f + offset) 39 | f = torch.cat(f) 40 | self.register_buffer("f", f) 41 | 42 | @property 43 | def num_obj(self) -> int: 44 | """Return the number of objects.""" 45 | return self._num_obj 46 | 47 | @property 48 | def num_verts(self) -> list[int]: 49 | """Return the number of vertices for each object.""" 50 | return self._num_verts 51 | 52 | @property 53 | def count(self) -> list[int]: 54 | """Return the number of faces for each object.""" 55 | return [l.f.numel() for l in self._layers] 56 | 57 | def forward( 58 | self, p: torch.Tensor, inds: list[int] = None 59 | ) -> tuple[torch.Tensor, torch.Tensor]: 60 | """ 61 | Forward function. 62 | 63 | Args: 64 | p (torch.Tensor): A tensor of shape [B, D] containing the pose vectors. 65 | inds (list[int], optional): A list of sub-layer indices. Default is None. 66 | 67 | Returns: 68 | tuple[torch.Tensor, torch.Tensor]: 69 | v: A tensor of shape [B, N, 3] containing the transformed vertices. 70 | n: A tensor of shape [B, N, 3] containing the transformed normals. 
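        Note:
            D = 6 * num_obj: each object contributes a 3-value axis-angle rotation
            followed by a 3-value translation; see pose2rt() for the exact packing.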
71 | """ 72 | if inds is None: 73 | inds = range(self._num_obj) 74 | v = [torch.zeros((p.size(0), 0, 3), dtype=torch.float32, device=self.f.device)] 75 | n = [torch.zeros((p.size(0), 0, 3), dtype=torch.float32, device=self.f.device)] 76 | r, t = self.pose2rt(p) 77 | for i in inds: 78 | y = self._layers[i](r[:, i], t[:, i]) 79 | v.append(y[0]) 80 | n.append(y[1]) 81 | v = torch.cat(v, dim=1) 82 | n = torch.cat(n, dim=1) 83 | return v, n 84 | 85 | def pose2rt(self, pose: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: 86 | """ 87 | Extracts rotations and translations from pose vectors. 88 | 89 | Args: 90 | pose (torch.Tensor): A tensor of shape [B, D] containing the pose vectors. 91 | 92 | Returns: 93 | tuple[torch.Tensor, torch.Tensor]: 94 | r: A tensor of shape [B, O, 3] containing the rotation vectors. 95 | t: A tensor of shape [B, O, 3] containing the translations. 96 | """ 97 | r = torch.stack( 98 | [pose[:, 6 * i : 6 * i + 3] for i in range(self._num_obj)], dim=1 99 | ) 100 | t = torch.stack( 101 | [pose[:, 6 * i + 3 : 6 * i + 6] for i in range(self._num_obj)], dim=1 102 | ) 103 | return r, t 104 | 105 | def get_f_from_inds(self, inds: list[int]) -> tuple[torch.Tensor, torch.Tensor]: 106 | """ 107 | Gets faces from sub-layer indices. 108 | 109 | Args: 110 | inds (list[int]): A list of sub-layer indices. 111 | 112 | Returns: 113 | tuple[torch.Tensor, torch.Tensor]: 114 | f: A tensor of shape [F, 3] containing the faces. 115 | m: A tensor of shape [F] containing the face to index mapping. 116 | """ 117 | f = [torch.zeros((0, 3), dtype=self.f.dtype, device=self.f.device)] 118 | m = [torch.zeros((0,), dtype=torch.int64, device=self.f.device)] 119 | offset = 0 120 | for i, x in enumerate(inds): 121 | if i > 0: 122 | offset += self._layers[inds[i - 1]].v.size(1) 123 | f.append(self._layers[x].f + offset) 124 | m.append( 125 | x 126 | * torch.ones( 127 | self._layers[x].f.size(0), dtype=torch.int64, device=self.f.device 128 | ) 129 | ) 130 | f = torch.cat(f) 131 | m = torch.cat(m) 132 | return f, m 133 | 134 | def get_num_verts_from_inds(self, inds: list[int]) -> int: 135 | """ 136 | Gets number of vertices from sub-layer indices. 137 | 138 | Args: 139 | inds (list[int]): A non-empty list of sub-layer indices. 140 | 141 | Returns: 142 | int: The number of vertices. 143 | """ 144 | return sum(self._layers[i].v.size(1) for i in inds) 145 | 146 | def get_vert_inds_from_inds( 147 | self, inds: list[int] 148 | ) -> tuple[torch.Tensor, torch.Tensor]: 149 | """ 150 | Gets vertices from sub-layer indices. 151 | 152 | Args: 153 | inds (list[int]): A list of sub-layer indices. 154 | 155 | Returns: 156 | tuple[torch.Tensor, torch.Tensor]: 157 | idx: A tensor of shape [N] containing the vertices. 158 | m: A tensor of shape [N] containing the vertex to index mapping. 
159 | """ 160 | idx = [torch.zeros((0,), dtype=torch.int64, device=self.f.device)] 161 | m = [torch.zeros((0,), dtype=torch.int64, device=self.f.device)] 162 | offset = 0 163 | for i in range(self._num_obj): 164 | if i > 0: 165 | offset += self._layers[i - 1].v.size(1) 166 | idx.append( 167 | torch.arange(self._layers[i].v.size(1), device=self.f.device) + offset 168 | ) 169 | m.append( 170 | i 171 | * torch.ones( 172 | self._layers[i].v.size(1), dtype=torch.int64, device=self.f.device 173 | ) 174 | ) 175 | idx = torch.cat(idx) 176 | m = torch.cat(m) 177 | return idx, m 178 | -------------------------------------------------------------------------------- /hocap_toolkit/layers/object_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.nn import Module 4 | 5 | 6 | class ObjectLayer(Module): 7 | def __init__(self, verts: np.ndarray, faces: np.ndarray, normals: np.ndarray): 8 | """ 9 | Initializes the object layer. 10 | 11 | Args: 12 | verts (np.ndarray): A numpy array of shape [N, 3] containing the vertices. 13 | faces (np.ndarray): A numpy array of shape [N, 3] containing the faces. 14 | normals (np.ndarray): A numpy array of shape [N, 3] containing the normals. 15 | """ 16 | super().__init__() 17 | self._num_verts = verts.shape[0] 18 | 19 | # Convert numpy arrays to torch tensors 20 | v = torch.from_numpy(verts.astype(np.float32).T) 21 | n = torch.from_numpy(normals.astype(np.float32).T) 22 | f = torch.from_numpy(faces.astype(np.int64).reshape((-1, 3))) 23 | 24 | # Register buffers for vertices, normals, and faces 25 | self.register_buffer("v", v) 26 | self.register_buffer("n", n) 27 | self.register_buffer("f", f) 28 | 29 | def forward( 30 | self, r: torch.Tensor, t: torch.Tensor 31 | ) -> tuple[torch.Tensor, torch.Tensor]: 32 | """ 33 | Forward function. 34 | 35 | Args: 36 | r (torch.Tensor): A tensor of shape [B, 3] containing the rotation in axis-angle. 37 | t (torch.Tensor): A tensor of shape [B, 3] containing the translation. 38 | 39 | Returns: 40 | tuple[torch.Tensor, torch.Tensor]: 41 | v: A tensor of shape [B, N, 3] containing the transformed vertices. 42 | n: A tensor of shape [B, N, 3] containing the transformed normals. 43 | """ 44 | R = self.rv2dcm(r) 45 | v = torch.matmul(R, self.v).permute(0, 2, 1) + t.unsqueeze(1) 46 | n = torch.matmul(R, self.n).permute(0, 2, 1) 47 | return v, n 48 | 49 | def rv2dcm(self, rv: torch.Tensor) -> torch.Tensor: 50 | """ 51 | Converts rotation vectors to direction cosine matrices. 52 | 53 | Args: 54 | rv (torch.Tensor): A tensor of shape [B, 3] containing the rotation vectors. 55 | 56 | Returns: 57 | torch.Tensor: A tensor of shape [B, 3, 3] containing the direction cosine matrices. 
58 | """ 59 | angle = torch.norm(rv + 1e-8, p=2, dim=1) 60 | axis = rv / angle.unsqueeze(1) 61 | s = torch.sin(angle).unsqueeze(1).unsqueeze(2) 62 | c = torch.cos(angle).unsqueeze(1).unsqueeze(2) 63 | I = torch.eye(3, device=rv.device).expand(rv.size(0), -1, -1) 64 | z = torch.zeros_like(angle) 65 | K = torch.stack( 66 | ( 67 | torch.stack((z, -axis[:, 2], axis[:, 1]), dim=1), 68 | torch.stack((axis[:, 2], z, -axis[:, 0]), dim=1), 69 | torch.stack((-axis[:, 1], axis[:, 0], z), dim=1), 70 | ), 71 | dim=1, 72 | ) 73 | dcm = I + s * K + (1 - c) * torch.bmm(K, K) 74 | return dcm 75 | 76 | @property 77 | def num_verts(self) -> int: 78 | """Return the number of vertices.""" 79 | return self._num_verts 80 | -------------------------------------------------------------------------------- /hocap_toolkit/loaders/__init__.py: -------------------------------------------------------------------------------- 1 | from .sequence_loader import SequenceLoader 2 | 3 | __all__ = ["SequenceLoader"] 4 | -------------------------------------------------------------------------------- /hocap_toolkit/loaders/sequence_loader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from pathlib import Path 4 | from hocap_toolkit.utils import read_data_from_yaml, read_rgb_image, read_depth_image 5 | 6 | 7 | class SequenceLoader: 8 | """ 9 | Class for loading and processing sequence data. 10 | 11 | Supports loading MANO and object layers, along with their poses, intrinsics, 12 | extrinsics, and metadata required for 3D reconstruction and analysis. 13 | """ 14 | 15 | def __init__(self, sequence_folder: str, device: str = "cuda"): 16 | """ 17 | Initializes the SequenceLoader object. 18 | 19 | Args: 20 | sequence_folder (str): The path to the sequence folder. 21 | device (str): The device to run computations on ('cpu' or 'cuda'). Defaults to 'cpu'. 
22 | """ 23 | self._data_folder = Path(sequence_folder) 24 | self._calib_folder = self._data_folder.parent.parent / "calibration" 25 | self._models_folder = self._data_folder.parent.parent / "models" 26 | self._device = device 27 | 28 | # Crop limits in world frame, [x_min, x_max, y_min, y_max, z_min, z_max] 29 | self._crop_lim = [-0.60, +0.60, -0.35, +0.35, -0.01, +0.80] 30 | 31 | # Load metadata 32 | self._load_metadata() 33 | 34 | # Create mapping from 2D coordinates to 3D rays 35 | self._rays = self._create_3d_rays() 36 | 37 | # Create projection matrices from camera to master/world 38 | self._M2world = torch.bmm(self._rs_Ks, self._rs_RTs_inv[:, :3, :]) 39 | 40 | # Initialize points, colors, and masks 41 | self._frame_id = -1 42 | self._points = torch.zeros( 43 | (len(self._rs_serials), self._rs_height * self._rs_width, 3), 44 | dtype=torch.float32, 45 | device=self._device, 46 | ) 47 | self._colors = torch.zeros( 48 | (len(self._rs_serials), self._rs_height * self._rs_width, 3), 49 | dtype=torch.float32, 50 | device=self._device, 51 | ) 52 | self._masks = torch.zeros( 53 | (len(self._rs_serials), self._rs_height * self._rs_width), 54 | dtype=torch.bool, 55 | device=self._device, 56 | ) 57 | 58 | def _load_metadata(self): 59 | data = read_data_from_yaml(self._data_folder / "meta.yaml") 60 | 61 | self._num_frames = data["num_frames"] 62 | self._object_ids = data["object_ids"] 63 | self._mano_sides = data["mano_sides"] 64 | self._task_id = data["task_id"] 65 | self._subject_id = data["subject_id"] 66 | # RealSense camera metadata 67 | self._rs_serials = data["realsense"]["serials"] 68 | self._rs_width = data["realsense"]["width"] 69 | self._rs_height = data["realsense"]["height"] 70 | self._num_cams = len(self._rs_serials) 71 | # HoloLens metadata 72 | self._hl_serial = data["hololens"]["serial"] 73 | self._hl_pv_width = data["hololens"]["pv_width"] 74 | self._hl_pv_height = data["hololens"]["pv_height"] 75 | # Object models file paths 76 | self._object_textured_files = [ 77 | self._models_folder / obj_id / "textured_mesh.obj" 78 | for obj_id in self._object_ids 79 | ] 80 | self._object_cleaned_files = [ 81 | self._models_folder / obj_id / "cleaned_mesh_10000.obj" 82 | for obj_id in self._object_ids 83 | ] 84 | 85 | # Load camera intrinsics 86 | self._load_intrinsics() 87 | 88 | # Load rs camera extrinsics 89 | self._load_extrinsics(data["extrinsics"]) 90 | 91 | # Load MANO shape parameters 92 | self._mano_beta = self._load_mano_beta() 93 | 94 | def _load_intrinsics(self): 95 | def read_K_from_yaml(serial, cam_type="color"): 96 | yaml_file = self._calib_folder / "intrinsics" / f"{serial}.yaml" 97 | data = read_data_from_yaml(yaml_file)[cam_type] 98 | K = np.array( 99 | [ 100 | [data["fx"], 0.0, data["ppx"]], 101 | [0.0, data["fy"], data["ppy"]], 102 | [0.0, 0.0, 1.0], 103 | ], 104 | dtype=np.float32, 105 | ) 106 | return K 107 | 108 | rs_Ks = np.stack( 109 | [read_K_from_yaml(serial) for serial in self._rs_serials], axis=0 110 | ) 111 | rs_Ks_inv = np.stack([np.linalg.inv(K) for K in rs_Ks], axis=0) 112 | 113 | hl_K = read_K_from_yaml(self._hl_serial) 114 | hl_K_inv = np.linalg.inv(hl_K) 115 | 116 | # Convert intrinsics to torch tensors 117 | self._rs_Ks = torch.from_numpy(rs_Ks).to(self._device) 118 | self._rs_Ks_inv = torch.from_numpy(rs_Ks_inv).to(self._device) 119 | self._hl_K = torch.from_numpy(hl_K).to(self._device) 120 | self._hl_K_inv = torch.from_numpy(hl_K_inv).to(self._device) 121 | 122 | def _load_extrinsics(self, file_name): 123 | def create_mat(values): 124 | return 
np.array( 125 | [values[0:4], values[4:8], values[8:12], [0, 0, 0, 1]], dtype=np.float32 126 | ) 127 | 128 | data = read_data_from_yaml(self._calib_folder / "extrinsics" / f"{file_name}") 129 | 130 | # Read rs_master serial 131 | self._rs_master = data["rs_master"] 132 | 133 | # Create extrinsics matrices 134 | extrinsics = data["extrinsics"] 135 | tag_0 = create_mat(extrinsics["tag_0"]) 136 | tag_0_inv = np.linalg.inv(tag_0) 137 | tag_1 = create_mat(extrinsics["tag_1"]) 138 | tag_1_inv = np.linalg.inv(tag_1) 139 | extr2master = np.stack( 140 | [create_mat(extrinsics[s]) for s in self._rs_serials], axis=0 141 | ) 142 | extr2master_inv = np.stack([np.linalg.inv(t) for t in extr2master], axis=0) 143 | extr2world = np.stack([tag_1_inv @ t for t in extr2master], axis=0) 144 | extr2world_inv = np.stack([np.linalg.inv(t) for t in extr2world], axis=0) 145 | 146 | # Convert extrinsics to torch tensors 147 | self._tag_0 = torch.from_numpy(tag_0).to(self._device) 148 | self._tag_0_inv = torch.from_numpy(tag_0_inv).to(self._device) 149 | self._tag_1 = torch.from_numpy(tag_1).to(self._device) 150 | self._tag_1_inv = torch.from_numpy(tag_1_inv).to(self._device) 151 | self._extr2master = torch.from_numpy(extr2master).to(self._device) 152 | self._extr2master_inv = torch.from_numpy(extr2master_inv).to(self._device) 153 | self._rs_RTs = torch.from_numpy(extr2world).to(self._device) 154 | self._rs_RTs_inv = torch.from_numpy(extr2world_inv).to(self._device) 155 | 156 | def _load_mano_beta(self) -> torch.Tensor: 157 | file_path = self._calib_folder / "mano" / f"{self._subject_id}.yaml" 158 | data = read_data_from_yaml(file_path) 159 | return torch.tensor(data["betas"], dtype=torch.float32, device=self._device) 160 | 161 | def _create_3d_rays(self) -> torch.Tensor: 162 | """Creates 3D rays for deprojecting depth images to 3D space.""" 163 | 164 | def create_2d_coords() -> torch.Tensor: 165 | xv, yv = torch.meshgrid( 166 | torch.arange(self._rs_width), 167 | torch.arange(self._rs_height), 168 | indexing="xy", 169 | ) 170 | coord_2d = torch.stack( 171 | (xv, yv, torch.ones_like(xv)), dim=0 172 | ).float() # (3, H, W) 173 | coords_2d = ( 174 | coord_2d.unsqueeze(0) 175 | .repeat(self._num_cams, 1, 1, 1) 176 | .view(self._num_cams, 3, -1) 177 | ) # (N, 3, H*W) 178 | coords_2d = coords_2d.to(self._device) 179 | return coords_2d 180 | 181 | coords_2d = create_2d_coords() 182 | return torch.bmm(self._rs_Ks_inv, coords_2d) # (N, 3, H*W) 183 | 184 | def _deproject(self, colors, depths) -> tuple: 185 | """ 186 | Deprojects depth images to 3D points. 187 | 188 | Args: 189 | colors (np.ndarray): List of color images, [N, H, W, 3], dtype=float32. 190 | depths (np.ndarray): List of depth images, [N, H, W], dtype=np.float32. 191 | 192 | Returns: 193 | tuple: Colors, 3D points, and masks. 
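        Note:
            Camera-frame points are recovered as depth * (K^-1 @ [u, v, 1]^T) using
            the precomputed rays, then mapped to the world frame with the
            camera-to-world extrinsics (p_world = R @ p_cam + t). The returned masks
            keep only points inside the world-frame crop limits in self._crop_lim.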
194 | """ 195 | # Process color images 196 | colors = torch.from_numpy(colors.reshape(self._num_cams, -1, 3)).to( 197 | self._device 198 | ) # [N, H*W, 3] 199 | 200 | # Process depth images 201 | depths = torch.from_numpy(depths.reshape(self._num_cams, 1, -1)).to( 202 | self._device 203 | ) # [N, 1, H*W] 204 | 205 | # Deproject depth images to 3D points in camera frame 206 | pts_c = self._rays * depths # [N, 3, H*W] 207 | # Transform 3D points from camera frame to world frame 208 | pts = torch.baddbmm( 209 | self._rs_RTs[:, :3, 3].unsqueeze(2), 210 | self._rs_RTs[:, :3, :3], 211 | pts_c, 212 | ).permute( 213 | 0, 2, 1 214 | ) # (N, H*W, 3) 215 | 216 | # Crop 3D points 217 | mx1 = pts[..., 0] > self._crop_lim[0] 218 | mx2 = pts[..., 0] < self._crop_lim[1] 219 | my1 = pts[..., 1] > self._crop_lim[2] 220 | my2 = pts[..., 1] < self._crop_lim[3] 221 | mz1 = pts[..., 2] > self._crop_lim[4] 222 | mz2 = pts[..., 2] < self._crop_lim[5] 223 | masks = mx1 & mx2 & my1 & my2 & mz1 & mz2 224 | 225 | return colors, pts, masks 226 | 227 | def _update_pcd(self, frame_id: int): 228 | """Update point cloud data.""" 229 | colors, points, masks = self._deproject( 230 | np.stack( 231 | [self.get_rgb_image(frame_id, serial) for serial in self._rs_serials], 232 | axis=0, 233 | dtype=np.float32, 234 | ) 235 | / 255.0, 236 | np.stack( 237 | [self.get_depth_image(frame_id, serial) for serial in self._rs_serials], 238 | axis=0, 239 | dtype=np.float32, 240 | ), 241 | ) 242 | self._points.copy_(points) 243 | self._colors.copy_(colors) 244 | self._masks.copy_(masks) 245 | 246 | def get_rgb_image(self, frame_id: int, serial: str) -> np.ndarray: 247 | """Get RGB image in numpy format, dtype=uint8, [H, W, 3].""" 248 | image_file = self._data_folder / f"{serial}/color_{frame_id:06d}.jpg" 249 | return read_rgb_image(image_file) 250 | 251 | def get_depth_image(self, frame_id: int, serial: str) -> np.ndarray: 252 | """Get depth image in numpy format, dtype=uint16, [H, W].""" 253 | image_file = self._data_folder / f"{serial}/depth_{frame_id:06d}.png" 254 | return read_depth_image(image_file, scale=1000.0) 255 | 256 | def get_image_label(self, frame_id: int, serial: str) -> dict: 257 | """Get image label data.""" 258 | label_file = self._data_folder / f"{serial}/label_{frame_id:06d}.npz" 259 | if not label_file.exists(): 260 | return {} 261 | return np.load(label_file) 262 | 263 | def step(self): 264 | """Step to the next frame.""" 265 | self._frame_id = (self._frame_id + 1) % self._num_frames 266 | self._update_pcd(self._frame_id) 267 | 268 | def step_by_frame_id(self, frame_id: int): 269 | """Step to a specific frame.""" 270 | self._frame_id = frame_id % self._num_frames 271 | self._update_pcd(self._frame_id) 272 | 273 | @property 274 | def object_ids(self) -> list: 275 | return self._object_ids 276 | 277 | @property 278 | def subject_id(self) -> str: 279 | return self._subject_id 280 | 281 | @property 282 | def num_frames(self) -> int: 283 | return self._num_frames 284 | 285 | @property 286 | def rs_width(self) -> int: 287 | return self._rs_width 288 | 289 | @property 290 | def rs_height(self) -> int: 291 | return self._rs_height 292 | 293 | @property 294 | def rs_serials(self) -> list: 295 | return self._rs_serials 296 | 297 | @property 298 | def rs_master(self) -> str: 299 | return self._rs_master 300 | 301 | @property 302 | def holo_pv_width(self) -> int: 303 | return self._hl_pv_width 304 | 305 | @property 306 | def holo_pv_height(self) -> int: 307 | return self._hl_pv_height 308 | 309 | @property 310 | def 
holo_serial(self) -> list: 311 | return self._hl_serial 312 | 313 | @property 314 | def mano_beta(self) -> torch.Tensor: 315 | return self._mano_beta 316 | 317 | @property 318 | def mano_sides(self) -> list: 319 | return self._mano_sides 320 | 321 | @property 322 | def rs_Ks(self) -> torch.Tensor: 323 | return self._rs_Ks 324 | 325 | @property 326 | def rs_Ks_inv(self) -> torch.Tensor: 327 | return self._rs_Ks_inv 328 | 329 | @property 330 | def rs_RTs(self) -> torch.Tensor: 331 | return self._rs_RTs 332 | 333 | @property 334 | def rs_RTs_inv(self) -> torch.Tensor: 335 | return self._rs_RTs_inv 336 | 337 | @property 338 | def tag_0(self) -> torch.Tensor: 339 | """tag_0 to rs_master transformation matrix""" 340 | return self._tag_0 341 | 342 | @property 343 | def tag_0_inv(self) -> torch.Tensor: 344 | """rs_master to tag_0 transformation matrix""" 345 | return self._tag_0_inv 346 | 347 | @property 348 | def tag_1(self) -> torch.Tensor: 349 | """tag_1 to rs_master transformation matrix""" 350 | return self._tag_1 351 | 352 | @property 353 | def tag_1_inv(self) -> torch.Tensor: 354 | """rs_master to tag_1 transformation matrix""" 355 | return self._tag_1_inv 356 | 357 | @property 358 | def M(self) -> torch.Tensor: 359 | """camera to world transformation matrix""" 360 | return self._M2world 361 | 362 | @property 363 | def frame_id(self) -> int: 364 | return self._frame_id 365 | 366 | @property 367 | def object_textured_mesh_files(self) -> list: 368 | return [ 369 | str(self._models_folder / f"{object_id}/textured_mesh.obj") 370 | for object_id in self._object_ids 371 | ] 372 | 373 | @property 374 | def object_cleaned_mesh_files(self) -> list: 375 | return [ 376 | str(self._models_folder / f"{object_id}/cleaned_mesh_10000.obj") 377 | for object_id in self._object_ids 378 | ] 379 | 380 | @property 381 | def points(self) -> torch.Tensor: 382 | return self._points 383 | 384 | @property 385 | def colors(self) -> torch.Tensor: 386 | return self._colors 387 | 388 | @property 389 | def masks(self) -> torch.Tensor: 390 | return self._masks 391 | -------------------------------------------------------------------------------- /hocap_toolkit/renderers/__init__.py: -------------------------------------------------------------------------------- 1 | from .sequence_renderer import SequenceRenderer 2 | from .renderer_pyrd import OffscreenRenderer 3 | -------------------------------------------------------------------------------- /hocap_toolkit/renderers/renderer_pyrd.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ["PYOPENGL_PLATFORM"] = "egl" # GPU-based offscreen rendering 4 | 5 | import numpy as np 6 | import pyrender 7 | from pyrender import RenderFlags 8 | 9 | # OpenGL RH y UP (Pyrender) 10 | # y 11 | # | 12 | # +---x 13 | # / 14 | # z 15 | 16 | # CV Camera RH y DOWN, x RIGHT, z FRONT 17 | # z 18 | # / 19 | # +---x 20 | # | 21 | # y 22 | 23 | 24 | class OffscreenRenderer: 25 | def __init__(self, width, height, znear=0.001, zfar=1000.0, pose_type="cv") -> None: 26 | assert pose_type in ["cv", "gl"], "Invalid pose type. 
Must be 'cv' or 'gl'" 27 | self._pose_type = pose_type 28 | self._width = width 29 | self._height = height 30 | self._znear = znear 31 | self._zfar = zfar 32 | self._bg_color = np.array([0.0, 0.0, 0.0, 1.0]) 33 | self._ambient_light = np.array([1.0, 1.0, 1.0, 1.0]) 34 | 35 | self._cam = pyrender.PerspectiveCamera( 36 | yfov=np.pi / 3.0, znear=self._znear, zfar=self._zfar 37 | ) 38 | 39 | self._glcam2cvcam = np.array( 40 | [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] 41 | ) 42 | self._cvcam2glcam = np.linalg.inv(self._glcam2cvcam) 43 | 44 | def get_render_image(self, meshes, mesh_poses=None, cam_K=None, cam_pose=None): 45 | poses_m = ( 46 | mesh_poses 47 | if mesh_poses is not None 48 | else [np.eye(4) for _ in range(len(meshes))] 49 | ) 50 | pose_c = cam_pose if cam_pose is not None else np.eye(4) 51 | if self._pose_type == "cv": 52 | pose_c = pose_c @ self._cvcam2glcam 53 | 54 | scene = pyrender.Scene( 55 | bg_color=self._bg_color, ambient_light=self._ambient_light 56 | ) 57 | 58 | # add dummy world node 59 | world_node = scene.add(pyrender.PerspectiveCamera(yfov=np.pi / 3.0)) 60 | 61 | # add camera 62 | scene.main_camera_node = scene.add( 63 | pyrender.IntrinsicsCamera( 64 | fx=cam_K[0, 0], 65 | fy=cam_K[1, 1], 66 | cx=cam_K[0, 2], 67 | cy=cam_K[1, 2], 68 | znear=self._znear, 69 | zfar=self._zfar, 70 | ), 71 | name="camera", 72 | pose=pose_c, 73 | parent_node=world_node, 74 | ) 75 | 76 | # add meshes 77 | for i, mesh in enumerate(meshes): 78 | scene.add( 79 | pyrender.Mesh.from_trimesh(mesh), 80 | name=f"mesh_{i}", 81 | pose=poses_m[i], 82 | parent_node=world_node, 83 | ) 84 | # render 85 | r = pyrender.OffscreenRenderer(self._width, self._height) 86 | color, depth = r.render(scene, flags=RenderFlags.ALL_SOLID) 87 | r.delete() 88 | 89 | return color, depth 90 | -------------------------------------------------------------------------------- /hocap_toolkit/renderers/sequence_renderer.py: -------------------------------------------------------------------------------- 1 | from hocap_toolkit.utils import * 2 | from hocap_toolkit.loaders import SequenceLoader 3 | from hocap_toolkit.layers import MANOGroupLayer 4 | 5 | cvcam_in_glcam = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]) 6 | 7 | 8 | class SequenceRenderer: 9 | def __init__(self, sequence_folder, device="cpu") -> None: 10 | self._seq_folder = Path(sequence_folder).resolve() 11 | self._device = device 12 | self._loader = SequenceLoader(sequence_folder, device=device) 13 | self._num_frames = self._loader.num_frames 14 | self._object_ids = self._loader.object_ids 15 | self._mano_sides = self._loader.mano_sides 16 | self._mano_group_layer = self._init_mano_group_layer() 17 | # Realsense cameras 18 | self._rs_serials = self._loader.rs_serials 19 | self._rs_width = self._loader.rs_width 20 | self._rs_height = self._loader.rs_height 21 | self._rs_intrinsics = self._loader.rs_Ks.cpu().numpy() 22 | self._rs_extrinsics = self._loader.rs_RTs.cpu().numpy() 23 | # Hololens cameras 24 | self._hl_serial = self._loader.holo_serial 25 | self._hl_pv_width = self._loader.holo_pv_width 26 | self._hl_pv_height = self._loader.holo_pv_height 27 | self._hl_pv_intrinsics = self._loader._hl_K.cpu().numpy() 28 | 29 | # Load poses 30 | self._poses_o = self._load_object_poses() 31 | self._poses_m = self._load_mano_poses() 32 | self._poses_pv = self._load_holo_poses() 33 | 34 | # Load object meshes 35 | self._obj_meshes = [ 36 | pyrender.Mesh.from_trimesh(trimesh.load_mesh(f, process=False)) 37 | for f in 
self._loader.object_textured_mesh_files 38 | ] 39 | 40 | # Get verts, faces, colors for MANO 41 | self._mano_verts = self._get_mano_verts() 42 | self._mano_faces = self._get_mano_faces() 43 | self._mano_colors = self._get_mano_colors() 44 | 45 | # Rendering flags 46 | self._rgb_flags = ( 47 | pyrender.RenderFlags.OFFSCREEN | pyrender.RenderFlags.SHADOWS_ALL 48 | ) 49 | self._depth_flags = ( 50 | pyrender.RenderFlags.OFFSCREEN | pyrender.RenderFlags.DEPTH_ONLY 51 | ) 52 | self._mask_flags = pyrender.RenderFlags.OFFSCREEN | pyrender.RenderFlags.SEG 53 | 54 | def _load_holo_pv_intrinsics(self, serial): 55 | K = np.fromfile( 56 | self._loader._calib_folder 57 | / f"hololens/{serial}/personal_video" 58 | / f"1000_{self._hl_pv_width}_{self._hl_pv_height}/intrinsics.bin", 59 | dtype=np.float32, 60 | ).reshape(4, 4)[:3, :3] 61 | K[0, 0] = -K[0, 0] 62 | return K.T 63 | 64 | def _load_object_poses(self): 65 | pose_file = self._seq_folder / "poses_o.npy" 66 | poses = np.load(pose_file) 67 | poses = np.stack([quat_to_mat(p) for p in poses], axis=0) 68 | return poses 69 | 70 | def _load_mano_poses(self): 71 | pose_file = self._seq_folder / "poses_m.npy" 72 | poses = np.load(pose_file) 73 | poses = [ 74 | torch.from_numpy(poses[0 if side == "right" else 1]).to(self._device) 75 | for side in self._mano_sides 76 | ] 77 | return poses 78 | 79 | def _load_holo_poses(self): 80 | pose_file = self._seq_folder / "poses_pv.npy" 81 | poses = quat_to_mat(np.load(pose_file)) 82 | return poses 83 | 84 | def _init_mano_group_layer(self): 85 | beta = self._loader.mano_beta.cpu().numpy() 86 | return MANOGroupLayer(self._mano_sides, [beta for _ in self._mano_sides]).to( 87 | self._device 88 | ) 89 | 90 | def _get_mano_verts(self): 91 | p = torch.cat(self._poses_m, dim=1) 92 | v, _ = self._mano_group_layer(p) 93 | if p.size(0) == 1: 94 | v = v[0] 95 | return v.cpu().numpy() 96 | 97 | def _get_mano_faces(self): 98 | mano_faces = self._mano_group_layer.f.cpu().numpy() 99 | mano_faces = [ 100 | np.concatenate( 101 | [ 102 | mano_faces[idx * NUM_MANO_FACES : (idx + 1) * NUM_MANO_FACES] 103 | - idx * NUM_MANO_VERTS, 104 | NEW_MANO_FACES[side], 105 | ] 106 | ) 107 | for idx, side in enumerate(self._mano_sides) 108 | ] 109 | return mano_faces 110 | 111 | def _get_mano_colors(self): 112 | mano_colors = [ 113 | HAND_COLORS[1].rgb if side == "right" else HAND_COLORS[2].rgb 114 | for side in self._mano_sides 115 | ] 116 | return mano_colors 117 | 118 | def _get_mano_meshes(self, frame_id): 119 | meshes = [ 120 | trimesh.Trimesh( 121 | vertices=self._mano_verts[frame_id][ 122 | i * NUM_MANO_VERTS : (i + 1) * NUM_MANO_VERTS 123 | ], 124 | faces=self._mano_faces[i], 125 | vertex_colors=self._mano_colors[i], 126 | ) 127 | for i in range(len(self._mano_sides)) 128 | ] 129 | meshes = [pyrender.Mesh.from_trimesh(mesh) for mesh in meshes] 130 | return meshes 131 | 132 | def create_scene(self, frame_id): 133 | self._scene = pyrender.Scene( 134 | bg_color=[0.0, 0.0, 0.0], ambient_light=[1.0, 1.0, 1.0] 135 | ) 136 | 137 | # Add world node 138 | world_node = self._scene.add_node(pyrender.Node(name="world")) 139 | 140 | # Add realsense camera nodes 141 | self._camera_nodes = { 142 | serial: self._scene.add( 143 | pyrender.IntrinsicsCamera( 144 | fx=cam_K[0, 0], 145 | fy=cam_K[1, 1], 146 | cx=cam_K[0, 2], 147 | cy=cam_K[1, 2], 148 | znear=0.01, 149 | zfar=10.0, 150 | ), 151 | parent_node=world_node, 152 | name=f"cam_{serial}", 153 | pose=cam_RT @ cvcam_in_glcam, 154 | ) 155 | for serial, cam_K, cam_RT in zip( 156 | self._rs_serials, 
self._rs_intrinsics, self._rs_extrinsics 157 | ) 158 | } 159 | 160 | # Add hololens camera node 161 | self._camera_nodes[self._hl_serial] = self._scene.add( 162 | pyrender.IntrinsicsCamera( 163 | fx=self._hl_pv_intrinsics[0, 0], 164 | fy=self._hl_pv_intrinsics[1, 1], 165 | cx=self._hl_pv_intrinsics[0, 2], 166 | cy=self._hl_pv_intrinsics[1, 2], 167 | znear=0.01, 168 | zfar=10.0, 169 | ), 170 | parent_node=world_node, 171 | name=f"cam_{self._hl_serial}", 172 | pose=self._poses_pv[frame_id] @ cvcam_in_glcam, 173 | ) 174 | 175 | # Add object nodes 176 | self._object_nodes = [ 177 | self._scene.add( 178 | obj_mesh, 179 | parent_node=world_node, 180 | name=f"obj_{self._object_ids[i]}", 181 | pose=self._poses_o[i, frame_id], 182 | ) 183 | for i, obj_mesh in enumerate(self._obj_meshes) 184 | ] 185 | 186 | # Add MANO nodes 187 | self._mano_nodes = [ 188 | self._scene.add( 189 | mano_mesh, 190 | parent_node=world_node, 191 | name=f"mano_{self._mano_sides[i]}", 192 | pose=np.eye(4), 193 | ) 194 | for i, mano_mesh in enumerate(self._get_mano_meshes(frame_id)) 195 | ] 196 | 197 | self._seg_node_map = {} 198 | for i, obj_node in enumerate(self._object_nodes): 199 | self._seg_node_map[obj_node] = OBJ_CLASS_COLORS[i + 1].rgb 200 | 201 | for i, side in enumerate(self._mano_sides): 202 | hand_color_idx = 1 if side == "right" else 2 203 | self._seg_node_map[self._mano_nodes[i]] = HAND_COLORS[hand_color_idx].rgb 204 | 205 | def get_rgb_image(self, frame_id, serial): 206 | return self._loader.get_rgb_image(frame_id, serial) 207 | 208 | def get_render_colors(self): 209 | color_images = {} 210 | # Render color images for realsense cameras 211 | r = pyrender.OffscreenRenderer(self._rs_width, self._rs_height) 212 | for serial in self._rs_serials: 213 | self._scene.main_camera_node = self._camera_nodes[serial] 214 | color, _ = r.render(self._scene, flags=self._rgb_flags) 215 | color_images[serial] = color 216 | r.delete() 217 | # Render color image for hololens camera 218 | r = pyrender.OffscreenRenderer(self._hl_pv_width, self._hl_pv_height) 219 | self._scene.main_camera_node = self._camera_nodes[self._hl_serial] 220 | color, _ = r.render(self._scene, flags=self._rgb_flags) 221 | color_images[self._hl_serial] = color 222 | r.delete() 223 | return color_images 224 | 225 | def get_render_depths(self): 226 | depth_images = {} 227 | # Render depth images for realsense cameras 228 | r = pyrender.OffscreenRenderer(self._rs_width, self._rs_height) 229 | for serial in self._rs_serials: 230 | self._scene.main_camera_node = self._camera_nodes[serial] 231 | depth = r.render(self._scene, flags=self._depth_flags) 232 | depth_images[serial] = depth 233 | r.delete() 234 | # Render depth image for hololens camera 235 | r = pyrender.OffscreenRenderer(self._hl_pv_width, self._hl_pv_height) 236 | self._scene.main_camera_node = self._camera_nodes[self._hl_serial] 237 | depth = r.render(self._scene, flags=self._depth_flags) 238 | depth_images[self._hl_serial] = depth 239 | r.delete() 240 | return depth_images 241 | 242 | def get_render_masks(self): 243 | mask_images = {} 244 | # Render mask images for realsense cameras 245 | r = pyrender.OffscreenRenderer(self._rs_width, self._rs_height) 246 | for serial in self._rs_serials: 247 | self._scene.main_camera_node = self._camera_nodes[serial] 248 | mask, _ = r.render( 249 | self._scene, flags=self._mask_flags, seg_node_map=self._seg_node_map 250 | ) 251 | mask_images[serial] = mask 252 | r.delete() 253 | # Render mask image for hololens camera 254 | r = 
pyrender.OffscreenRenderer(self._hl_pv_width, self._hl_pv_height) 255 | self._scene.main_camera_node = self._camera_nodes[self._hl_serial] 256 | mask, _ = r.render( 257 | self._scene, flags=self._mask_flags, seg_node_map=self._seg_node_map 258 | ) 259 | mask_images[self._hl_serial] = mask 260 | r.delete() 261 | return mask_images 262 | 263 | @property 264 | def num_frames(self): 265 | return self._num_frames 266 | 267 | @property 268 | def rs_serials(self): 269 | return self._rs_serials 270 | 271 | @property 272 | def holo_serial(self): 273 | return self._hl_serial 274 | 275 | 276 | def plot_and_save_images(images): 277 | """ 278 | Plot the images in the specified layout and save as 1080P PNG. 279 | 280 | Parameters: 281 | images (list of numpy arrays): List of 10 images to be displayed. 282 | frame_id (int): The frame ID to be used in the filename. 283 | output_folder (str): The folder where the output images will be saved. 284 | """ 285 | if len(images) != 10: 286 | raise ValueError("The function expects exactly 10 images.") 287 | 288 | # Create a figure with 1920x1080 resolution 289 | fig = plt.figure( 290 | figsize=(19.2, 10.8), dpi=100 291 | ) # figsize in inches, dpi=100 for 1920x1080 pixels 292 | 293 | # Create a GridSpec with 3 rows and 4 columns 294 | gs = fig.add_gridspec(3, 4, height_ratios=[1, 1, 1.5]) 295 | 296 | # Plot the first 8 images in a 2x4 grid 297 | for i in range(8): 298 | ax = fig.add_subplot(gs[i // 4, i % 4]) 299 | ax.imshow(images[i]) 300 | ax.axis("off") # Hide the axes 301 | 302 | # Plot the 9th image on the bottom left 303 | ax = fig.add_subplot(gs[2, :2]) 304 | ax.imshow(images[8]) 305 | ax.axis("off") # Hide the axes 306 | 307 | # Plot the 10th image on the bottom right 308 | ax = fig.add_subplot(gs[2, 2:]) 309 | ax.imshow(images[9]) 310 | ax.axis("off") # Hide the axes 311 | 312 | # Display the plot 313 | plt.tight_layout() 314 | plt.show() 315 | -------------------------------------------------------------------------------- /hocap_toolkit/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .common_imports import * 2 | from .io import * 3 | from .cv_utils import * 4 | from .transforms import * 5 | from .mano_info import NEW_MANO_FACES, NUM_MANO_VERTS, NUM_MANO_FACES 6 | 7 | 8 | def add_path(path): 9 | if str(path) not in sys.path: 10 | sys.path.insert(0, str(path)) 11 | 12 | 13 | def get_logger(log_name="HOCapToolkit", log_level="INFO", log_file=None): 14 | """Create and return a logger with console and optional file output.""" 15 | logger = logging.getLogger(log_name) 16 | logger.setLevel(logging.DEBUG) 17 | formatter = logging.Formatter( 18 | "[%(asctime)s] [%(name)s:%(funcName)s] [%(levelname).3s] %(message)s", 19 | datefmt="%Y%m%d;%H:%M:%S", 20 | ) 21 | if not logger.hasHandlers(): 22 | if log_file: 23 | fh = logging.FileHandler(log_file) 24 | fh.setLevel(logging.DEBUG) 25 | fh.setFormatter(formatter) 26 | logger.addHandler(fh) 27 | # Console handler 28 | ch = logging.StreamHandler() 29 | ch.setLevel(getattr(logging, log_level.upper(), logging.INFO)) 30 | ch.setFormatter(formatter) 31 | logger.addHandler(ch) 32 | return logger 33 | -------------------------------------------------------------------------------- /hocap_toolkit/utils/color_info.py: -------------------------------------------------------------------------------- 1 | class RGBA: 2 | def __init__(self, red, green, blue, alpha=255): 3 | """ 4 | Initialize an RGBA color. 
5 | :param red: Red channel (0-255) 6 | :param green: Green channel (0-255) 7 | :param blue: Blue channel (0-255) 8 | :param alpha: Alpha channel (0-255), default is 255 (opaque) 9 | """ 10 | self.red = red 11 | self.green = green 12 | self.blue = blue 13 | self.alpha = alpha 14 | 15 | def __str__(self): 16 | return "({},{},{},{})".format(self.red, self.green, self.blue, self.alpha) 17 | 18 | @property 19 | def hex(self): 20 | """Return the hexadecimal representation of the color.""" 21 | return "#{:02X}{:02X}{:02X}".format(self.red, self.green, self.blue) 22 | 23 | @property 24 | def rgba(self): 25 | """Return a tuple of the RGBA values.""" 26 | return (self.red, self.green, self.blue, self.alpha) 27 | 28 | @property 29 | def rgb(self): 30 | """Return a tuple of the RGB values.""" 31 | return (self.red, self.green, self.blue) 32 | 33 | @property 34 | def bgra(self): 35 | """Return a tuple of the BGRA values (Blue, Green, Red, Alpha).""" 36 | return (self.blue, self.green, self.red, self.alpha) 37 | 38 | @property 39 | def bgr(self): 40 | """Return a tuple of the BGR values (Blue, Green, Red).""" 41 | return (self.blue, self.green, self.red) 42 | 43 | @property 44 | def rgba_norm(self): 45 | """Return normalized RGBA values (0 to 1).""" 46 | return ( 47 | self.red / 255.0, 48 | self.green / 255.0, 49 | self.blue / 255.0, 50 | self.alpha / 255.0, 51 | ) 52 | 53 | @property 54 | def rgb_norm(self): 55 | """Return normalized RGB values (0 to 1).""" 56 | return (self.red / 255.0, self.green / 255.0, self.blue / 255.0) 57 | 58 | @property 59 | def bgra_norm(self): 60 | """Return normalized BGRA values (0 to 1).""" 61 | return ( 62 | self.blue / 255.0, 63 | self.green / 255.0, 64 | self.red / 255.0, 65 | self.alpha / 255.0, 66 | ) 67 | 68 | @property 69 | def bgr_norm(self): 70 | """Return normalized BGR values (0 to 1).""" 71 | return (self.blue / 255.0, self.green / 255.0, self.red / 255.0) 72 | 73 | 74 | COLORS = { 75 | "red": RGBA(255, 0, 0), 76 | "dark_red": RGBA(139, 0, 0), 77 | "green": RGBA(0, 255, 0), 78 | "dark_green": RGBA(0, 100, 0), 79 | "blue": RGBA(0, 0, 255), 80 | "yellow": RGBA(255, 255, 0), 81 | "magenta": RGBA(255, 0, 255), 82 | "cyan": RGBA(0, 255, 255), 83 | "orange": RGBA(255, 165, 0), 84 | "purple": RGBA(128, 0, 128), 85 | "brown": RGBA(165, 42, 42), 86 | "pink": RGBA(255, 192, 203), 87 | "lime": RGBA(0, 255, 0), 88 | "navy": RGBA(0, 0, 128), 89 | "teal": RGBA(0, 128, 128), 90 | "olive": RGBA(128, 128, 0), 91 | "maroon": RGBA(128, 0, 0), 92 | "coral": RGBA(255, 127, 80), 93 | "turquoise": RGBA(64, 224, 208), 94 | "indigo": RGBA(75, 0, 130), 95 | "violet": RGBA(238, 130, 238), 96 | "gold": RGBA(255, 215, 0), 97 | "skin": RGBA(255, 219, 172), 98 | "white": RGBA(255, 255, 255), 99 | "black": RGBA(0, 0, 0), 100 | "gray": RGBA(128, 128, 128), 101 | "darkgray": RGBA(64, 64, 64), 102 | "lightgray": RGBA(211, 211, 211), 103 | "tomato": RGBA(255, 99, 71), 104 | "deepskyblue": RGBA(0, 128, 255), 105 | # Tab10 colors 106 | "tab10_0": RGBA(31, 119, 180), 107 | "tab10_1": RGBA(255, 127, 14), 108 | "tab10_2": RGBA(44, 160, 44), 109 | "tab10_3": RGBA(214, 39, 40), 110 | "tab10_4": RGBA(148, 103, 189), 111 | "tab10_5": RGBA(140, 86, 75), 112 | "tab10_6": RGBA(227, 119, 194), 113 | "tab10_7": RGBA(127, 127, 127), 114 | "tab10_8": RGBA(188, 189, 34), 115 | "tab10_9": RGBA(23, 190, 207), 116 | } 117 | 118 | # RGB colors for Object classes 119 | OBJ_CLASS_COLORS = [ 120 | COLORS["black"], # background 121 | COLORS["tab10_0"], # object 1 122 | COLORS["tab10_1"], # object 2 123 | 
COLORS["tab10_2"], # object 3 124 | COLORS["tab10_3"], # object 4 125 | ] 126 | 127 | # RGB colors for Hands 128 | HAND_COLORS = [ 129 | COLORS["black"], # background 130 | COLORS["tab10_5"], # right hand 131 | COLORS["tab10_8"], # left hand 132 | ] 133 | 134 | # RGB colors for HOCap Dataset Segmentation 135 | HO_CAP_SEG_COLOR = [ 136 | COLORS["black"], # background 137 | OBJ_CLASS_COLORS[1], # object 1 138 | OBJ_CLASS_COLORS[2], # object 2 139 | OBJ_CLASS_COLORS[3], # object 3 140 | OBJ_CLASS_COLORS[4], # object 4 141 | HAND_COLORS[1], # right hand 142 | HAND_COLORS[2], # left hand 143 | ] 144 | 145 | # RGB colors for Hand Bones 146 | HAND_BONE_COLORS = [ 147 | # Palm connections 148 | COLORS["gray"], # (0, 1) 149 | COLORS["gray"], # (0, 5) 150 | COLORS["gray"], # (0, 17) 151 | COLORS["gray"], # (5, 9) 152 | COLORS["gray"], # (9, 13) 153 | COLORS["gray"], # (13, 17) 154 | # Thumb 155 | COLORS["red"], # (1, 2) 156 | COLORS["red"], # (2, 3) 157 | COLORS["red"], # (3, 4) 158 | # Index 159 | COLORS["green"], # (5, 6) 160 | COLORS["green"], # (6, 7) 161 | COLORS["green"], # (7, 8) 162 | # Middle 163 | COLORS["blue"], # (9, 10) 164 | COLORS["blue"], # (10, 11) 165 | COLORS["blue"], # (11, 12) 166 | # Ring 167 | COLORS["yellow"], # (13, 14) 168 | COLORS["yellow"], # (14, 15) 169 | COLORS["yellow"], # (15, 16) 170 | # Pinky 171 | COLORS["pink"], # (17, 18) 172 | COLORS["pink"], # (18, 19) 173 | COLORS["pink"], # (19, 20) 174 | ] 175 | 176 | # RGB colors for Hand Joints 177 | HAND_JOINT_COLORS = [ 178 | # Wrist (root) 179 | COLORS["black"], # 0 180 | # Thumb joints 181 | COLORS["red"], # 1 182 | COLORS["red"], # 2 183 | COLORS["red"], # 3 184 | COLORS["red"], # 4 185 | # Index joints 186 | COLORS["green"], # 5 187 | COLORS["green"], # 6 188 | COLORS["green"], # 7 189 | COLORS["green"], # 8 190 | # Middle joints 191 | COLORS["blue"], # 9 192 | COLORS["blue"], # 10 193 | COLORS["blue"], # 11 194 | COLORS["blue"], # 12 195 | # Ring joints 196 | COLORS["yellow"], # 13 197 | COLORS["yellow"], # 14 198 | COLORS["yellow"], # 15 199 | COLORS["yellow"], # 16 200 | # Pinky joints 201 | COLORS["pink"], # 17 202 | COLORS["pink"], # 18 203 | COLORS["pink"], # 19 204 | COLORS["pink"], # 20 205 | ] 206 | -------------------------------------------------------------------------------- /hocap_toolkit/utils/common_imports.py: -------------------------------------------------------------------------------- 1 | # Standard library imports 2 | import os 3 | import sys 4 | import gc 5 | import time 6 | import math 7 | import json 8 | import shutil 9 | import pickle as pkl 10 | import argparse 11 | import itertools 12 | import multiprocessing 13 | from pathlib import Path 14 | from typing import List, Tuple, Dict, Any, Union, Optional 15 | import logging 16 | import concurrent.futures 17 | 18 | # Third-party libraries 19 | from ruamel.yaml import YAML 20 | from tqdm import tqdm 21 | import numpy as np 22 | from scipy.spatial.transform import Rotation as R 23 | from scipy.interpolate import interp1d, CubicSpline 24 | import cv2 25 | import matplotlib.pyplot as plt 26 | from matplotlib.gridspec import GridSpec 27 | from matplotlib.patches import Circle, Rectangle 28 | from matplotlib.lines import Line2D 29 | import open3d as o3d 30 | import open3d.core as o3c 31 | import trimesh 32 | import pyrender 33 | import av 34 | import torch 35 | 36 | yaml = YAML() 37 | yaml.default_flow_style = False 38 | yaml.indent(mapping=2, sequence=4, offset=2) 39 | 
-------------------------------------------------------------------------------- /hocap_toolkit/utils/cv_utils.py: -------------------------------------------------------------------------------- 1 | from .common_imports import * 2 | from .color_info import ( 3 | COLORS, 4 | OBJ_CLASS_COLORS, 5 | HAND_COLORS, 6 | HAND_BONE_COLORS, 7 | HAND_JOINT_COLORS, 8 | HO_CAP_SEG_COLOR, 9 | ) 10 | from .mano_info import HAND_BONES 11 | 12 | 13 | def _apply_morphology( 14 | mask: np.ndarray, operation: str, kernel_size: int = 3, iterations: int = 1 15 | ) -> np.ndarray: 16 | """Helper function to apply a morphological operation (erode/dilate) on the mask.""" 17 | if mask.ndim not in [2, 3]: 18 | raise ValueError("Mask must be a 2D or 3D numpy array.") 19 | if kernel_size <= 1: 20 | raise ValueError("Kernel size must be greater than 1.") 21 | kernel = np.ones((kernel_size, kernel_size), np.uint8) 22 | mask_dtype = mask.dtype 23 | mask = mask.astype(np.uint8) 24 | if operation == "erode": 25 | result = cv2.erode(mask, kernel, iterations=iterations) 26 | elif operation == "dilate": 27 | result = cv2.dilate(mask, kernel, iterations=iterations) 28 | else: 29 | raise ValueError(f"Invalid operation: {operation}. Use 'erode' or 'dilate'.") 30 | return result.astype(mask_dtype) 31 | 32 | 33 | def _plot_image(ax, image, name, facecolor, titlecolor, fontsize): 34 | """Helper function to plot an image in the grid.""" 35 | if image.ndim == 3 and image.shape[2] == 3: # RGB image 36 | ax.imshow(image) 37 | elif image.ndim == 2 and image.dtype == np.uint8: # Grayscale/mask image 38 | unique_values = np.unique(image) 39 | cmap = "tab10" if len(unique_values) <= 10 else "gray" 40 | ax.imshow(image, cmap=cmap) 41 | elif image.ndim == 2 and image.dtype == bool: # Binary image 42 | ax.imshow(image, cmap="gray") 43 | else: # Depth or other image 44 | ax.imshow(image, cmap="viridis") 45 | 46 | if name: 47 | ax.text( 48 | 5, 49 | 5, 50 | name, 51 | fontsize=fontsize, 52 | color=titlecolor, 53 | verticalalignment="top", 54 | horizontalalignment="left", 55 | bbox=dict(facecolor=facecolor, alpha=0.5, edgecolor="none", pad=3), 56 | ) 57 | 58 | 59 | def erode_mask( 60 | mask: np.ndarray, kernel_size: int = 3, iterations: int = 1 61 | ) -> np.ndarray: 62 | """Apply erosion to the mask.""" 63 | return _apply_morphology( 64 | mask, operation="erode", kernel_size=kernel_size, iterations=iterations 65 | ) 66 | 67 | 68 | def dilate_mask( 69 | mask: np.ndarray, kernel_size: int = 3, iterations: int = 1 70 | ) -> np.ndarray: 71 | """Apply dilation to the mask.""" 72 | return _apply_morphology( 73 | mask, operation="dilate", kernel_size=kernel_size, iterations=iterations 74 | ) 75 | 76 | 77 | def get_depth_colormap(image: np.ndarray) -> np.ndarray: 78 | """Convert a depth image to a colormap representation.""" 79 | if image.ndim != 2: 80 | raise ValueError("Input image must be a 2D array.") 81 | d_min, d_max = image.min(), image.max() 82 | if d_min == d_max: 83 | return np.zeros_like(image, dtype=np.uint8) 84 | # Normalize the depth image to range [0, 255] 85 | img = (image - d_min) / (d_max - d_min) * 255 86 | img = img.astype(np.uint8) 87 | img = cv2.applyColorMap(img, cv2.COLORMAP_VIRIDIS) 88 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 89 | return img 90 | 91 | 92 | def draw_image_overlay( 93 | rgb_image: np.ndarray, overlay_image: np.ndarray, alpha: float = 0.5 94 | ) -> np.ndarray: 95 | """Draw an overlay image on top of an RGB image.""" 96 | return cv2.addWeighted(rgb_image, 1 - alpha, overlay_image, alpha, 0) 97 | 98 | 99 | def 
draw_image_grid( 100 | images, 101 | names=None, 102 | figsize=(1920, 1080), 103 | max_cols=4, 104 | facecolor="white", 105 | titlecolor="black", 106 | fontsize=12, 107 | bar_width=0.2, 108 | ): 109 | """Display a list of images in a grid and draw the title name on each image's top-left corner.""" 110 | num_images = len(images) 111 | if num_images == 0: 112 | raise ValueError("No images provided to display.") 113 | num_cols = min(num_images, max_cols) 114 | num_rows = (num_images + num_cols - 1) // num_cols 115 | # Default to no names if not provided 116 | if names is None or len(names) != num_images: 117 | names = [None] * num_images 118 | # Create figure and axis grid 119 | fig, axs = plt.subplots( 120 | num_rows, 121 | num_cols, 122 | figsize=(figsize[0] / 100.0, figsize[1] / 100.0), 123 | dpi=100, 124 | facecolor=facecolor, 125 | ) 126 | axs = np.atleast_1d(axs).flat # Ensure axs is always iterable 127 | # Plot each image 128 | for i, (image, name) in enumerate(zip(images, names)): 129 | _plot_image(axs[i], image, name, facecolor, titlecolor, fontsize) 130 | axs[i].axis("off") 131 | # Hide unused axes 132 | for ax in axs[i + 1 :]: 133 | ax.axis("off") 134 | # Adjust layout and spacing 135 | plt.tight_layout(pad=bar_width, h_pad=bar_width, w_pad=bar_width) 136 | # Convert the figure to an RGB array 137 | fig.canvas.draw() 138 | rgb_image = np.array(fig.canvas.buffer_rgba())[:, :, :3] 139 | # Close the figure 140 | plt.close(fig) 141 | return rgb_image 142 | 143 | 144 | def draw_hand_landmarks(rgb_image, landmarks, hand_side=None, box=None): 145 | """Draw hand landmarks on an image.""" 146 | img = rgb_image.copy() 147 | # draw bones 148 | for idx, bone in enumerate(HAND_BONES): 149 | if np.any(landmarks[bone[0]] == -1) or np.any(landmarks[bone[1]] == -1): 150 | continue 151 | cv2.line( 152 | img, 153 | landmarks[bone[0]], 154 | landmarks[bone[1]], 155 | HAND_BONE_COLORS[idx].rgb, 156 | 2, 157 | ) 158 | # draw joints 159 | for idx, mark in enumerate(landmarks): 160 | if np.any(mark == -1): 161 | continue 162 | cv2.circle(img, mark, 5, [255, 255, 255], -1) 163 | cv2.circle( 164 | img, 165 | mark, 166 | 3, 167 | HAND_JOINT_COLORS[idx].rgb, 168 | -1, 169 | ) 170 | 171 | # draw hand box 172 | if box is not None: 173 | cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2) 174 | 175 | # draw hand side text 176 | if hand_side is not None: 177 | text = hand_side.lower() 178 | text_x = np.min(landmarks[:, 0]) 179 | text_y = np.min(landmarks[:, 1]) - 5 # add margin to top 180 | text_color = HAND_COLORS[1] if text == "right" else HAND_COLORS[2] 181 | cv2.putText( 182 | img, 183 | text, 184 | (text_x, text_y), 185 | cv2.FONT_HERSHEY_DUPLEX, 186 | 1, 187 | text_color.rgb, 188 | 1, 189 | cv2.LINE_AA, 190 | ) 191 | return img 192 | 193 | 194 | def draw_all_camera_images( 195 | images, 196 | names=None, 197 | figsize=(1920, 1080), 198 | facecolor="white", 199 | titlecolor="black", 200 | fontsize=12, 201 | bar_width=0.2, 202 | show_only=False, 203 | ): 204 | """Draw nine images in a grid (8 from RealSense cameras and 1 from HoloLens) in a 3x4 layout. 205 | 206 | Args: 207 | images (list of np.ndarray): List of 9 images to be displayed. 208 | names (list of str, optional): List of image names to display on top-left. Defaults to None. 209 | figsize (tuple, optional): Figure size in pixels. Defaults to (1920, 1080). 210 | facecolor (str, optional): Background color of the figure. Defaults to "white". 211 | titlecolor (str, optional): Color of the image titles. Defaults to "black". 
212 | fontsize (int, optional): Font size for the image titles. Defaults to 12. 213 | bar_width (float, optional): Padding between subplots. Defaults to 0.2. 214 | 215 | Returns: 216 | np.ndarray: The final figure rendered as an RGB image, or None if show_only is True. 217 | """ 218 | num_images = len(images) 219 | if num_images != 9: 220 | raise ValueError(f"Expected exactly 9 images, but got {num_images}.") 221 | if names is None: 222 | names = [None] * num_images 223 | if len(names) != num_images: 224 | raise ValueError( 225 | f"Number of 'names' must match the number of images. Expected 9, but got {len(names)}." 226 | ) 227 | fig = plt.figure( 228 | figsize=(figsize[0] / 100.0, figsize[1] / 100.0), dpi=100, facecolor=facecolor 229 | ) 230 | gs = GridSpec(3, 4, figure=fig) 231 | # Plot the first eight images in a 2x4 grid 232 | for i in range(8): 233 | row, col = divmod(i, 4) # Divide by 4 to get row, modulo 4 to get column 234 | ax = fig.add_subplot(gs[row, col]) 235 | _plot_image(ax, images[i], names[i], facecolor, titlecolor, fontsize) 236 | ax.axis("off") 237 | # Plot the ninth image in the third row, spanning columns 1 and 2 238 | center_ax = fig.add_subplot(gs[2, 1:3]) 239 | _plot_image(center_ax, images[8], names[8], facecolor, titlecolor, fontsize) 240 | center_ax.axis("off") 241 | # Adjust layout and spacing 242 | plt.tight_layout(pad=bar_width, h_pad=bar_width, w_pad=bar_width) 243 | if show_only: 244 | plt.show() 245 | plt.close(fig) 246 | return 247 | 248 | # Convert figure to RGB image 249 | fig.canvas.draw() 250 | rgb_image = np.array(fig.canvas.buffer_rgba())[:, :, :3] 251 | # Close the figure to free memory 252 | plt.close(fig) 253 | return rgb_image 254 | 255 | 256 | def get_rgb_difference(rgb1, rgb2, scale=255.0): 257 | """Compute the normalized L2 error between two RGB images.""" 258 | # Convert to float32 and normalize 259 | im1 = rgb1.astype(np.float32) / scale 260 | im2 = rgb2.astype(np.float32) / scale 261 | # Compute the normalized L2 error 262 | diff = np.sqrt(np.mean((im1 - im2) ** 2)) 263 | return diff 264 | 265 | 266 | def get_mask_iou(mask1, mask2): 267 | """Compute Intersection over Union (IoU) between two binary masks.""" 268 | # Convert to boolean masks 269 | m1 = mask1.astype(bool) 270 | m2 = mask2.astype(bool) 271 | # Compute intersection and union 272 | intersection = np.logical_and(m1, m2).sum() 273 | union = np.logical_or(m1, m2).sum() 274 | # Calculate IoU score 275 | score = intersection / union if union != 0 else 0.0 276 | return score 277 | 278 | 279 | def get_mask_dice_coefficient(mask1, mask2): 280 | """Compute Dice coefficient between two binary masks.""" 281 | # Convert to boolean masks 282 | m1 = mask1.astype(bool) 283 | m2 = mask2.astype(bool) 284 | # Compute intersection and sum of masks 285 | intersection = np.logical_and(m1, m2).sum() 286 | sum_masks = m1.sum() + m2.sum() 287 | # Calculate Dice coefficient 288 | score = 2 * intersection / sum_masks if sum_masks != 0 else 0.0 289 | return score 290 | 291 | 292 | def create_video_from_rgb_images( 293 | file_path: Union[str, Path], rgb_images: List[np.ndarray], fps: int = 30 294 | ) -> None: 295 | """Create a video from a list of RGB images.""" 296 | if not rgb_images: 297 | raise ValueError("The list of RGB images is empty.") 298 | height, width = rgb_images[0].shape[:2] 299 | container = None 300 | try: 301 | container = av.open(str(file_path), mode="w") 302 | stream = container.add_stream("h264", rate=fps) 303 | stream.width = width 304 | stream.height = height 305 | stream.pix_fmt = "yuv420p" 306 | stream.thread_type = "FRAME" # 
Parallel processing of frames 307 | stream.thread_count = os.cpu_count() # Number of threads to use 308 | for image in rgb_images: 309 | frame = av.VideoFrame.from_ndarray(image, format="rgb24") 310 | for packet in stream.encode(frame): 311 | container.mux(packet) 312 | for packet in stream.encode(): 313 | container.mux(packet) 314 | except Exception as e: 315 | raise IOError(f"Failed to write video to '{file_path}': {e}") 316 | finally: 317 | if container: 318 | container.close() 319 | 320 | 321 | def create_video_from_depth_images( 322 | file_path: Union[str, Path], depth_images: list[np.ndarray], fps: int = 30 323 | ) -> None: 324 | """Create a video from a list of depth images.""" 325 | # Validate image dimensions 326 | height, width = depth_images[0].shape[:2] 327 | container = None 328 | try: 329 | container = av.open(str(file_path), mode="w") 330 | stream = container.add_stream("h264", rate=fps) 331 | stream.width = width 332 | stream.height = height 333 | stream.pix_fmt = "yuv420p" 334 | stream.thread_type = "FRAME" # Parallel processing of frames 335 | stream.thread_count = os.cpu_count() # Number of threads to use 336 | 337 | for depth_image in depth_images: 338 | image = get_depth_colormap(depth_image) 339 | frame = av.VideoFrame.from_ndarray(image, format="rgb24") 340 | for packet in stream.encode(frame): 341 | container.mux(packet) 342 | for packet in stream.encode(): 343 | container.mux(packet) 344 | except Exception as e: 345 | raise IOError(f"Failed to write video to '{file_path}': {e}") 346 | finally: 347 | if container: 348 | container.close() 349 | 350 | 351 | def create_video_from_image_files( 352 | file_path: Union[str, Path], 353 | image_files: List[Union[str, Path]], 354 | fps: int = 30, 355 | preload: bool = False, 356 | ) -> None: 357 | """Create a video from a list of image files (RGB or Depth images). 358 | 359 | Args: 360 | file_path (str | Path): Path to save the output video. 361 | image_files (list[str | Path]): List of image file paths. 362 | fps (int, optional): Frames per second for the video. Defaults to 30. 363 | preload (bool, optional): Preload all images into memory before creating the video. Defaults to False. 
364 | """ 365 | 366 | def worker_read_image_file(image_file): 367 | """Helper to read the image file, handle depth images, and return an RGB image.""" 368 | img = cv2.imread(str(image_file), cv2.IMREAD_UNCHANGED) 369 | if img is None: 370 | raise ValueError(f"Failed to read image file: {image_file}") 371 | # If depth image (2D), apply colormap, otherwise assume it's an RGB image 372 | if img.ndim == 2: 373 | img = get_depth_colormap(img) 374 | return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 375 | 376 | if not image_files: 377 | raise ValueError("The list of image files is empty.") 378 | 379 | # Load all images into memory if preload is True 380 | if preload: 381 | images = [None] * len(image_files) 382 | with concurrent.futures.ThreadPoolExecutor() as executor: 383 | futures = { 384 | executor.submit(worker_read_image_file, image_file): i 385 | for i, image_file in enumerate(image_files) 386 | } 387 | for future in concurrent.futures.as_completed(futures): 388 | i = futures[future] 389 | try: 390 | images[i] = future.result() 391 | except Exception as e: 392 | raise ValueError(f"Error loading image: {e}") 393 | else: 394 | images = None 395 | 396 | first_image = worker_read_image_file(image_files[0]) 397 | height, width = first_image.shape[:2] 398 | container = None 399 | try: 400 | container = av.open(str(file_path), mode="w") 401 | stream = container.add_stream("h264", rate=fps) 402 | stream.width = width 403 | stream.height = height 404 | stream.pix_fmt = "yuv420p" 405 | stream.thread_type = "FRAME" # Parallel processing of frames 406 | stream.thread_count = os.cpu_count() # Number of threads to use 407 | for i in range(len(image_files)): 408 | image = images[i] if preload else worker_read_image_file(image_files[i]) 409 | frame = av.VideoFrame.from_ndarray(image, format="rgb24") 410 | for packet in stream.encode(frame): 411 | container.mux(packet) 412 | for packet in stream.encode(): 413 | container.mux(packet) 414 | except Exception as e: 415 | raise IOError(f"Failed to write video to '{file_path}': {e}") 416 | finally: 417 | if container: 418 | container.close() 419 | 420 | 421 | def write_points_to_ply( 422 | points: np.ndarray, save_path: Union[str, Path], colors: np.ndarray = None 423 | ) -> None: 424 | """Write a point cloud to a PLY file.""" 425 | if colors is None: # Default to green color 426 | colors = np.tile([0, 1, 0], (points.shape[0], 1)).astype(np.float32) 427 | pcd = o3d.geometry.PointCloud() 428 | pcd.points = o3d.utility.Vector3dVector(points) 429 | pcd.colors = o3d.utility.Vector3dVector(colors) 430 | o3d.io.write_point_cloud(str(save_path), pcd, write_ascii=True) 431 | 432 | 433 | def read_points_from_ply(file_path: Union[str, Path]) -> np.ndarray: 434 | """Read a point cloud from a PLY file.""" 435 | pcd = o3d.io.read_point_cloud(str(file_path)) 436 | points = np.asarray(pcd.points, dtype=np.float32) 437 | return points 438 | 439 | 440 | def get_xyz_from_uvd(u, v, d, fx, fy, cx, cy): 441 | if d == 0: # Handle division by zero 442 | return [0.0, 0.0, 0.0] 443 | x = (u - cx) * d / fx 444 | y = (v - cy) * d / fy 445 | z = d 446 | return [x, y, z] 447 | 448 | 449 | def get_uv_from_xyz(x, y, z, fx, fy, cx, cy): 450 | if z == 0: # Prevent division by zero 451 | return [-1.0, -1.0] 452 | u = x * fx / z + cx 453 | v = y * fy / z + cy 454 | return [u, v] 455 | 456 | 457 | def get_bbox_from_landmarks(landmarks, width, height, margin=3): 458 | """Get the xyxy bounding box from hand landmarks.""" 459 | # Filter landmarks where both x and y are valid (i.e., not -1) 460 | marks = 
np.array(landmarks) 461 | valid_mask = ~np.all(marks == -1, axis=1) 462 | if valid_mask.sum() == 0: 463 | # If no valid landmarks, return a full image bounding box 464 | return [-1, -1, -1, -1] 465 | # Get the bounding box using cv2.boundingRect 466 | x, y, w, h = cv2.boundingRect(marks[valid_mask]) 467 | bbox = np.array([x, y, x + w, y + h]) 468 | # Apply margin while ensuring the bounding box stays within image bounds 469 | bbox[0] = max(0, bbox[0] - margin) 470 | bbox[1] = max(0, bbox[1] - margin) 471 | bbox[2] = min(width - 1, bbox[2] + margin) 472 | bbox[3] = min(height - 1, bbox[3] + margin) 473 | return bbox.astype(int).tolist() 474 | 475 | 476 | def get_bbox_from_mask(mask, margin=3): 477 | """Get the xyxy bounding box from a binary mask.""" 478 | height, width = mask.shape[:2] 479 | if not np.any(mask): 480 | return [-1.0, -1.0, -1.0, -1.0] 481 | x, y, w, h = cv2.boundingRect(mask.astype(np.uint8)) 482 | bbox = np.array([x, y, x + w, y + h]) 483 | bbox[0] = max(0, bbox[0] - margin) 484 | bbox[1] = max(0, bbox[1] - margin) 485 | bbox[2] = min(width - 1, bbox[2] + margin) 486 | bbox[3] = min(height - 1, bbox[3] + margin) 487 | return bbox.astype(float).tolist() 488 | 489 | 490 | def get_mask_from_seg_image(seg_img, color_to_idx_map): 491 | H, W, _ = seg_img.shape 492 | flat_seg_img = seg_img.reshape(-1, 3) 493 | flat_mask_img = np.zeros((H * W), dtype=np.uint8) 494 | for color, idx in color_to_idx_map.items(): 495 | matching_pixels = np.all(flat_seg_img == color, axis=1) 496 | flat_mask_img[matching_pixels] = idx 497 | mask_img = flat_mask_img.reshape(H, W) 498 | return mask_img 499 | 500 | 501 | def draw_debug_image( 502 | rgb_image, 503 | hand_mask=None, 504 | object_mask=None, 505 | prompt_points=None, 506 | prompt_labels=None, 507 | hand_marks=None, 508 | alpha=0.5, 509 | draw_boxes=False, 510 | draw_hand_sides=False, 511 | reduce_background=False, 512 | ): 513 | """ 514 | Draws debug information on an RGB image. 515 | 516 | Args: 517 | rgb_image (np.ndarray): The original RGB image. 518 | hand_mask (np.ndarray, optional): Mask of the hands. 519 | object_mask (np.ndarray, optional): Mask of the objects. 520 | prompt_points (list, optional): Points to be drawn on the image. 521 | prompt_labels (list, optional): Labels for the prompt points. 522 | hand_marks (list, optional): Hand landmark points. 523 | alpha (float, optional): Transparency factor for overlay. Defaults to 0.5. 524 | reduce_background (bool, optional): Whether to reduce the background visibility. Defaults to False. 525 | draw_boxes (bool, optional): Whether to draw bounding boxes around hands and objects. Defaults to False. 526 | draw_hand_sides (bool, optional): Whether to draw text indicating left/right hand. Defaults to False. 527 | 528 | Returns: 529 | np.ndarray: The image with debug information drawn on it. 
530 | """ 531 | height, width = rgb_image.shape[:2] 532 | overlay = np.zeros_like(rgb_image) if reduce_background else rgb_image.copy() 533 | 534 | def apply_mask(mask, colors): 535 | for label in np.unique(mask): 536 | if label == 0: 537 | continue 538 | overlay[mask == label] = colors[label].rgb 539 | 540 | def draw_boxes_from_mask(mask, colors): 541 | for label in np.unique(mask): 542 | if label == 0: 543 | continue 544 | box = get_bbox_from_mask(mask == label) 545 | cv2.rectangle( 546 | overlay, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), colors[label].rgb, 2 547 | ) 548 | 549 | # Draw hand mask 550 | if hand_mask is not None: 551 | apply_mask(hand_mask, HAND_COLORS) 552 | 553 | # Draw object mask 554 | if object_mask is not None: 555 | apply_mask(object_mask, OBJ_CLASS_COLORS) 556 | 557 | # Draw bounding boxes 558 | if draw_boxes: 559 | if hand_mask is not None: 560 | draw_boxes_from_mask(hand_mask, HAND_COLORS) 561 | if object_mask is not None: 562 | draw_boxes_from_mask(object_mask, OBJ_CLASS_COLORS) 563 | 564 | # Draw prompt points 565 | if prompt_points is not None and prompt_labels is not None: 566 | points = np.array(prompt_points, dtype=np.int32).reshape(-1, 2) 567 | labels = np.array(prompt_labels, dtype=np.int32).reshape(-1) 568 | for point, label in zip(points, labels): 569 | color = COLORS["dark_red"] if label == 0 else COLORS["dark_green"] 570 | cv2.circle(overlay, tuple(point), 3, color.rgb, -1) 571 | 572 | overlay = cv2.addWeighted(rgb_image, 1 - alpha, overlay, alpha, 0) 573 | 574 | # Draw hand sides 575 | if draw_hand_sides and hand_mask is not None and hand_marks is None: 576 | for label in np.unique(hand_mask): 577 | if label == 0: 578 | continue 579 | mask = hand_mask == label 580 | color = HAND_COLORS[label] 581 | text = "right" if label == 1 else "left" 582 | x, y, _, _ = cv2.boundingRect(mask.astype(np.uint8)) 583 | cv2.putText( 584 | overlay, 585 | text, 586 | (x, y - 5), 587 | cv2.FONT_HERSHEY_DUPLEX, 588 | 1, 589 | color.rgb, 590 | 1, 591 | cv2.LINE_AA, 592 | ) 593 | 594 | # Draw hand landmarks 595 | if hand_marks is not None: 596 | for ind, marks in enumerate(hand_marks): 597 | if np.all(marks == -1): 598 | continue 599 | 600 | # Draw bones 601 | for bone_idx, (start, end) in enumerate(HAND_BONES): 602 | if np.any(marks[start] == -1) or np.any(marks[end] == -1): 603 | continue 604 | color = HAND_BONE_COLORS[bone_idx] 605 | cv2.line(overlay, tuple(marks[start]), tuple(marks[end]), color.rgb, 2) 606 | 607 | # Draw joints 608 | for i, mark in enumerate(marks): 609 | if np.any(mark == -1): 610 | continue 611 | color = HAND_JOINT_COLORS[i] 612 | cv2.circle(overlay, tuple(mark), 5, (255, 255, 255), -1) 613 | cv2.circle(overlay, tuple(mark), 3, color.rgb, -1) 614 | 615 | if draw_boxes: 616 | box = get_bbox_from_landmarks(marks, width, height, margin=10) 617 | color = HAND_COLORS[1] if ind == 0 else HAND_COLORS[2] 618 | cv2.rectangle(overlay, (box[0], box[1]), (box[2], box[3]), color.rgb, 2) 619 | 620 | if draw_hand_sides: 621 | text = "right" if ind == 0 else "left" 622 | color = HAND_COLORS[1] if ind == 0 else HAND_COLORS[2] 623 | x, y, _, _ = cv2.boundingRect( 624 | np.array([m for m in marks if np.all(m != -1)], dtype=np.int64) 625 | ) 626 | cv2.putText( 627 | overlay, 628 | text, 629 | (x, y - 11), 630 | cv2.FONT_HERSHEY_DUPLEX, 631 | .8, 632 | color.rgb, 633 | 1, 634 | cv2.LINE_AA, 635 | ) 636 | 637 | return overlay 638 | -------------------------------------------------------------------------------- /hocap_toolkit/utils/io.py: 
-------------------------------------------------------------------------------- 1 | from .common_imports import * 2 | 3 | 4 | def make_clean_folder(folder_path: Union[str, Path]) -> None: 5 | """Delete the folder if it exists and create a new one.""" 6 | if Path(folder_path).is_dir(): 7 | shutil.rmtree(str(folder_path)) 8 | try: 9 | Path(folder_path).mkdir(parents=True, exist_ok=True) 10 | except OSError as e: 11 | raise OSError(f"Failed to create folder '{folder_path}': {e}") 12 | 13 | 14 | def read_data_from_json(file_path: Union[str, Path]) -> Any: 15 | """Read data from a JSON file and return it.""" 16 | if not Path(file_path).is_file(): 17 | raise FileNotFoundError(f"File not found: {file_path}") 18 | try: 19 | with open(str(file_path), "r", encoding="utf-8") as f: 20 | return json.load(f) 21 | except json.JSONDecodeError as e: 22 | raise ValueError(f"Error parsing JSON from {file_path}: {e}") 23 | 24 | 25 | def write_data_to_json(file_path: Union[str, Path], data: Union[list, Dict]) -> None: 26 | """Write data to a JSON file.""" 27 | try: 28 | with open(str(file_path), "w", encoding="utf-8") as f: 29 | json.dump(data, f, indent=2, ensure_ascii=False, sort_keys=False) 30 | except IOError as e: 31 | raise IOError(f"Failed to write JSON data to {file_path}: {e}") 32 | 33 | 34 | def read_data_from_yaml(file_path: Union[str, Path]) -> Any: 35 | """Read data from a YAML file and return it.""" 36 | if not Path(file_path).is_file(): 37 | raise FileNotFoundError(f"File not found: {file_path}") 38 | try: 39 | with open(str(file_path), "r", encoding="utf-8") as f: 40 | return yaml.load(f) 41 | except FileNotFoundError: 42 | raise FileNotFoundError(f"File not found: {file_path}") 43 | except Exception as e: 44 | raise ValueError(f"Error reading YAML file from {file_path}: {e}") 45 | 46 | 47 | def write_data_to_yaml(file_path: Union[str, Path], data: Any) -> None: 48 | """Write data to a YAML file.""" 49 | try: 50 | with open(str(file_path), "w", encoding="utf-8") as f: 51 | yaml.dump(data, f) 52 | except IOError as e: 53 | raise IOError(f"Failed to write YAML data to {file_path}: {e}") 54 | 55 | 56 | def read_rgb_image(file_path: Union[str, Path]) -> np.ndarray: 57 | """Read an RGB image from the specified file path.""" 58 | if not Path(file_path).exists(): 59 | raise FileNotFoundError(f"Image file '{file_path}' does not exist.") 60 | image = cv2.imread(str(file_path)) 61 | if image is None: 62 | raise ValueError(f"Failed to load image from '{file_path}'.") 63 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 64 | return image 65 | 66 | 67 | def write_rgb_image(file_path: Union[str, Path], image: np.ndarray) -> None: 68 | """Write an RGB image to the specified file path.""" 69 | if image.ndim != 3 or image.shape[2] != 3: 70 | raise ValueError("Input image must be an RGB image with 3 channels.") 71 | success = cv2.imwrite(str(file_path), cv2.cvtColor(image, cv2.COLOR_RGB2BGR)) 72 | if not success: 73 | raise ValueError(f"Failed to write RGB image to '{file_path}'.") 74 | 75 | 76 | def read_depth_image(file_path: Union[str, Path], scale: float = 1.0) -> np.ndarray: 77 | """Read a depth image from the specified file path.""" 78 | if not Path(file_path).exists(): 79 | raise FileNotFoundError(f"Depth image file '{file_path}' does not exist.") 80 | image = cv2.imread(str(file_path), cv2.IMREAD_ANYDEPTH) 81 | if image is None: 82 | raise ValueError(f"Failed to load depth image from '{file_path}'.") 83 | image = image.astype(np.float32) / scale 84 | return image 85 | 86 | 87 | def 
write_depth_image(file_path: Union[str, Path], image: np.ndarray) -> None: 88 | """Write a depth image to the specified file path.""" 89 | if image.dtype not in [np.uint16, np.uint8]: 90 | raise ValueError("Depth image must be of type uint16 or uint8.") 91 | success = cv2.imwrite(str(file_path), image) 92 | if not success: 93 | raise ValueError(f"Failed to write depth image to '{file_path}'.") 94 | 95 | 96 | def read_mask_image(file_path: Union[str, Path]) -> np.ndarray: 97 | """Read a mask image from the specified file path.""" 98 | if not Path(file_path).exists(): 99 | raise FileNotFoundError(f"Mask image file '{file_path}' does not exist.") 100 | image = cv2.imread(str(file_path), cv2.IMREAD_GRAYSCALE) 101 | if image is None: 102 | raise ValueError(f"Failed to load mask image from '{file_path}'.") 103 | return image 104 | 105 | 106 | def write_mask_image(file_path: Union[str, Path], image: np.ndarray) -> None: 107 | """Write a mask image to the specified file path.""" 108 | success = cv2.imwrite(str(file_path), image) 109 | if not success: 110 | raise ValueError(f"Failed to write mask image to '{file_path}'.") 111 | -------------------------------------------------------------------------------- /hocap_toolkit/utils/mano_info.py: -------------------------------------------------------------------------------- 1 | """MediaPipe Hands connections and MANO hand model enhancements.""" 2 | 3 | # Connections for the hand palm, thumb, and fingers 4 | HAND_PALM_CONNECTIONS = ((0, 1), (0, 5), (0, 17), (5, 9), (9, 13), (13, 17)) 5 | HAND_THUMB_CONNECTIONS = ((1, 2), (2, 3), (3, 4)) 6 | HAND_INDEX_FINGER_CONNECTIONS = ((5, 6), (6, 7), (7, 8)) 7 | HAND_MIDDLE_FINGER_CONNECTIONS = ((9, 10), (10, 11), (11, 12)) 8 | HAND_RING_FINGER_CONNECTIONS = ((13, 14), (14, 15), (15, 16)) 9 | HAND_PINKY_FINGER_CONNECTIONS = ((17, 18), (18, 19), (19, 20)) 10 | 11 | # All hand bone connections combined 12 | HAND_BONES = ( 13 | HAND_PALM_CONNECTIONS 14 | + HAND_THUMB_CONNECTIONS 15 | + HAND_INDEX_FINGER_CONNECTIONS 16 | + HAND_MIDDLE_FINGER_CONNECTIONS 17 | + HAND_RING_FINGER_CONNECTIONS 18 | + HAND_PINKY_FINGER_CONNECTIONS 19 | ) 20 | 21 | # Hand joint names as per the typical skeleton structure 22 | HAND_JOINT_NAMES = ( 23 | "WRIST", 24 | "THUMB_CMC", 25 | "THUMB_MCP", 26 | "THUMB_IP", 27 | "THUMB_TIP", 28 | "INDEX_MCP", 29 | "INDEX_PIP", 30 | "INDEX_DIP", 31 | "INDEX_TIP", 32 | "MIDDLE_MCP", 33 | "MIDDLE_PIP", 34 | "MIDDLE_DIP", 35 | "MIDDLE_TIP", 36 | "RING_MCP", 37 | "RING_PIP", 38 | "RING_DIP", 39 | "RING_TIP", 40 | "PINKY_MCP", 41 | "PINKY_PIP", 42 | "PINKY_DIP", 43 | "PINKY_TIP", 44 | ) 45 | 46 | # Parent-child relationships of hand joints (index refers to HAND_JOINT_NAMES) 47 | # -1 indicates no parent (root node) 48 | HAND_JOINT_PARENTS = [ 49 | -1, # WRIST 50 | 0, # THUMB_CMC 51 | 1, # THUMB_MCP 52 | 2, # THUMB_IP 53 | 3, # THUMB_TIP 54 | 0, # INDEX_MCP 55 | 5, # INDEX_PIP 56 | 6, # INDEX_DIP 57 | 7, # INDEX_TIP 58 | 0, # MIDDLE_MCP 59 | 9, # MIDDLE_PIP 60 | 10, # MIDDLE_DIP 61 | 11, # MIDDLE_TIP 62 | 0, # RING_MCP 63 | 13, # RING_PIP 64 | 14, # RING_DIP 65 | 15, # RING_TIP 66 | 0, # PINKY_MCP 67 | 17, # PINKY_PIP 68 | 18, # PINKY_DIP 69 | 19, # PINKY_TIP 70 | ] 71 | 72 | # Additional faces added to the MANO hand mesh for watertightness 73 | NEW_MANO_FACES = { 74 | "right": [ 75 | [92, 38, 234], 76 | [234, 38, 239], 77 | [38, 122, 239], 78 | [239, 122, 279], 79 | [122, 118, 279], 80 | [279, 118, 215], 81 | [118, 117, 215], 82 | [215, 117, 214], 83 | [117, 119, 214], 84 | [214, 119, 121], 85 | [119, 120, 
121], 86 | [121, 120, 78], 87 | [120, 108, 78], 88 | [78, 108, 79], 89 | ], 90 | "left": [ 91 | [234, 38, 92], 92 | [239, 38, 234], 93 | [239, 122, 38], 94 | [279, 122, 239], 95 | [279, 118, 122], 96 | [215, 118, 279], 97 | [215, 117, 118], 98 | [214, 117, 215], 99 | [214, 119, 117], 100 | [121, 119, 214], 101 | [121, 120, 119], 102 | [78, 120, 121], 103 | [78, 108, 120], 104 | [79, 108, 78], 105 | ], 106 | } 107 | 108 | # Number of vertices and faces in the MANO model 109 | NUM_MANO_VERTS = 778 110 | NUM_MANO_FACES = 1538 111 | -------------------------------------------------------------------------------- /hocap_toolkit/utils/misc.py: -------------------------------------------------------------------------------- 1 | from .common_imports import Union, Path, sys, logging, Optional, json, os 2 | 3 | 4 | def add_path(path): 5 | if str(path) not in sys.path: 6 | sys.path.insert(0, str(path)) 7 | 8 | 9 | def get_logger(log_name="HOCapToolkit", log_level="INFO", log_file=None): 10 | """Create and return a logger with console and optional file output.""" 11 | logger = logging.getLogger(log_name) 12 | logger.setLevel(logging.DEBUG) 13 | formatter = logging.Formatter( 14 | "[%(asctime)s] [%(name)s:%(funcName)s] [%(levelname).3s] %(message)s", 15 | datefmt="%Y%m%d;%H:%M:%S", 16 | ) 17 | if not logger.hasHandlers(): 18 | if log_file: 19 | fh = logging.FileHandler(log_file) 20 | fh.setLevel(logging.DEBUG) 21 | fh.setFormatter(formatter) 22 | logger.addHandler(fh) 23 | # Console handler 24 | ch = logging.StreamHandler() 25 | ch.setLevel(getattr(logging, log_level.upper(), logging.INFO)) 26 | ch.setFormatter(formatter) 27 | logger.addHandler(ch) 28 | return logger 29 | -------------------------------------------------------------------------------- /hocap_toolkit/utils/transforms.py: -------------------------------------------------------------------------------- 1 | from .common_imports import * 2 | 3 | 4 | def average_quats(quats: np.ndarray) -> np.ndarray: 5 | """ 6 | Calculate the average quaternion from a set of quaternions. 7 | 8 | Args: 9 | quats (np.ndarray): An array of quaternions of shape (N, 4), where N is the number of quaternions. 10 | 11 | Returns: 12 | np.ndarray: The averaged quaternion of shape (4,). 13 | """ 14 | if not isinstance(quats, np.ndarray) or quats.shape[-1] != 4: 15 | raise ValueError("Input must be a numpy array of shape (N, 4).") 16 | 17 | rotations = R.from_quat(quats) 18 | avg_quat = rotations.mean().as_quat().astype(np.float32) 19 | return avg_quat 20 | 21 | 22 | def normalize_quats(qs: np.ndarray) -> np.ndarray: 23 | """ 24 | Normalize quaternions to have unit length. 25 | 26 | Args: 27 | qs (np.ndarray): Input quaternion, shape (4,) or (N, 4) where each quaternion is (qx, qy, qz, qw). 28 | 29 | Returns: 30 | np.ndarray: Normalized quaternion(s), same shape as input. 31 | """ 32 | # Compute the norm of the quaternion 33 | norms = np.linalg.norm(qs, axis=-1, keepdims=True) 34 | if np.any(norms == 0): 35 | raise ValueError("Quaternion norms cannot be zero.") 36 | return qs / norms 37 | 38 | 39 | def rvt_to_quat(rvt: np.ndarray) -> np.ndarray: 40 | """ 41 | Convert rotation vector and translation vector to quaternion and translation vector. 42 | 43 | Args: 44 | rvt (np.ndarray): Rotation vector and translation vector, shape (6,) for single or (N, 6) for batch. 45 | 46 | Returns: 47 | np.ndarray: Quaternion and translation vector, shape (7,) for single or (N, 7) for batch, 48 | in the format [qx, qy, qz, qw, tx, ty, tz]. 
49 | """ 50 | # Ensure the input has the correct shape 51 | if rvt.ndim == 1 and rvt.shape[0] == 6: 52 | rv = rvt[:3] 53 | t = rvt[3:] 54 | q = R.from_rotvec(rv).as_quat() 55 | return np.concatenate([q, t], dtype=np.float32) 56 | 57 | elif rvt.ndim == 2 and rvt.shape[1] == 6: 58 | rv = rvt[:, :3] 59 | t = rvt[:, 3:] 60 | q = R.from_rotvec(rv).as_quat() # Batch process 61 | return np.concatenate([q, t], axis=-1).astype(np.float32) 62 | 63 | else: 64 | raise ValueError("Input must be of shape (6,) or (N, 6).") 65 | 66 | 67 | def quat_to_rvt(quat: np.ndarray) -> np.ndarray: 68 | """ 69 | Convert quaternion and translation vector to rotation vector and translation vector. 70 | 71 | Args: 72 | quat (np.ndarray): Quaternion and translation vector. Shape can be (7,) for single input 73 | or (N, 7) for batched input. 74 | 75 | Returns: 76 | np.ndarray: Rotation vector and translation vector. Shape will be (6,) for single input 77 | or (N, 6) for batched input. 78 | 79 | Raises: 80 | ValueError: If the input does not have the expected shape or dimensions. 81 | """ 82 | # Validate input shape 83 | if not isinstance(quat, np.ndarray): 84 | raise TypeError("Input must be a numpy array.") 85 | 86 | if quat.ndim == 1 and quat.shape[0] == 7: 87 | batch_mode = False 88 | elif quat.ndim == 2 and quat.shape[1] == 7: 89 | batch_mode = True 90 | else: 91 | raise ValueError( 92 | "Input must have shape (7,) for a single quaternion or (N, 7) for a batch of quaternions." 93 | ) 94 | 95 | # Extract quaternion (q) and translation (t) 96 | q = quat[..., :4] # Quaternion (4 elements) 97 | t = quat[..., 4:] # Translation (3 elements) 98 | 99 | # Convert quaternion to rotation vector 100 | r = R.from_quat(q) 101 | rv = r.as_rotvec() # Convert to rotation vector (3 elements) 102 | 103 | # Concatenate rotation vector and translation vector 104 | return np.concatenate([rv, t], axis=-1).astype(np.float32) 105 | 106 | 107 | def rvt_to_mat(rvt: np.ndarray) -> np.ndarray: 108 | """ 109 | Convert rotation vector and translation vector to pose matrix. 110 | 111 | Args: 112 | rvt (np.ndarray): Rotation vector and translation vector, shape (6,) for single or (N, 6) for batch. 113 | 114 | Returns: 115 | np.ndarray: Pose matrix, shape (4, 4) for single or (N, 4, 4) for batch. 116 | """ 117 | # Single input case (shape (6,)) 118 | if rvt.ndim == 1 and rvt.shape[0] == 6: 119 | p = np.eye(4) 120 | rv = rvt[:3] 121 | t = rvt[3:] 122 | r = R.from_rotvec(rv) 123 | p[:3, :3] = r.as_matrix() 124 | p[:3, 3] = t 125 | return p.astype(np.float32) 126 | 127 | # Batched input case (shape (N, 6)) 128 | elif rvt.ndim == 2 and rvt.shape[1] == 6: 129 | N = rvt.shape[0] 130 | p = np.tile(np.eye(4), (N, 1, 1)) # Create an identity matrix for each batch 131 | rv = rvt[:, :3] # Rotation vectors (N, 3) 132 | t = rvt[:, 3:] # Translation vectors (N, 3) 133 | r = R.from_rotvec(rv) 134 | p[:, :3, :3] = r.as_matrix() # Set rotation matrices for each batch 135 | p[:, :3, 3] = t # Set translation vectors for each batch 136 | return p.astype(np.float32) 137 | 138 | else: 139 | raise ValueError("Input must be of shape (6,) or (N, 6).") 140 | 141 | 142 | def mat_to_rvt(mat_4x4: np.ndarray) -> np.ndarray: 143 | """ 144 | Convert pose matrix to rotation vector and translation vector. 145 | 146 | Args: 147 | mat_4x4 (np.ndarray): Pose matrix, shape (4, 4) for single input 148 | or (N, 4, 4) for batched input. 149 | 150 | Returns: 151 | np.ndarray: Rotation vector and translation vector, shape (6,) for single input 152 | or (N, 6) for batched input. 
153 | """ 154 | # Single input case (shape (4, 4)) 155 | if mat_4x4.ndim == 2 and mat_4x4.shape == (4, 4): 156 | r = R.from_matrix(mat_4x4[:3, :3]) 157 | rv = r.as_rotvec() 158 | t = mat_4x4[:3, 3] 159 | return np.concatenate([rv, t], dtype=np.float32) 160 | 161 | # Batched input case (shape (N, 4, 4)) 162 | elif mat_4x4.ndim == 3 and mat_4x4.shape[1:] == (4, 4): 163 | rv = R.from_matrix(mat_4x4[:, :3, :3]).as_rotvec() # Batch process rotations 164 | t = mat_4x4[:, :3, 3] # Batch process translations 165 | return np.concatenate([rv, t], axis=-1).astype(np.float32) 166 | 167 | else: 168 | raise ValueError("Input must be of shape (4, 4) or (N, 4, 4).") 169 | 170 | 171 | def mat_to_quat(mat_4x4: np.ndarray) -> np.ndarray: 172 | """ 173 | Convert pose matrix to quaternion and translation vector. 174 | 175 | Args: 176 | mat_4x4 (np.ndarray): Pose matrix, shape (4, 4) for single input or (N, 4, 4) for batched input. 177 | 178 | Returns: 179 | np.ndarray: Quaternion and translation vector, shape (7,) for single input or (N, 7) for batched input. 180 | 181 | Raises: 182 | ValueError: If the input does not have the expected shape or dimensions. 183 | """ 184 | if not isinstance(mat_4x4, np.ndarray) or mat_4x4.shape[-2:] != (4, 4): 185 | raise ValueError("Input must be a numpy array with shape (4, 4) or (N, 4, 4).") 186 | 187 | if mat_4x4.ndim == 2: # Single matrix (shape (4, 4)) 188 | r = R.from_matrix(mat_4x4[:3, :3]) 189 | q = r.as_quat() # Quaternion (shape (4,)) 190 | t = mat_4x4[:3, 3] # Translation (shape (3,)) 191 | return np.concatenate([q, t], dtype=np.float32) 192 | 193 | elif mat_4x4.ndim == 3: # Batch of matrices (shape (N, 4, 4)) 194 | r = R.from_matrix(mat_4x4[:, :3, :3]) # Handle batch of rotation matrices 195 | q = r.as_quat() # Quaternions (shape (N, 4)) 196 | t = mat_4x4[:, :3, 3] # Translations (shape (N, 3)) 197 | return np.concatenate([q, t], axis=-1).astype(np.float32) # Shape (N, 7) 198 | 199 | else: 200 | raise ValueError("Input dimension is not valid. Must be 2D or 3D.") 201 | 202 | 203 | def quat_to_mat(quat: np.ndarray) -> np.ndarray: 204 | """ 205 | Convert quaternion and translation vector to a pose matrix. 206 | 207 | This function supports converting a single quaternion or a batch of quaternions. 208 | 209 | Args: 210 | quat (np.ndarray): Quaternion and translation vector. Shape can be (7,) for a single quaternion 211 | or (N, 7) for a batch of quaternions, where N is the batch size. 212 | 213 | Returns: 214 | np.ndarray: Pose matrix. Shape will be (4, 4) for a single quaternion or (N, 4, 4) for a batch of quaternions. 215 | 216 | Raises: 217 | ValueError: If the input does not have the expected shape or dimensions. 218 | """ 219 | # Validate input shape 220 | if not isinstance(quat, np.ndarray): 221 | raise TypeError("Input must be a numpy array.") 222 | 223 | if quat.ndim == 1 and quat.shape[0] == 7: 224 | batch_mode = False 225 | elif quat.ndim == 2 and quat.shape[1] == 7: 226 | batch_mode = True 227 | else: 228 | raise ValueError( 229 | "Input must have shape (7,) for a single quaternion or (N, 7) for a batch of quaternions." 
230 |     )
231 | 
232 |     # Extract quaternion (q) and translation (t)
233 |     q = quat[..., :4]  # Quaternion (4 elements)
234 |     t = quat[..., 4:]  # Translation (3 elements)
235 | 
236 |     # Prepare the pose matrix
237 |     if batch_mode:
238 |         N = quat.shape[0]
239 |         p = np.tile(np.eye(4), (N, 1, 1))  # Create N identity matrices
240 |     else:
241 |         p = np.eye(4)  # Single identity matrix
242 | 
243 |     # Convert quaternion to rotation matrix and fill in the pose matrix
244 |     r = R.from_quat(q)
245 |     p[..., :3, :3] = r.as_matrix()  # Fill rotation part
246 |     p[..., :3, 3] = t  # Fill translation part
247 | 
248 |     return p.astype(np.float32)
249 | 
250 | 
251 | def quat_distance(
252 |     q1: np.ndarray, q2: np.ndarray, in_degree: bool = False
253 | ) -> Union[float, np.ndarray]:
254 |     """
255 |     Calculate the shortest angular distance between paired quaternions.
256 | 
257 |     Args:
258 |         q1 (np.ndarray): First quaternion(s), shape (4,) or (N, 4).
259 |         q2 (np.ndarray): Second quaternion(s), shape (4,) or (N, 4).
260 |         in_degree (bool): If True, return the distance in degrees; otherwise in radians.
261 |     Returns:
262 |         float or np.ndarray: Angular distance in radians (degrees if in_degree is True), scalar if single pair, array if multiple pairs.
263 |     """
264 |     # Validate input shapes
265 |     if q1.ndim not in {1, 2} or q2.ndim not in {1, 2}:
266 |         raise ValueError("q1 and q2 must be 1D or 2D arrays.")
267 |     if q1.shape[-1] != 4 or q2.shape[-1] != 4:
268 |         raise ValueError("Each quaternion must have 4 components (qx, qy, qz, qw).")
269 |     if q1.shape != q2.shape:
270 |         raise ValueError("q1 and q2 must have the same shape.")
271 | 
272 |     # Normalize quaternions to ensure they are unit quaternions
273 |     q1 = q1 / np.linalg.norm(q1, axis=-1, keepdims=True)
274 |     q2 = q2 / np.linalg.norm(q2, axis=-1, keepdims=True)
275 | 
276 |     # Compute the dot product between paired quaternions
277 |     dot_product = np.sum(q1 * q2, axis=-1)
278 | 
279 |     # Clamp the dot product to the range [-1, 1] to handle numerical precision issues
280 |     dot_product = np.clip(dot_product, -1.0, 1.0)
281 | 
282 |     # Calculate the shortest angular distance in radians
283 |     angular_distance = 2 * np.arccos(np.abs(dot_product))
284 | 
285 |     # Convert to degrees if needed
286 |     if in_degree:
287 |         return np.degrees(angular_distance)
288 |     return angular_distance
289 | 
290 | 
291 | def trans_distance(t1, t2):
292 |     """Calculate the Euclidean distance between two translation vectors or arrays of translation vectors.
293 | 
294 |     Args:
295 |         t1 (np.ndarray): First translation vector(s) in shape (3,) or (N, 3), where N is the number of vectors.
296 |         t2 (np.ndarray): Second translation vector(s) in shape (3,) or (N, 3), where N is the number of vectors.
297 | 
298 |     Returns:
299 |         float or np.ndarray: Euclidean distance. Returns a scalar if inputs are 1D vectors, or an array of distances if inputs are 2D arrays.
300 |     Raises:
301 |         ValueError: If the inputs are not valid translation vectors or if their shapes are incompatible.
302 |     """
303 | 
304 |     # Ensure both inputs are NumPy arrays
305 |     t1 = np.asarray(t1, dtype=np.float32)
306 |     t2 = np.asarray(t2, dtype=np.float32)
307 | 
308 |     # Check if the shapes of t1 and t2 are compatible
309 |     if t1.shape != t2.shape:
310 |         raise ValueError(
311 |             f"Shape mismatch: t1.shape {t1.shape} and t2.shape {t2.shape} must be the same."
312 | ) 313 | 314 | # Check for valid shapes: (3,) for a single vector or (N, 3) for multiple vectors 315 | if t1.shape[-1] != 3: 316 | raise ValueError("Each translation vector must have 3 components (tx, ty, tz).") 317 | 318 | # Compute Euclidean distance 319 | return np.linalg.norm(t1 - t2, axis=-1) 320 | 321 | 322 | def angular_difference(q1: np.ndarray, q2: np.ndarray) -> Union[float, np.ndarray]: 323 | """ 324 | Calculate the angular difference in degrees between two quaternions or arrays of quaternions. 325 | 326 | Args: 327 | q1 (np.ndarray): First quaternion(s) in [qx, qy, qz, qw] or [N, qx, qy, qz, qw] format. 328 | q2 (np.ndarray): Second quaternion(s) in [qx, qy, qz, qw] or [N, qx, qy, qz, qw] format. 329 | 330 | Returns: 331 | float or np.ndarray: Angular difference in degrees, scalar if single pair or array if multiple pairs. 332 | """ 333 | dim = q1.ndim 334 | if dim == 1: 335 | q1 = q1 / np.linalg.norm(q1) 336 | q2 = q2 / np.linalg.norm(q2) 337 | else: 338 | q1 = q1 / np.linalg.norm(q1, axis=1, keepdims=True) 339 | q2 = q2 / np.linalg.norm(q2, axis=1, keepdims=True) 340 | 341 | q1 = R.from_quat(q1) 342 | q2 = R.from_quat(q2) 343 | delta_q = q1.inv() * q2 344 | delta_q_quat = delta_q.as_quat() 345 | 346 | if dim == 1: 347 | if delta_q_quat[3] < 0: 348 | delta_q_quat = -delta_q_quat 349 | else: 350 | negative_indices = delta_q_quat[:, 3] < 0 351 | delta_q_quat[negative_indices] = -delta_q_quat[negative_indices] 352 | 353 | if dim == 1: 354 | angular_diff = 2 * np.arccos(np.clip(delta_q_quat[3], -1.0, 1.0)) 355 | else: 356 | angular_diff = 2 * np.arccos(np.clip(delta_q_quat[:, 3], -1.0, 1.0)) 357 | 358 | return np.degrees(angular_diff) 359 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=64.0", "torch>=2.3.1"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "hocap-toolkit" 7 | version = "1.0.0" 8 | description = "Python package providing evaluation and visualization tools for the HoCap dataset." 
9 | requires-python = ">=3.10" 10 | license = { "text" = "GPL-3.0" } 11 | authors = [ 12 | { name = "Jikai Wang", email = "jikai.wang@utdallas.edu" } 13 | ] 14 | 15 | dependencies = [ 16 | "numpy>=1.26.4,<2", 17 | "scipy>=1.13.1", 18 | "matplotlib>=3.9.1", 19 | "ruamel.yaml>=0.18.5", 20 | "tqdm>=4.66.4", 21 | "ninja>=1.11.1.1", 22 | "opencv-python>=4.7.0", 23 | "open3d>=0.18.0", 24 | "av>=12.2.0", 25 | "pyglet<2", 26 | "trimesh==4.4.1", 27 | "pyrender==0.1.45", 28 | "pyOpenGL>=3.1.0", 29 | "pyopengl-accelerate>=3.1.0; sys_platform != 'darwin'", 30 | "mediapipe==0.10.14", 31 | "gdown>=5.2.0", 32 | "pycocotools>=2.0.7", 33 | "chumpy @ git+https://github.com/gobanana520/chumpy.git", 34 | "manopth @ git+https://github.com/gobanana520/manopth.git" 35 | ] 36 | 37 | [tool.setuptools.packages] 38 | find = { include = ["hocap_toolkit"] } 39 | -------------------------------------------------------------------------------- /results/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /tools/hocap_dataset_split.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from hocap_toolkit.factory import HOCapFactory 3 | 4 | 5 | if __name__ == "__main__": 6 | # Parse arguments 7 | parser = argparse.ArgumentParser(description="Split HOCAP dataset") 8 | parser.add_argument( 9 | "--task", 10 | type=str, 11 | choices=["hpe", "odet", "ope"], 12 | required=True, 13 | help="Dataset task (hpe, odet, ope)", 14 | ) 15 | parser.add_argument( 16 | "--anno_type", 17 | type=str, 18 | default="coco", 19 | choices=["coco", "yolo"], 20 | help="Annotation type for odet (coco, yolo)", 21 | ) 22 | args = parser.parse_args() 23 | 24 | factory = HOCapFactory() 25 | 26 | if args.task == "hpe": 27 | factory.create_hpe_dataset() 28 | 29 | if args.task == "odet": 30 | factory.create_odet_dataset(args.anno_type) 31 | 32 | if args.task == "ope": 33 | factory.create_ope_dataset() 34 | -------------------------------------------------------------------------------- /tools/hocap_downloader.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | import requests 3 | from hocap_toolkit.utils import * 4 | 5 | PROJ_ROOT = Path(__file__).parent.parent 6 | 7 | 8 | def download_box_file(box_link, save_file_path): 9 | output_path = Path(save_file_path) 10 | resume_header = {} 11 | downloaded_size = 0 12 | 13 | with requests.get(box_link, headers=resume_header, stream=True) as response: 14 | # Check if the request was successful 15 | if response.status_code == 200: 16 | total_size = int(response.headers.get("content-length", 0)) 17 | else: 18 | print(f"Failed to retrieve file info. 
Status code: {response.status_code}") 19 | return 20 | 21 | if output_path.exists(): 22 | downloaded_size = output_path.stat().st_size 23 | # Check if there's a partial download and get its size 24 | resume_header = {"Range": f"bytes={downloaded_size}-"} 25 | 26 | # Check if the file is already fully downloaded 27 | if downloaded_size == total_size: 28 | tqdm.write(f" ** {output_path.name} is already downloaded.") 29 | return 30 | 31 | # Send a GET request with the range header if needed 32 | with requests.get(box_link, headers=resume_header, stream=True) as response: 33 | # Check if the request was successful 34 | if response.status_code in [200, 206]: 35 | # Initialize tqdm progress bar 36 | with tqdm( 37 | total=total_size, 38 | initial=downloaded_size, 39 | unit="B", 40 | unit_scale=True, 41 | ncols=80, 42 | ) as pbar: 43 | # Download the file in chunks 44 | with output_path.open("ab") as file: 45 | for chunk in response.iter_content( 46 | chunk_size=1024 * 1024 47 | ): # 1 MB chunks 48 | if chunk: 49 | file.write(chunk) 50 | pbar.update(len(chunk)) 51 | else: 52 | print(f"Failed to download file. Status code: {response.status_code}") 53 | 54 | 55 | def unzip_file(zip_file, output_dir): 56 | zip_file = Path(zip_file) 57 | output_dir = Path(output_dir) 58 | 59 | if not output_dir.exists(): 60 | output_dir.mkdir(parents=True) 61 | 62 | with zipfile.ZipFile(zip_file, "r") as zip_ref: 63 | zip_ref.extractall(output_dir) 64 | 65 | 66 | def main(): 67 | dataset_files = read_data_from_yaml(PROJ_ROOT / "config/hocap_recordings.yaml") 68 | 69 | tqdm.write(f"- Downloading 'calibration.zip'...") 70 | download_box_file( 71 | dataset_files["calibration"], PROJ_ROOT / "datasets/calibration.zip" 72 | ) 73 | 74 | tqdm.write(f"- Downloading 'models.zip'...") 75 | download_box_file(dataset_files["models"], PROJ_ROOT / "datasets/models.zip") 76 | 77 | tqdm.write(f"- Downloading 'poses.zip'...") 78 | download_box_file(dataset_files["poses"], PROJ_ROOT / "datasets/poses.zip") 79 | 80 | tqdm.write(f"- Downloading 'labels.zip'...") 81 | download_box_file(dataset_files["labels"], PROJ_ROOT / "datasets/labels.zip") 82 | 83 | subject_ids = ( 84 | [f"subject_{i}" for i in range(1, 10)] 85 | if args.subject_id == "all" 86 | else [args.subject_id] 87 | ) 88 | 89 | for subject_id in subject_ids: 90 | tqdm.write(f"- Downloading '{subject_id}.zip'...") 91 | download_box_file( 92 | dataset_files[subject_id], PROJ_ROOT / "datasets" / f"{subject_id}.zip" 93 | ) 94 | 95 | # Extract the downloaded zip files 96 | zip_files = list(PROJ_ROOT.glob("datasets/*.zip")) 97 | tqdm.write(f"- Extracting downloaded zip files...") 98 | for zip_file in zip_files: 99 | tqdm.write(f" ** Extracting '{zip_file.name}'...") 100 | unzip_file(zip_file, zip_file.parent) 101 | 102 | 103 | if __name__ == "__main__": 104 | parser = argparse.ArgumentParser(description="Download dataset files") 105 | parser.add_argument( 106 | "--subject_id", 107 | type=str, 108 | default="all", 109 | choices=[ 110 | "all", 111 | "subject_1", 112 | "subject_2", 113 | "subject_3", 114 | "subject_4", 115 | "subject_5", 116 | "subject_6", 117 | "subject_7", 118 | "subject_8", 119 | "subject_9", 120 | ], 121 | help="The subject id to download", 122 | ) 123 | args = parser.parse_args() 124 | 125 | main() 126 | --------------------------------------------------------------------------------
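The pose utilities in hocap_toolkit/utils/transforms.py are the pieces most likely to be called directly from user code. The short sketch below shows one way to chain them; it relies only on the signatures defined above (rvt_to_mat, mat_to_quat, quat_distance, trans_distance) and assumes the package has been installed so that hocap_toolkit is importable. It is an illustrative sketch, not a file from the repository.

# Illustrative sketch only (not part of the repository): exercises the pose
# helpers defined in hocap_toolkit/utils/transforms.py, assuming the package
# is installed and importable as `hocap_toolkit`.
import numpy as np

from hocap_toolkit.utils.transforms import (
    mat_to_quat,
    quat_distance,
    rvt_to_mat,
    trans_distance,
)

# A toy pose: 90-degree rotation about Z plus a small translation,
# encoded as [rx, ry, rz, tx, ty, tz] (rotation vector + translation).
rvt = np.array([0.0, 0.0, np.pi / 2, 0.10, 0.00, 0.05], dtype=np.float32)

# (6,) rotation-vector pose -> (4, 4) homogeneous pose matrix.
pose_mat = rvt_to_mat(rvt)

# (4, 4) pose matrix -> (7,) [qx, qy, qz, qw, tx, ty, tz].
pose_quat = mat_to_quat(pose_mat)

# Compare a pose against itself: the rotation error is 0 (radians by default,
# pass in_degree=True for degrees) and the translation error is 0.
rot_err = quat_distance(pose_quat[:4], pose_quat[:4])
tra_err = trans_distance(pose_quat[4:], pose_quat[4:])
print(f"rotation error: {rot_err:.6f} rad, translation error: {tra_err:.6f}")

# The same helpers accept batched input.
batch_rvt = np.stack([rvt, rvt], axis=0)  # shape (2, 6)
batch_mat = rvt_to_mat(batch_rvt)         # shape (2, 4, 4)
batch_quat = mat_to_quat(batch_mat)       # shape (2, 7)
print(batch_quat.shape)                   # -> (2, 7)

The I/O helpers in hocap_toolkit/utils/io.py follow the same error-handling pattern (read_* raises on missing or unreadable files, write_* raises on failure), so they compose naturally with these transforms when post-processing pose annotations.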