├── asset ├── teaser.png ├── logo_cvlab.png └── example_images │ ├── using_pen1.jpg │ ├── holding_cup1.jpg │ ├── holding_cup2.jpg │ ├── holding_hammer1.jpg │ ├── squidgame_demo2.png │ └── holding_scissors2.jpg ├── scripts ├── download_official_decaf.sh ├── download_official_ycb_models.sh ├── download_official_hoi4d.sh ├── download_demo_base_data.sh ├── download_train_base_data.sh ├── download_demo_example_videos.sh ├── download_official_mow.sh ├── download_official_prox.sh ├── download_official_hi4d.sh ├── download_gsdf_obman.sh ├── download_train_preprocessed_data.sh ├── download_haco_checkpoints.sh ├── download_official_hic.sh ├── download_initial_checkpoints.sh ├── extract_official_hi4d.sh ├── download_official_interhand26m.sh ├── download_official_dexycb.sh └── download_official_rich.sh ├── requirements.txt ├── docs ├── data_demo.md └── data_eval.md ├── lib ├── utils │ ├── log_utils.py │ ├── smplx │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── README.md │ │ │ ├── clean_ch.py │ │ │ └── merge_smplh_mano.py │ │ ├── smplx │ │ │ ├── __init__.py │ │ │ ├── vertex_ids.py │ │ │ ├── vertex_joint_selector.py │ │ │ ├── utils.py │ │ │ └── joint_names.py │ │ ├── setup.py │ │ ├── examples │ │ │ ├── vis_flame_vertices.py │ │ │ └── vis_mano_vertices.py │ │ └── LICENSE │ ├── eval_utils.py │ ├── contact_utils.py │ ├── func_utils.py │ ├── human_models.py │ ├── mano_utils.py │ ├── demo_utils.py │ └── transforms.py ├── models │ ├── backbone │ │ ├── vit.py │ │ └── resnet.py │ └── model.py └── core │ ├── logger.py │ ├── base.py │ └── config.py ├── data ├── DexYCB │ └── toolkit │ │ ├── factory.py │ │ ├── logging.py │ │ ├── layers │ │ ├── mano_layer.py │ │ ├── mano_group_layer.py │ │ ├── ycb_group_layer.py │ │ └── ycb_layer.py │ │ └── obj.py ├── dataset.py ├── MOW │ └── dataset.py ├── PROX │ └── dataset.py ├── HIC │ └── dataset.py ├── H2O │ └── dataset.py ├── HOI4D │ └── dataset.py ├── ARCTIC │ └── dataset.py ├── HO3D │ └── dataset.py ├── Decaf │ └── dataset.py ├── RICH │ └── dataset.py ├── ObMan │ └── dataset.py └── Hi4D │ └── dataset.py ├── .gitignore ├── test.py ├── demo.py └── demo_video.py /asset/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/teaser.png -------------------------------------------------------------------------------- /asset/logo_cvlab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/logo_cvlab.png -------------------------------------------------------------------------------- /asset/example_images/using_pen1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/using_pen1.jpg -------------------------------------------------------------------------------- /asset/example_images/holding_cup1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/holding_cup1.jpg -------------------------------------------------------------------------------- /asset/example_images/holding_cup2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/holding_cup2.jpg -------------------------------------------------------------------------------- 
/asset/example_images/holding_hammer1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/holding_hammer1.jpg
--------------------------------------------------------------------------------
/asset/example_images/squidgame_demo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/squidgame_demo2.png
--------------------------------------------------------------------------------
/asset/example_images/holding_scissors2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/holding_scissors2.jpg
--------------------------------------------------------------------------------
/scripts/download_official_decaf.sh:
--------------------------------------------------------------------------------
1 | mkdir -p data/Decaf
2 | wget -P data/Decaf https://vcai.mpi-inf.mpg.de/projects/Decaf/static/DecafDataset.zip
3 | unzip data/Decaf/DecafDataset.zip -d data/Decaf
4 | mv data/Decaf/DecafDataset data/Decaf/data
5 | rm -f data/Decaf/DecafDataset.zip
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | easydict
2 | tqdm
3 | pytz
4 | opencv-python
5 | trimesh
6 | rtree
7 | point-cloud-utils
8 | chumpy
9 | pyyaml
10 | plyfile
11 | einops
12 | timm==1.0.14
13 | smplx
14 | gdown
15 | mediapipe
16 | pyrender==0.1.33
17 | huggingface_hub
18 | pycocotools
19 | ultralytics
--------------------------------------------------------------------------------
/docs/data_demo.md:
--------------------------------------------------------------------------------
1 | ## Data
2 | You need to follow the directory structure of `data` as shown below.
3 | ``` 4 | ${ROOT} 5 | |-- data 6 | | |-- base_data 7 | ``` 8 | * Download `base_data` from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data/blob/main/demo/data/base_data.tar.gz) by running: 9 | ``` 10 | bash scripts/download_demo_base_data.sh 11 | ``` -------------------------------------------------------------------------------- /lib/utils/log_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | from datetime import datetime 5 | from pytz import timezone 6 | 7 | 8 | def get_datetime(timezone_name='Asia/Seoul'): 9 | datetime_out = datetime.now(timezone(timezone_name)).strftime('%Y-%m-%d_%H:%M:%S') 10 | return datetime_out 11 | 12 | 13 | def init_dirs(dir_list): 14 | for dir in dir_list: 15 | if os.path.exists(dir) and os.path.isdir(dir): 16 | shutil.rmtree(dir) 17 | os.makedirs(dir) -------------------------------------------------------------------------------- /scripts/download_official_ycb_models.sh: -------------------------------------------------------------------------------- 1 | # Create target directory for DexYCB 2 | mkdir -p data/DexYCB/data 3 | gdown https://drive.google.com/uc?id=1gmcDD-5bkJfcMKLZb3zGgH_HUFbulQWu -O data/DexYCB/data/YCB_Video_Models.zip 4 | 5 | # Unzip in DexYCB 6 | unzip data/DexYCB/data/YCB_Video_Models.zip -d data/DexYCB/data 7 | rm data/DexYCB/data/YCB_Video_Models.zip 8 | 9 | # Copy to H2O3D 10 | mkdir -p data/H2O3D/YCB_object_models 11 | cp -r data/DexYCB/data/models data/H2O3D/YCB_object_models/models -------------------------------------------------------------------------------- /scripts/download_official_hoi4d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Target directory 4 | target_dir="data/HOI4D/data/datalist" 5 | mkdir -p "$target_dir" 6 | 7 | # Download files 8 | wget https://raw.githubusercontent.com/leolyliu/HOI4D-Instructions/main/prepare_4Dseg/datalists/train_all.txt -O "$target_dir/train_all.txt" 9 | wget https://raw.githubusercontent.com/leolyliu/HOI4D-Instructions/main/prepare_4Dseg/datalists/test_all.txt -O "$target_dir/test_all.txt" 10 | 11 | echo "Download complete: Files saved to $target_dir" -------------------------------------------------------------------------------- /scripts/download_demo_base_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TARGET_DIR="data" 4 | FILE_URL="https://huggingface.co/datasets/dqj5182/haco-data/resolve/main/demo/data/base_data.tar.gz" 5 | ARCHIVE_NAME="$TARGET_DIR/base_data.tar.gz" 6 | 7 | mkdir -p "$TARGET_DIR" 8 | 9 | echo "Downloading base_data.tar.gz..." 10 | wget -c "$FILE_URL" -O "$ARCHIVE_NAME" 11 | 12 | echo "Decompressing into $TARGET_DIR..." 13 | tar -xvzf "$ARCHIVE_NAME" -C "$TARGET_DIR" 14 | 15 | echo "Removing archive..." 16 | rm "$ARCHIVE_NAME" 17 | 18 | echo "Done. Extracted to $TARGET_DIR" -------------------------------------------------------------------------------- /scripts/download_train_base_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TARGET_DIR="data" 4 | FILE_URL="https://huggingface.co/datasets/dqj5182/haco-data/resolve/main/train/data/base_data.tar.gz" 5 | ARCHIVE_NAME="$TARGET_DIR/base_data.tar.gz" 6 | 7 | mkdir -p "$TARGET_DIR" 8 | 9 | echo "Downloading base_data.tar.gz..." 10 | wget -c "$FILE_URL" -O "$ARCHIVE_NAME" 11 | 12 | echo "Decompressing into $TARGET_DIR..." 
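# The archive is expected to unpack as $TARGET_DIR/base_data/, matching the data/base_data layout described in docs/data_demo.md
# (-xvzf extracts the gzipped tar verbosely; -C places the contents under $TARGET_DIR)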
13 | tar -xvzf "$ARCHIVE_NAME" -C "$TARGET_DIR" 14 | 15 | echo "Removing archive..." 16 | rm "$ARCHIVE_NAME" 17 | 18 | echo "Done. Extracted to $TARGET_DIR" -------------------------------------------------------------------------------- /scripts/download_demo_example_videos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TARGET_DIR="asset" 4 | FILE_URL="https://huggingface.co/datasets/dqj5182/haco-data/resolve/main/demo/asset/example_videos.zip" 5 | ARCHIVE_NAME="$TARGET_DIR/example_videos.zip" 6 | 7 | mkdir -p "$TARGET_DIR" 8 | 9 | echo "Downloading example_videos.zip..." 10 | wget -c "$FILE_URL" -O "$ARCHIVE_NAME" 11 | 12 | echo "Unzipping into $TARGET_DIR..." 13 | unzip -o "$ARCHIVE_NAME" -d "$TARGET_DIR" 14 | 15 | # Remove zip after extraction 16 | rm "$ARCHIVE_NAME" 17 | 18 | echo "Done. Extracted to $TARGET_DIR" -------------------------------------------------------------------------------- /scripts/download_official_mow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Download and extract MOW dataset 5 | mkdir -p data/MOW 6 | wget --show-progress -P data/MOW https://zhec.github.io/rhoi/mow.zip 7 | unzip -q data/MOW/mow.zip -d data/MOW 8 | mkdir -p data/MOW/data 9 | mv data/MOW/mow/images data/MOW/data/ 10 | mv data/MOW/mow/models data/MOW/data/ 11 | rm -rf data/MOW/__MACOSX data/MOW/mow data/MOW/mow.zip 12 | 13 | # Download poses.json 14 | wget --show-progress -O data/MOW/data/poses.json https://raw.githubusercontent.com/ZheC/MOW/b2acbb4fac40acc4c286833da895fc9f23e58bb6/poses.json -------------------------------------------------------------------------------- /lib/utils/smplx/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | import clean_ch 19 | import merge_smplh_mano 20 | -------------------------------------------------------------------------------- /data/DexYCB/toolkit/factory.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Factory method for easily getting datasets by name.""" 6 | 7 | from .dex_ycb import DexYCBDataset 8 | 9 | _sets = {} 10 | 11 | for setup in ('s0', 's1', 's2', 's3'): 12 | for split in ('train', 'val', 'test'): 13 | name = '{}_{}'.format(setup, split) 14 | _sets[name] = (lambda setup=setup, split=split: DexYCBDataset(setup, split)) 15 | 16 | 17 | def get_dataset(name): 18 | """Gets a dataset by name. 19 | 20 | Args: 21 | name: Dataset name. E.g., 's0_test'. 
22 | 23 | Returns: 24 | A dataset. 25 | 26 | Raises: 27 | KeyError: If name is not supported. 28 | """ 29 | if name not in _sets: 30 | raise KeyError('Unknown dataset name: {}'.format(name)) 31 | return _sets[name]() 32 | -------------------------------------------------------------------------------- /scripts/download_official_prox.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | urle () { [[ "${1}" ]] || return 1; local LANG=C i x; for (( i = 0; i < ${#1}; i++ )); do x="${1:i:1}"; [[ "${x}" == [a-zA-Z0-9.~-] ]] && echo -n "${x}" || printf '%%%02X' "'${x}"; done; echo; } 4 | 5 | # username and password input 6 | echo -e "\nYou need to register at https://prox.is.tue.mpg.de/" 7 | read -p "Username: " username 8 | read -p "Password: " password 9 | 10 | # Set save directory (hardcoded) 11 | save_dir="data/PROX/data" 12 | 13 | username=$(urle $username) 14 | password=$(urle $password) 15 | 16 | mkdir -p "$save_dir" 17 | 18 | # Download 19 | wget --post-data "username=$username&password=$password" \ 20 | 'https://download.is.tue.mpg.de/download.php?domain=prox&resume=1&sfile=quantitative.zip' \ 21 | -O "$save_dir/quantitative.zip" \ 22 | --no-check-certificate --continue 23 | 24 | unzip data/PROX/data/quantitative.zip -d data/PROX/data 25 | rm -f data/PROX/data/quantitative.zip -------------------------------------------------------------------------------- /lib/utils/smplx/tools/README.md: -------------------------------------------------------------------------------- 1 | ## Removing Chumpy objects 2 | 3 | In a Python 2 virtual environment with [Chumpy](https://github.com/mattloper/chumpy) installed run the following to remove any Chumpy objects from the model data: 4 | 5 | ```bash 6 | python tools/clean_ch.py --input-models path-to-models/*.pkl --output-folder output-folder 7 | ``` 8 | 9 | ## Merging SMPL-H and MANO parameters 10 | 11 | In order to use the given PyTorch SMPL-H module we first need to merge the SMPL-H and MANO parameters in a single file. After agreeing to the license and downloading the models, run the following command: 12 | 13 | ```bash 14 | python tools/merge_smplh_mano.py --smplh-fn SMPLH_FOLDER/SMPLH_GENDER.pkl \ 15 | --mano-left-fn MANO_FOLDER/MANO_LEFT.pkl \ 16 | --mano-right-fn MANO_FOLDER/MANO_RIGHT.pkl \ 17 | --output-folder OUTPUT_FOLDER 18 | ``` 19 | 20 | where SMPLH_FOLDER is the folder with the SMPL-H files and MANO_FOLDER the one for the MANO files. 21 | -------------------------------------------------------------------------------- /scripts/download_official_hi4d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Ask user for the personalized Hi4D URL (usually https://hi4d.ait.ethz.ch/download.php?dt=...) 4 | read -p "Enter your personal Hi4D download URL: " URL 5 | 6 | # Set target directory 7 | TARGET_DIR="data/Hi4D/data" 8 | mkdir -p "$TARGET_DIR" 9 | cd "$TARGET_DIR" || exit 1 10 | 11 | BASE_URL="https://hi4d.ait.ethz.ch" 12 | 13 | echo "Fetching page content from $URL..." 14 | html=$(curl -s "$URL") 15 | 16 | echo "Extracting .tar.gz links..." 17 | echo "$html" | grep -oP 'href="\K[^"]+\.tar\.gz' > hrefs.txt 18 | 19 | if [ ! -s hrefs.txt ]; then 20 | echo "No .tar.gz links found. Please check your URL or access permissions." 21 | exit 1 22 | fi 23 | 24 | echo "Downloading files into $TARGET_DIR..." 25 | while read -r href; do 26 | filename=$(basename "$href") 27 | full_url="$BASE_URL/$href" 28 | echo "Downloading $filename..." 
29 | wget -c "$full_url" -O "$filename" 30 | done < hrefs.txt 31 | 32 | echo "Done. Files downloaded to $TARGET_DIR" -------------------------------------------------------------------------------- /data/DexYCB/toolkit/logging.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Functions for logging.""" 6 | 7 | import logging 8 | import sys 9 | 10 | 11 | def get_logger(log_file): 12 | """Gets a logger given the path to the log file. 13 | 14 | Args: 15 | name: Path to the log file. 16 | 17 | Returns: 18 | A logger. 19 | """ 20 | logger = logging.getLogger() 21 | logger.setLevel(logging.INFO) 22 | 23 | formatter = logging.Formatter('%(asctime)s: %(message)s', '%Y-%m-%d %H:%M:%S') 24 | 25 | stdout_handler = logging.StreamHandler(sys.stdout) 26 | stdout_handler.setLevel(logging.INFO) 27 | stdout_handler.setFormatter(formatter) 28 | logger.addHandler(stdout_handler) 29 | 30 | file_handler = logging.FileHandler(log_file, mode='w') 31 | file_handler.setLevel(logging.INFO) 32 | file_handler.setFormatter(formatter) 33 | logger.addHandler(file_handler) 34 | 35 | return logger 36 | -------------------------------------------------------------------------------- /lib/utils/smplx/smplx/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from .body_models import ( 18 | create, 19 | SMPL, 20 | SMPLH, 21 | SMPLX, 22 | MANO, 23 | FLAME, 24 | build_layer, 25 | SMPLLayer, 26 | SMPLHLayer, 27 | SMPLXLayer, 28 | MANOLayer, 29 | FLAMELayer, 30 | ) 31 | -------------------------------------------------------------------------------- /scripts/download_gsdf_obman.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ############ Download ObMan Annotations and Splits ############ 4 | # Set target directories 5 | ann_dir="data/ObMan/annotations" 6 | splits_dir="data/ObMan/splits" 7 | 8 | # Create directories 9 | mkdir -p "$ann_dir" 10 | mkdir -p "$splits_dir" 11 | 12 | # Download annotations folder from Google Drive 13 | echo "Downloading annotations to $ann_dir ..." 14 | gdown --folder https://drive.google.com/drive/folders/1DBzG9J0uLzCy4A6W6Uq6Aq4JNAHiiNJQ -O "$ann_dir" 15 | 16 | # Download split JSON files from GitHub 17 | echo "Downloading train/test split files to $splits_dir ..." 
18 | wget -c https://raw.githubusercontent.com/zerchen/gSDF/05101b5bde6765e9168026cff853b74a1412c125/datasets/obman/splits/train_87k.json -O "$splits_dir/train_87k.json" 19 | wget -c https://raw.githubusercontent.com/zerchen/gSDF/05101b5bde6765e9168026cff853b74a1412c125/datasets/obman/splits/test_6k.json -O "$splits_dir/test_6k.json" 20 | 21 | echo "ObMan annotations and splits successfully downloaded." 22 | ############ End of ObMan setup ############ -------------------------------------------------------------------------------- /scripts/download_train_preprocessed_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set directories 4 | TEMP_DIR="temp_haco_train_data" 5 | TARGET_DIR="data" 6 | 7 | # Initialize git-lfs and clone dataset repo 8 | git lfs install 9 | git clone https://huggingface.co/datasets/dqj5182/haco-data "$TEMP_DIR" 10 | 11 | # --------- Extract all .tar.gz in TEMP_DIR BEFORE moving --------- 12 | echo "Extracting .tar.gz files inside $TEMP_DIR..." 13 | 14 | find "$TEMP_DIR" -type f -name "*.tar.gz" | while read -r file; do 15 | echo "Extracting: $file" 16 | dir=$(dirname "$file") 17 | tar -xzf "$file" -C "$dir" 18 | if [ $? -eq 0 ]; then 19 | echo "Successfully extracted: $file" 20 | rm "$file" 21 | else 22 | echo "Failed to extract: $file" 23 | fi 24 | done 25 | 26 | # Create target directory if needed 27 | mkdir -p "$TARGET_DIR" 28 | 29 | # Now sync only the extracted contents (excluding .tar.gz) 30 | rsync -av --exclude='*.tar.gz' "$TEMP_DIR/train/data/" "$TARGET_DIR/" 31 | 32 | # Clean up temporary cloned repo 33 | rm -rf "$TEMP_DIR" 34 | 35 | echo "All extracted data moved to $TARGET_DIR" -------------------------------------------------------------------------------- /scripts/download_haco_checkpoints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Target directory 4 | TARGET_DIR="release_checkpoint" 5 | mkdir -p "$TARGET_DIR" 6 | 7 | # Base URL of the Hugging Face dataset repo (using 'resolve/main') 8 | BASE_URL="https://huggingface.co/datasets/dqj5182/haco-checkpoints/resolve/main/NeurIPS2025" 9 | 10 | # List of files to download (add more as needed) 11 | FILES=( 12 | "haco_neurips_hamer_checkpoint.ckpt" 13 | "haco_neurips_handoccnet_checkpoint.ckpt" 14 | "haco_neurips_vit_l_checkpoint.ckpt" 15 | "haco_neurips_vit_b_checkpoint.ckpt" 16 | "haco_neurips_vit_s_checkpoint.ckpt" 17 | "haco_neurips_hrnet_w48_checkpoint.ckpt" 18 | "haco_neurips_hrnet_w32_checkpoint.ckpt" 19 | "haco_neurips_resnet_152_checkpoint.ckpt" 20 | "haco_neurips_resnet_50_checkpoint.ckpt" 21 | "haco_neurips_resnet_101_checkpoint.ckpt" 22 | "haco_neurips_resnet_34_checkpoint.ckpt" 23 | "haco_neurips_resnet_18_checkpoint.ckpt" 24 | ) 25 | 26 | # Download each file directly to the target directory 27 | for file in "${FILES[@]}"; do 28 | echo "Downloading $file to $TARGET_DIR..." 
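    # wget -c resumes partial downloads, so re-running this script only re-fetches checkpoints that are missing or incomplete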
29 | wget -c "$BASE_URL/$file" -O "$TARGET_DIR/$file" 30 | done 31 | 32 | echo "All files downloaded to $TARGET_DIR" -------------------------------------------------------------------------------- /lib/models/backbone/vit.py: -------------------------------------------------------------------------------- 1 | import timm 2 | import torch.nn as nn 3 | 4 | 5 | class ViTBackbone(nn.Module): 6 | def __init__(self, model_name='vit_base_patch16_224', pretrained=True, return_cls=False): 7 | """ 8 | Args: 9 | model_name (str): 'vit_base_patch16_224' or 'vit_large_patch16_224' 10 | pretrained (bool): load pretrained weights from timm 11 | return_cls (bool): if True, return CLS token instead of patch tokens 12 | """ 13 | super().__init__() 14 | self.return_cls = return_cls 15 | 16 | # Load model with no classification head 17 | self.vit = timm.create_model(model_name, pretrained=pretrained, num_classes=0) 18 | 19 | # Get dimensions 20 | self.embed_dim = self.vit.embed_dim # 768 for B/16, 1024 for L/16 21 | self.patch_size = self.vit.patch_embed.patch_size 22 | 23 | def forward(self, x): 24 | # Features includes CLS + patch tokens: [B, 1 + N, D] 25 | x = self.vit.forward_features(x) 26 | 27 | if self.return_cls: 28 | return x[:, 0] # [B, D] – CLS token 29 | else: 30 | patch_tokens = x[:, 1:] # [B, N, D] 31 | B, N, D = patch_tokens.shape 32 | H = W = int(N ** 0.5) 33 | return patch_tokens.view(B, D, H, W) # [B, H, W, D] -------------------------------------------------------------------------------- /scripts/download_official_hic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set target directory 4 | save_dir="data/HIC/data" 5 | mkdir -p "$save_dir" 6 | cd "$save_dir" || exit 7 | 8 | # Download and unzip Hand_Hand sequences 9 | for seq_idx in 01 02 03 04 05 06 07 08 09 10 11; do 10 | echo "Downloading Hand_Hand sequence $seq_idx..." 11 | wget http://files.is.tue.mpg.de/dtzionas/Hand-Object-Capture/Dataset/Hand_Hand___All_Files/"$seq_idx".zip 12 | unzip "$seq_idx".zip 13 | rm "$seq_idx".zip 14 | done 15 | 16 | # Download and unzip Hand_Object sequences 17 | for seq_idx in 15 16 17 18 19 20 21; do 18 | echo "Downloading Hand_Object sequence $seq_idx..." 19 | wget http://files.is.tue.mpg.de/dtzionas/Hand-Object-Capture/Dataset/Hand_Object___All_Files/"$seq_idx".zip 20 | unzip "$seq_idx".zip 21 | rm "$seq_idx".zip 22 | done 23 | 24 | # Download the MANO-compatible parameter file 25 | echo "Downloading MANO-compatible parameter file..." 26 | wget http://files.is.tue.mpg.de/dtzionas/Hand-Object-Capture/Dataset/MANO_compatible/IJCV16___Results_MANO___parms_for___joints21.zip 27 | unzip IJCV16___Results_MANO___parms_for___joints21.zip 28 | rm IJCV16___Results_MANO___parms_for___joints21.zip 29 | 30 | echo "All files downloaded, unzipped, and cleaned up in $save_dir." 
31 | 32 | # Download HIC.json 33 | gdown https://drive.google.com/uc?id=1oqquzJ7DY728M8zQoCYvvuZEBh8L8zkQ -O data/HIC/data/HIC.json -------------------------------------------------------------------------------- /scripts/download_initial_checkpoints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # HaMeR 5 | mkdir -p data/base_data/pretrained_models/hamer 6 | wget -P data/base_data/pretrained_models/hamer https://www.cs.utexas.edu/~pavlakos/hamer/data/hamer_demo_data.tar.gz 7 | tar -xzf data/base_data/pretrained_models/hamer/hamer_demo_data.tar.gz -C data/base_data/pretrained_models/hamer 8 | mv data/base_data/pretrained_models/hamer/_DATA/hamer_ckpts/checkpoints/hamer.ckpt data/base_data/pretrained_models/hamer/hamer.ckpt 9 | rm -rf data/base_data/pretrained_models/hamer/hamer_demo_data.tar.gz data/base_data/pretrained_models/hamer/_DATA 10 | 11 | # HandOccNet 12 | mkdir -p data/base_data/pretrained_models/handoccnet 13 | gdown https://drive.google.com/uc?id=1JXOcWgn6Bx173BhDH99EH6sZ7oOW05Hh -O data/base_data/pretrained_models/handoccnet/snapshot_demo.pth.tar 14 | 15 | # HRNet 16 | mkdir -p data/base_data/pretrained_models/hrnet 17 | gdown https://drive.google.com/uc?id=1aTXmxKAJVLsXbvM-TmQ0ZjJxP868G73q -O data/base_data/pretrained_models/hrnet/hrnet_w32-36af842e.pth 18 | gdown https://drive.google.com/uc?id=1qm5-QfHTz5Ia71ByZ1Haq5zJpyEbZRoc -O data/base_data/pretrained_models/hrnet/hrnet_w48-8ef0771d.pth 19 | 20 | # Pose2Pose 21 | mkdir -p data/base_data/pretrained_models/pose2pose/hand 22 | gdown https://drive.google.com/uc?id=15wYR8psO2U3ZhFYQEH1-DWc81XkWvK2Y -O data/base_data/pretrained_models/pose2pose/hand/snapshot_12.pth.tar 23 | -------------------------------------------------------------------------------- /data/dataset.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from torch.utils.data.dataset import Dataset 4 | 5 | 6 | class MultipleDatasets(Dataset): 7 | def __init__(self, dbs, make_same_len=True): 8 | self.dbs = dbs 9 | self.db_num = len(self.dbs) 10 | self.max_db_data_num = max([len(db) for db in dbs]) 11 | self.db_len_cumsum = np.cumsum([len(db) for db in dbs]) 12 | self.make_same_len = make_same_len 13 | 14 | def __len__(self): 15 | # all dbs have the same length 16 | if self.make_same_len: 17 | return self.max_db_data_num * self.db_num 18 | # each db has different length 19 | else: 20 | return sum([len(db) for db in self.dbs]) 21 | 22 | def __getitem__(self, index): 23 | if self.make_same_len: 24 | db_idx = index // self.max_db_data_num 25 | data_idx = index % self.max_db_data_num 26 | if data_idx >= len(self.dbs[db_idx]) * (self.max_db_data_num // len(self.dbs[db_idx])): # last batch: random sampling 27 | data_idx = random.randint(0,len(self.dbs[db_idx])-1) 28 | else: # before last batch: use modular 29 | data_idx = data_idx % len(self.dbs[db_idx]) 30 | else: 31 | for i in range(self.db_num): 32 | if index < self.db_len_cumsum[i]: 33 | db_idx = i 34 | break 35 | if db_idx == 0: 36 | data_idx = index 37 | else: 38 | data_idx = index - self.db_len_cumsum[db_idx-1] 39 | 40 | return self.dbs[db_idx][data_idx] -------------------------------------------------------------------------------- /scripts/extract_official_hi4d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Usage: 4 | # ./extract_tar_gz.sh /path/to/input_dir /path/to/destination_dir 5 | # If 
destination_dir is omitted, files are extracted to the input_dir 6 | 7 | # List of tar.gz files that do NOT have a root directory 8 | no_root_path_pair_list=("pair00_1" "pair19_1" "pair19_2" "pair32_1" "pair32_2") 9 | 10 | input_dir="data/Hi4D/data" 11 | destination_dir="data/Hi4D/data" 12 | 13 | # If destination is not specified, use input directory 14 | if [ -z "$destination_dir" ]; then 15 | destination_dir="$input_dir" 16 | fi 17 | 18 | # Check input directory exists 19 | if [ ! -d "$input_dir" ]; then 20 | echo "Error: Input directory '$input_dir' does not exist." 21 | exit 1 22 | fi 23 | 24 | # Create destination directory if it doesn't exist 25 | mkdir -p "$destination_dir" 26 | 27 | # Loop through all .tar.gz files in the input directory 28 | for file_path in "$input_dir"/*.tar.gz; do 29 | [ -e "$file_path" ] || continue # Skip if no files found 30 | 31 | file_name=$(basename "$file_path") 32 | pair_name="${file_name%.tar.gz}" 33 | 34 | echo "Processing $file_name..." 35 | 36 | # Check if this file is in the no_root_path list 37 | if [[ " ${no_root_path_pair_list[@]} " =~ " $pair_name " ]]; then 38 | extract_path="$destination_dir/$pair_name" 39 | mkdir -p "$extract_path" 40 | echo " Extracting to $extract_path..." 41 | tar -xzf "$file_path" -C "$extract_path" 42 | else 43 | echo " Extracting to $destination_dir..." 44 | tar -xzf "$file_path" -C "$destination_dir" 45 | fi 46 | 47 | if [ $? -eq 0 ]; then 48 | echo " Successfully extracted $file_name" 49 | else 50 | echo " Failed to extract $file_name" 51 | fi 52 | done -------------------------------------------------------------------------------- /lib/core/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os.path as osp 3 | import warnings 4 | 5 | 6 | warnings.filterwarnings("ignore") 7 | 8 | OK = '\033[92m' 9 | WARNING = '\033[93m' 10 | FAIL = '\033[91m' 11 | END = '\033[0m' 12 | 13 | PINK = '\033[95m' 14 | BLUE = '\033[94m' 15 | GREEN = OK 16 | RED = FAIL 17 | WHITE = END 18 | YELLOW = WARNING 19 | 20 | 21 | class ColorLogger(): 22 | def __init__(self, log_dir, log_name='log.txt'): 23 | # set log 24 | self._logger = logging.getLogger(log_name) 25 | self._logger.setLevel(logging.INFO) 26 | log_file = osp.join(log_dir, log_name) 27 | file_log = logging.FileHandler(log_file, mode='a') 28 | file_log.setLevel(logging.INFO) 29 | console_log = logging.StreamHandler() 30 | console_log.setLevel(logging.INFO) 31 | file_formatter = logging.Formatter( 32 | "%(asctime)s %(message)s", 33 | "%m-%d %H:%M:%S") 34 | console_formatter = logging.Formatter( 35 | "{}%(asctime)s{} %(message)s".format(GREEN, END), 36 | "%m-%d %H:%M:%S") 37 | file_log.setFormatter(file_formatter) 38 | console_log.setFormatter(console_formatter) 39 | self._logger.addHandler(file_log) 40 | self._logger.addHandler(console_log) 41 | 42 | def debug(self, msg): 43 | self._logger.debug(str(msg)) 44 | 45 | def info(self, msg): 46 | self._logger.info(str(msg)) 47 | 48 | def warning(self, msg): 49 | self._logger.warning(WARNING + 'WRN: ' + str(msg) + END) 50 | 51 | def critical(self, msg): 52 | self._logger.critical(RED + 'CRI: ' + str(msg) + END) 53 | 54 | def error(self, msg): 55 | self._logger.error(RED + 'ERR: ' + str(msg) + END) -------------------------------------------------------------------------------- /lib/utils/eval_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def evaluation(outputs, targets_data, 
meta_info, mode='val', thres=0.5): 6 | eval_out = {} 7 | 8 | # GT 9 | mesh_valid = meta_info['mano_valid'] is not None 10 | 11 | # Pred 12 | contact_pred = outputs['contact_out'].sigmoid()[0].detach().cpu().numpy() 13 | 14 | # Error Calculate 15 | if mesh_valid: 16 | # Contact Metrics 17 | cont_pre, cont_rec, cont_f1 = compute_contact_metrics(targets_data['contact_data']['contact_h'][0].detach().cpu().numpy(), contact_pred, mesh_valid, thres=thres) 18 | eval_out['cont_pre'] = cont_pre 19 | eval_out['cont_rec'] = cont_rec 20 | eval_out['cont_f1'] = cont_f1 21 | 22 | return eval_out 23 | 24 | 25 | def compute_contact_metrics(gt, pred, valid, thres=0.5): 26 | """ 27 | Compute precision, recall, and f1 using NumPy 28 | """ 29 | if valid: 30 | # True Positives 31 | tp_num = np.sum(gt[pred >= thres]) 32 | 33 | # Denominators for precision and recall 34 | precision_denominator = np.sum(pred >= thres) 35 | recall_denominator = np.sum(gt) 36 | 37 | # Compute precision, recall, and F1 score 38 | precision_ = tp_num / precision_denominator if precision_denominator > 0 else None 39 | recall_ = tp_num / recall_denominator if recall_denominator > 0 else None 40 | if precision_ is not None and recall_ is not None and (precision_ + recall_) > 0: 41 | f1_ = 2 * precision_ * recall_ / (precision_ + recall_) 42 | else: 43 | f1_ = None 44 | else: 45 | # If not valid, return None for metrics 46 | precision_ = None 47 | recall_ = None 48 | f1_ = None 49 | 50 | return precision_, recall_, f1_ -------------------------------------------------------------------------------- /data/DexYCB/toolkit/layers/mano_layer.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Wrapper layer for manopth ManoLayer.""" 6 | 7 | import torch 8 | 9 | from torch.nn import Module 10 | from manopth.manolayer import ManoLayer 11 | 12 | 13 | class MANOLayer(Module): 14 | """Wrapper layer for manopth ManoLayer.""" 15 | 16 | def __init__(self, side, betas): 17 | """Constructor. 18 | 19 | Args: 20 | side: MANO hand type. 'right' or 'left'. 21 | betas: A numpy array of shape [10] containing the betas. 22 | """ 23 | super(MANOLayer, self).__init__() 24 | 25 | self._side = side 26 | self._betas = betas 27 | self._mano_layer = ManoLayer(flat_hand_mean=False, 28 | ncomps=45, 29 | side=self._side, 30 | mano_root='manopth/mano/models', 31 | use_pca=True) 32 | 33 | b = torch.from_numpy(self._betas).unsqueeze(0) 34 | f = self._mano_layer.th_faces 35 | self.register_buffer('b', b) 36 | self.register_buffer('f', f) 37 | 38 | v = torch.matmul(self._mano_layer.th_shapedirs, self.b.transpose( 39 | 0, 1)).permute(2, 0, 1) + self._mano_layer.th_v_template 40 | r = torch.matmul(self._mano_layer.th_J_regressor[0], v) 41 | self.register_buffer('root_trans', r) 42 | 43 | def forward(self, p, t): 44 | """Forward function. 45 | 46 | Args: 47 | p: A tensor of shape [B, 48] containing the pose. 48 | t: A tensor of shape [B, 3] containing the trans. 49 | 50 | Returns: 51 | v: A tensor of shape [B, 778, 3] containing the vertices. 52 | j: A tensor of shape [B, 21, 3] containing the joints. 
53 | """ 54 | v, j = self._mano_layer(p, self.b.expand(p.size(0), -1), t) 55 | v /= 1000 56 | j /= 1000 57 | return v, j 58 | -------------------------------------------------------------------------------- /lib/utils/contact_utils.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import torch 3 | import numpy as np 4 | from trimesh.proximity import ProximityQuery 5 | 6 | from lib.utils.human_models import mano 7 | 8 | 9 | def get_ho_contact_and_offset(mesh_hand, mesh_obj, c_thres): 10 | # Make sure that meshes are watertight and do not comntain inverted faces 11 | # Typically canonical space meshes are more stable 12 | 13 | pq = ProximityQuery(mesh_obj) 14 | obj_coord_c, dist, obj_coord_c_idx = pq.on_surface(mesh_hand.vertices.astype(np.float32)) 15 | 16 | is_contact_h = (dist < c_thres) 17 | contact_h = (1. * is_contact_h).astype(np.float32) 18 | 19 | contact_valid = np.ones((mano.vertex_num, 1)) 20 | inter_coord_valid = np.ones((mano.vertex_num)) 21 | 22 | # Explicit cleanup 23 | del pq 24 | gc.collect() 25 | 26 | return np.array(contact_h), np.array(obj_coord_c), contact_valid, inter_coord_valid 27 | 28 | 29 | def get_contact_thres(backbone_type='hamer'): 30 | # We select contact threshold that has best balance between precision and recall. 31 | if backbone_type == 'hamer': # seed 314 32 | return 0.52 33 | elif backbone_type == 'vit-l-16': # seed 327 34 | return 0.52 35 | elif backbone_type == 'vit-b-16': # seed 327 36 | return 0.5 37 | elif backbone_type == 'vit-s-16': # seed 314 38 | return 0.6 39 | elif backbone_type == 'handoccnet': # seed 314 40 | return 0.6 41 | elif backbone_type == 'hrnet-w48': # seed 314 42 | return 0.58 43 | elif backbone_type == 'hrnet-w32': # seed 314 44 | return 0.57 45 | elif backbone_type == 'resnet-152': # seed 314 46 | return 0.54 47 | elif backbone_type == 'resnet-101': # seed 314 48 | return 0.52 49 | elif backbone_type == 'resnet-50': # seed 314 50 | return 0.55 51 | elif backbone_type == 'resnet-34': # seed 314 52 | return 0.55 53 | elif backbone_type == 'resnet-18': # seed 314 54 | return 0.62 55 | else: 56 | raise NotImplementedError -------------------------------------------------------------------------------- /scripts/download_official_interhand26m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ########### Download InterHand2.6M images ############ 4 | # Set target directory for images 5 | save_images_dir="data/InterHand26M/images" 6 | mkdir -p "$save_images_dir" 7 | cd "$save_images_dir" || exit 1 8 | 9 | base_url="https://fb-baas-f32eacb9-8abb-11eb-b2b8-4857dd089e15.s3.amazonaws.com/InterHand2.6M/InterHand2.6M.images.5.fps.v1.0/" 10 | 11 | # Download all part files 12 | for part1 in a b; do 13 | for part2 in {a..z}; do 14 | if [[ "$part1" == "b" && "$part2" == "s" ]]; then 15 | break 16 | fi 17 | filename="InterHand2.6M.images.5.fps.v1.0.tar.part${part1}${part2}" 18 | echo "Downloading $filename ..." 19 | wget -c "${base_url}${filename}" 20 | done 21 | done 22 | 23 | # Download CHECKSUM and helper scripts 24 | wget -c "${base_url}InterHand2.6M.images.5.fps.v1.0.tar.CHECKSUM" 25 | wget -c "${base_url}unzip.sh" 26 | wget -c "${base_url}verify_download.py" 27 | 28 | # Run verification 29 | echo "Running verify_download.py..." 30 | python3 verify_download.py || { echo "Checksum verification failed"; exit 1; } 31 | 32 | # Run unzip 33 | echo "Running unzip.sh..." 
34 | bash unzip.sh || { echo "Unzip failed"; exit 1; } 35 | 36 | cd "../../.." || exit 1 37 | 38 | # Move extracted images into the target directory root 39 | extracted_subdir="$save_images_dir/InterHand2.6M_5fps_batch1/images" 40 | if [ -d "$extracted_subdir" ]; then 41 | echo "Moving images to $save_images_dir ..." 42 | mv "$extracted_subdir"/* "$save_images_dir" 43 | rm -r "$save_images_dir/InterHand2.6M_5fps_batch1" 44 | else 45 | echo "Expected directory $extracted_subdir not found." 46 | exit 1 47 | fi 48 | 49 | echo "InterHand2.6M image data downloaded and extracted to $save_images_dir" 50 | ########### End of image download ############ 51 | 52 | 53 | 54 | ############ Download InterHand2.6M annotations ############ 55 | save_ann_dir="data/InterHand26M/annotations" 56 | mkdir -p "$save_ann_dir" 57 | 58 | echo "Downloading annotations to $save_ann_dir ..." 59 | gdown --folder https://drive.google.com/drive/folders/12RNG9slv9i_TsXSoZ6pQAq-Fa98eGLoy -O "$save_ann_dir" 60 | 61 | # Move contents up if nested under 'annotations' 62 | if [ -d "$save_ann_dir/annotations" ]; then 63 | mv "$save_ann_dir/annotations/"* "$save_ann_dir" 64 | rmdir "$save_ann_dir/annotations" 65 | fi 66 | 67 | echo "InterHand2.6M annotations downloaded to $save_ann_dir" 68 | ############ End of annotations download ############ -------------------------------------------------------------------------------- /lib/utils/smplx/tools/clean_ch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import print_function 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | 22 | import argparse 23 | import os 24 | import os.path as osp 25 | 26 | import pickle 27 | 28 | from tqdm import tqdm 29 | import numpy as np 30 | 31 | 32 | def clean_fn(fn, output_folder='output'): 33 | with open(fn, 'rb') as body_file: 34 | body_data = pickle.load(body_file) 35 | 36 | output_dict = {} 37 | for key, data in body_data.iteritems(): 38 | if 'chumpy' in str(type(data)): 39 | output_dict[key] = np.array(data) 40 | else: 41 | output_dict[key] = data 42 | 43 | out_fn = osp.split(fn)[1] 44 | 45 | out_path = osp.join(output_folder, out_fn) 46 | with open(out_path, 'wb') as out_file: 47 | pickle.dump(output_dict, out_file) 48 | 49 | 50 | if __name__ == '__main__': 51 | parser = argparse.ArgumentParser() 52 | parser.add_argument('--input-models', dest='input_models', nargs='+', 53 | required=True, type=str, 54 | help='The path to the model that will be processed') 55 | parser.add_argument('--output-folder', dest='output_folder', 56 | required=True, type=str, 57 | help='The path to the output folder') 58 | 59 | args = parser.parse_args() 60 | 61 | input_models = args.input_models 62 | output_folder = args.output_folder 63 | if not osp.exists(output_folder): 64 | print('Creating directory: {}'.format(output_folder)) 65 | os.makedirs(output_folder) 66 | 67 | for input_model in input_models: 68 | clean_fn(input_model, output_folder=output_folder) 69 | -------------------------------------------------------------------------------- /lib/utils/smplx/smplx/vertex_ids.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from __future__ import print_function 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | 21 | # Joint name to vertex mapping. 
SMPL/SMPL-H/SMPL-X vertices that correspond to 22 | # MSCOCO and OpenPose joints 23 | vertex_ids = { 24 | 'smplh': { 25 | 'nose': 332, 26 | 'reye': 6260, 27 | 'leye': 2800, 28 | 'rear': 4071, 29 | 'lear': 583, 30 | 'rthumb': 6191, 31 | 'rindex': 5782, 32 | 'rmiddle': 5905, 33 | 'rring': 6016, 34 | 'rpinky': 6133, 35 | 'lthumb': 2746, 36 | 'lindex': 2319, 37 | 'lmiddle': 2445, 38 | 'lring': 2556, 39 | 'lpinky': 2673, 40 | 'LBigToe': 3216, 41 | 'LSmallToe': 3226, 42 | 'LHeel': 3387, 43 | 'RBigToe': 6617, 44 | 'RSmallToe': 6624, 45 | 'RHeel': 6787 46 | }, 47 | 'smplx': { 48 | 'nose': 9120, 49 | 'reye': 9929, 50 | 'leye': 9448, 51 | 'rear': 616, 52 | 'lear': 6, 53 | 'rthumb': 8079, 54 | 'rindex': 7669, 55 | 'rmiddle': 7794, 56 | 'rring': 7905, 57 | 'rpinky': 8022, 58 | 'lthumb': 5361, 59 | 'lindex': 4933, 60 | 'lmiddle': 5058, 61 | 'lring': 5169, 62 | 'lpinky': 5286, 63 | 'LBigToe': 5770, 64 | 'LSmallToe': 5780, 65 | 'LHeel': 8846, 66 | 'RBigToe': 8463, 67 | 'RSmallToe': 8474, 68 | 'RHeel': 8635 69 | }, 70 | 'mano': { 71 | 'thumb': 744, 72 | 'index': 320, 73 | 'middle': 443, 74 | 'ring': 554, 75 | 'pinky': 671, 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /lib/core/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | 5 | from lib.core.config import cfg 6 | from lib.utils.human_models import mano 7 | from lib.core.loss import VCBLoss, RegLoss, SmoothRegLoss 8 | 9 | V_regressor_336 = torch.tensor(np.load(cfg.MODEL.V_regressor_336_path), dtype=torch.float32) 10 | V_regressor_84 = torch.tensor(np.load(cfg.MODEL.V_regressor_84_path), dtype=torch.float32) 11 | J_regressor = torch.tensor(mano.joint_regressor, dtype=torch.float32) 12 | 13 | # Loss function 14 | vcb_mesh_loss = VCBLoss(v_type='mesh') 15 | vcb_mesh_336_loss = VCBLoss(v_type='mesh_336') 16 | vcb_mesh_84_loss = VCBLoss(v_type='mesh_84') 17 | vcb_joint_loss = VCBLoss(v_type='joint') 18 | reg_loss = RegLoss() 19 | smooth_reg_loss = SmoothRegLoss() 20 | 21 | 22 | def compute_loss(preds, targets, epoch): 23 | total_loss = 0 24 | 25 | batch_size = len(preds['contact_out']) 26 | contact_means = np.load(cfg.MODEL.contact_means_path) 27 | contact_means = torch.tensor(contact_means)[None].repeat(batch_size, 1) 28 | regularization_loss = reg_loss(preds['contact_out'], contact_means) 29 | smooth_regularization_loss = smooth_reg_loss(preds['contact_out'], torch.tensor(mano.layer['right'].faces.astype(np.int32))) 30 | 31 | # Calculate loss 32 | contact_h_mesh = targets['contact_data']['contact_h'] 33 | contact_h_336 = 1 * (torch.mm(contact_h_mesh, V_regressor_336.T) > 0) 34 | contact_h_84 = 1 * (torch.mm(contact_h_mesh, V_regressor_84.T) > 0) 35 | contact_h_joint = 1 * (torch.mm(contact_h_mesh, J_regressor.T) > 0) 36 | 37 | contact_mesh_loss = vcb_mesh_loss(preds['contact_out'], contact_h_mesh, epoch) 38 | contact_336_loss = vcb_mesh_336_loss(preds['contact_336_out'], contact_h_336, epoch) 39 | contact_84_loss = vcb_mesh_84_loss(preds['contact_84_out'], contact_h_84, epoch) 40 | contact_joint_loss = vcb_joint_loss(preds['contact_joint_out'], contact_h_joint, epoch) 41 | contact_loss = contact_mesh_loss + contact_336_loss + contact_84_loss + contact_joint_loss + 0.1 * regularization_loss + smooth_regularization_loss 42 | 43 | total_loss = contact_loss 44 | 45 | loss_dict = dict(total_loss=total_loss, 46 | contact_mesh_loss=contact_mesh_loss, 47 | contact_336_loss=contact_336_loss, 48 | 
contact_84_loss=contact_84_loss, 49 | contact_joint_loss=contact_joint_loss, 50 | regularization_loss=regularization_loss, 51 | smooth_regularization_loss=smooth_regularization_loss 52 | ) 53 | return total_loss, loss_dict -------------------------------------------------------------------------------- /lib/utils/smplx/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | import io 19 | import os 20 | 21 | from setuptools import setup 22 | 23 | # Package meta-data. 24 | NAME = 'smplx' 25 | DESCRIPTION = 'PyTorch module for loading the SMPLX body model' 26 | URL = 'http://smpl-x.is.tuebingen.mpg.de' 27 | EMAIL = 'vassilis.choutas@tuebingen.mpg.de' 28 | AUTHOR = 'Vassilis Choutas' 29 | REQUIRES_PYTHON = '>=3.6.0' 30 | VERSION = '0.1.21' 31 | 32 | here = os.path.abspath(os.path.dirname(__file__)) 33 | 34 | try: 35 | FileNotFoundError 36 | except NameError: 37 | FileNotFoundError = IOError 38 | 39 | # Import the README and use it as the long-description. 40 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 41 | try: 42 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 43 | long_description = '\n' + f.read() 44 | except FileNotFoundError: 45 | long_description = DESCRIPTION 46 | 47 | # Load the package's __version__.py module as a dictionary. 
48 | about = {} 49 | if not VERSION: 50 | with open(os.path.join(here, NAME, '__version__.py')) as f: 51 | exec(f.read(), about) 52 | else: 53 | about['__version__'] = VERSION 54 | 55 | pyrender_reqs = ['pyrender>=0.1.23', 'trimesh>=2.37.6', 'shapely'] 56 | matplotlib_reqs = ['matplotlib'] 57 | open3d_reqs = ['open3d-python'] 58 | 59 | setup(name=NAME, 60 | version=about['__version__'], 61 | description=DESCRIPTION, 62 | long_description=long_description, 63 | long_description_content_type='text/markdown', 64 | author=AUTHOR, 65 | author_email=EMAIL, 66 | python_requires=REQUIRES_PYTHON, 67 | url=URL, 68 | install_requires=[ 69 | 'numpy>=1.16.2', 70 | 'torch>=1.0.1.post2', 71 | 'torchgeometry>=0.1.2' 72 | ], 73 | extras_require={ 74 | 'pyrender': pyrender_reqs, 75 | 'open3d': open3d_reqs, 76 | 'matplotlib': matplotlib_reqs, 77 | 'all': pyrender_reqs + matplotlib_reqs + open3d_reqs 78 | }, 79 | packages=['smplx', 'tools']) 80 | -------------------------------------------------------------------------------- /data/DexYCB/toolkit/layers/mano_group_layer.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Wrapper layer to hold a group of MANOLayers.""" 6 | 7 | import torch 8 | 9 | from torch.nn import Module, ModuleList 10 | 11 | from .mano_layer import MANOLayer 12 | 13 | 14 | class MANOGroupLayer(Module): 15 | """Wrapper layer to hold a group of MANOLayers.""" 16 | 17 | def __init__(self, sides, betas): 18 | """Constructor. 19 | 20 | Args: 21 | sides: A list of MANO sides. 'right' or 'left'. 22 | betas: A list of numpy arrays of shape [10] containing the betas. 23 | """ 24 | super(MANOGroupLayer, self).__init__() 25 | 26 | self._sides = sides 27 | self._betas = betas 28 | self._layers = ModuleList( 29 | [MANOLayer(s, b) for s, b in zip(self._sides, self._betas)]) 30 | self._num_obj = len(self._sides) 31 | 32 | f = [] 33 | for i in range(self._num_obj): 34 | f.append(self._layers[i].f + 778 * i) 35 | f = torch.cat(f) 36 | self.register_buffer('f', f) 37 | 38 | r = torch.cat([l.root_trans for l in self._layers]) 39 | self.register_buffer('root_trans', r) 40 | 41 | @property 42 | def num_obj(self): 43 | return self._num_obj 44 | 45 | def forward(self, p, inds=None): 46 | """Forward function. 47 | 48 | Args: 49 | p: A tensor of shape [B, D] containing the pose vectors. 50 | inds: A list of sub-layer indices. 51 | 52 | Returns: 53 | v: A tensor of shape [B, N, 3] containing the vertices. 54 | j: A tensor of shape [B, J, 3] containing the joints. 55 | """ 56 | if inds is None: 57 | inds = range(self._num_obj) 58 | v = [ 59 | torch.zeros((p.size(0), 0, 3), 60 | dtype=torch.float32, 61 | device=self.f.device) 62 | ] 63 | j = [ 64 | torch.zeros((p.size(0), 0, 3), 65 | dtype=torch.float32, 66 | device=self.f.device) 67 | ] 68 | p, t = self._pose2pt(p) 69 | for i in inds: 70 | y = self._layers[i](p[:, i], t[:, i]) 71 | v.append(y[0]) 72 | j.append(y[1]) 73 | v = torch.cat(v, dim=1) 74 | j = torch.cat(j, dim=1) 75 | return v, j 76 | 77 | def _pose2pt(self, pose): 78 | """Extracts pose and trans from pose vectors. 79 | 80 | Args: 81 | pose: A tensor of shape [B, D] containing the pose vectors. 82 | 83 | Returns: 84 | p: A tensor of shape [B, O, 48] containing the pose. 85 | t: A tensor of shape [B, O, 3] containing the trans. 
86 | """ 87 | p = torch.stack( 88 | [pose[:, 51 * i + 0:51 * i + 48] for i in range(self._num_obj)], dim=1) 89 | t = torch.stack( 90 | [pose[:, 51 * i + 48:51 * i + 51] for i in range(self._num_obj)], dim=1) 91 | return p, t 92 | -------------------------------------------------------------------------------- /lib/utils/smplx/smplx/vertex_joint_selector.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from __future__ import absolute_import 18 | from __future__ import print_function 19 | from __future__ import division 20 | 21 | import numpy as np 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from .utils import to_tensor 27 | 28 | 29 | class VertexJointSelector(nn.Module): 30 | 31 | def __init__(self, vertex_ids=None, 32 | use_hands=True, 33 | use_feet_keypoints=True, **kwargs): 34 | super(VertexJointSelector, self).__init__() 35 | 36 | extra_joints_idxs = [] 37 | 38 | face_keyp_idxs = np.array([ 39 | vertex_ids['nose'], 40 | vertex_ids['reye'], 41 | vertex_ids['leye'], 42 | vertex_ids['rear'], 43 | vertex_ids['lear']], dtype=np.int64) 44 | 45 | extra_joints_idxs = np.concatenate([extra_joints_idxs, 46 | face_keyp_idxs]) 47 | 48 | if use_feet_keypoints: 49 | feet_keyp_idxs = np.array([vertex_ids['LBigToe'], 50 | vertex_ids['LSmallToe'], 51 | vertex_ids['LHeel'], 52 | vertex_ids['RBigToe'], 53 | vertex_ids['RSmallToe'], 54 | vertex_ids['RHeel']], dtype=np.int32) 55 | 56 | extra_joints_idxs = np.concatenate( 57 | [extra_joints_idxs, feet_keyp_idxs]) 58 | 59 | if use_hands: 60 | self.tip_names = ['thumb', 'index', 'middle', 'ring', 'pinky'] 61 | 62 | tips_idxs = [] 63 | for hand_id in ['l', 'r']: 64 | for tip_name in self.tip_names: 65 | tips_idxs.append(vertex_ids[hand_id + tip_name]) 66 | 67 | extra_joints_idxs = np.concatenate( 68 | [extra_joints_idxs, tips_idxs]) 69 | 70 | self.register_buffer('extra_joints_idxs', 71 | to_tensor(extra_joints_idxs, dtype=torch.long)) 72 | 73 | def forward(self, vertices, joints): 74 | extra_joints = torch.index_select(vertices, 1, self.extra_joints_idxs) 75 | joints = torch.cat([joints, extra_joints], dim=1) 76 | 77 | return joints 78 | -------------------------------------------------------------------------------- /scripts/download_official_dexycb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set target directories 4 | data_dir="data/DexYCB/data" 5 | splits_dir="data/DexYCB/splits" 6 | 7 | # Create directories 8 | mkdir -p "$data_dir" 9 | mkdir -p "$splits_dir" 10 | 11 | # Download, extract, and remove archive 12 | gdown https://drive.google.com/uc?id=1Ehh92wDE3CWAiKG7E9E73HjN2Xk2XfEk -O "$data_dir/20200709-subject-01.tar.gz" 13 | tar -xzf "$data_dir/20200709-subject-01.tar.gz" -C "$data_dir" && rm 
"$data_dir/20200709-subject-01.tar.gz" 14 | 15 | gdown https://drive.google.com/uc?id=1Uo7MLqTbXEa-8s7YQZ3duugJ1nXFEo62 -O "$data_dir/20200813-subject-02.tar.gz" 16 | tar -xzf "$data_dir/20200813-subject-02.tar.gz" -C "$data_dir" && rm "$data_dir/20200813-subject-02.tar.gz" 17 | 18 | gdown https://drive.google.com/uc?id=1FkUxas8sv8UcVGgAzmSZlJw1eI5W5CXq -O "$data_dir/20200820-subject-03.tar.gz" 19 | tar -xzf "$data_dir/20200820-subject-03.tar.gz" -C "$data_dir" && rm "$data_dir/20200820-subject-03.tar.gz" 20 | 21 | gdown https://drive.google.com/uc?id=14up6qsTpvgEyqOQ5hir-QbjMB_dHfdpA -O "$data_dir/20200903-subject-04.tar.gz" 22 | tar -xzf "$data_dir/20200903-subject-04.tar.gz" -C "$data_dir" && rm "$data_dir/20200903-subject-04.tar.gz" 23 | 24 | gdown https://drive.google.com/uc?id=1NBA_FPyGWOQF5-X9ueAat5g8lDMz-EmS -O "$data_dir/20200908-subject-05.tar.gz" 25 | tar -xzf "$data_dir/20200908-subject-05.tar.gz" -C "$data_dir" && rm "$data_dir/20200908-subject-05.tar.gz" 26 | 27 | gdown https://drive.google.com/uc?id=1UWIN2-wOBZX2T0dkAi4ctAAW8KffkXMQ -O "$data_dir/20200918-subject-06.tar.gz" 28 | tar -xzf "$data_dir/20200918-subject-06.tar.gz" -C "$data_dir" && rm "$data_dir/20200918-subject-06.tar.gz" 29 | 30 | gdown https://drive.google.com/uc?id=1oWEYD_o3PVh39pLzMlJcArkDtMj4nzI0 -O "$data_dir/20200928-subject-07.tar.gz" 31 | tar -xzf "$data_dir/20200928-subject-07.tar.gz" -C "$data_dir" && rm "$data_dir/20200928-subject-07.tar.gz" 32 | 33 | gdown https://drive.google.com/uc?id=1GTNZwhWbs7Mfez0krTgXwLPndvrw1Ztv -O "$data_dir/20201002-subject-08.tar.gz" 34 | tar -xzf "$data_dir/20201002-subject-08.tar.gz" -C "$data_dir" && rm "$data_dir/20201002-subject-08.tar.gz" 35 | 36 | gdown https://drive.google.com/uc?id=1j0BLkaCjIuwjakmywKdOO9vynHTWR0UH -O "$data_dir/20201015-subject-09.tar.gz" 37 | tar -xzf "$data_dir/20201015-subject-09.tar.gz" -C "$data_dir" && rm "$data_dir/20201015-subject-09.tar.gz" 38 | 39 | gdown https://drive.google.com/uc?id=1FvFlRfX-p5a5sAWoKEGc17zKJWwKaSB- -O "$data_dir/20201022-subject-10.tar.gz" 40 | tar -xzf "$data_dir/20201022-subject-10.tar.gz" -C "$data_dir" && rm "$data_dir/20201022-subject-10.tar.gz" 41 | 42 | # Download split JSON files from GitHub (gSDF) 43 | echo "Downloading train/test split files to $splits_dir ..." 44 | wget -c https://raw.githubusercontent.com/zerchen/gSDF/05101b5bde6765e9168026cff853b74a1412c125/datasets/dexycb/splits/train_s0_29k.json -O "$splits_dir/train_s0_29k.json" 45 | wget -c https://raw.githubusercontent.com/zerchen/gSDF/05101b5bde6765e9168026cff853b74a1412c125/datasets/dexycb/splits/test_s0_5k.json -O "$splits_dir/test_s0_5k.json" 46 | 47 | echo "All files downloaded, extracted, and archives removed." -------------------------------------------------------------------------------- /data/DexYCB/toolkit/layers/ycb_group_layer.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Wrapper layer to hold a group of YCBLayers.""" 6 | 7 | import torch 8 | 9 | from torch.nn import Module, ModuleList 10 | 11 | from .ycb_layer import YCBLayer 12 | 13 | 14 | class YCBGroupLayer(Module): 15 | """Wrapper layer to hold a group of YCBLayers.""" 16 | 17 | def __init__(self, ids): 18 | """Constructor. 19 | 20 | Args: 21 | ids: A list of YCB object ids. 
22 | """ 23 | super(YCBGroupLayer, self).__init__() 24 | 25 | self._ids = ids 26 | self._layers = ModuleList([YCBLayer(i) for i in self._ids]) 27 | self._num_obj = len(self._ids) 28 | 29 | f = [] 30 | offset = 0 31 | for i in range(self._num_obj): 32 | if i > 0: 33 | offset += self._layers[i - 1].v.size(1) 34 | f.append(self._layers[i].f + offset) 35 | f = torch.cat(f) 36 | self.register_buffer('f', f) 37 | 38 | @property 39 | def num_obj(self): 40 | return self._num_obj 41 | 42 | @property 43 | def obj_file(self): 44 | return [l.obj_file for l in self._layers] 45 | 46 | @property 47 | def count(self): 48 | return [l.f.numel() for l in self._layers] 49 | 50 | @property 51 | def material(self): 52 | return [l.material for l in self._layers] 53 | 54 | @property 55 | def tex_coords(self): 56 | return [l.tex_coords for l in self._layers] 57 | 58 | def forward(self, p, inds=None): 59 | """Forward function. 60 | 61 | Args: 62 | p: A tensor of shape [B, D] containing the pose vectors. 63 | inds: A list of sub-layer indices. 64 | 65 | Returns: 66 | v: A tensor of shape [B, N, 3] containing the transformed vertices. 67 | n: A tensor of shape [B, N, 3] containing the transformed normals. 68 | """ 69 | if inds is None: 70 | inds = range(self._num_obj) 71 | v = [ 72 | torch.zeros((p.size(0), 0, 3), 73 | dtype=torch.float32, 74 | device=self.f.device) 75 | ] 76 | n = [ 77 | torch.zeros((p.size(0), 0, 3), 78 | dtype=torch.float32, 79 | device=self.f.device) 80 | ] 81 | r, t = self._pose2rt(p) 82 | for i in inds: 83 | y = self._layers[i](r[:, i], t[:, i]) 84 | v.append(y[0]) 85 | n.append(y[1]) 86 | v = torch.cat(v, dim=1) 87 | n = torch.cat(n, dim=1) 88 | return v, n 89 | 90 | def _pose2rt(self, pose): 91 | """Extracts rotations and translations from pose vectors. 92 | 93 | Args: 94 | pose: A tensor of shape [B, D] containing the pose vectors. 95 | 96 | Returns: 97 | r: A tensor of shape [B, O, 3] containing the rotation vectors. 98 | t: A tensor of shape [B, O, 3] containing the translations. 99 | """ 100 | r = torch.stack( 101 | [pose[:, 6 * i + 0:6 * i + 3] for i in range(self._num_obj)], dim=1) 102 | t = torch.stack( 103 | [pose[:, 6 * i + 3:6 * i + 6] for i in range(self._num_obj)], dim=1) 104 | return r, t 105 | -------------------------------------------------------------------------------- /scripts/download_official_rich.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # URL encode function 4 | urle () { [[ "${1}" ]] || return 1; local LANG=C i x; for (( i = 0; i < ${#1}; i++ )); do x="${1:i:1}"; [[ "${x}" == [a-zA-Z0-9.~-] ]] && echo -n "${x}" || printf '%%%02X' "'${x}"; done; echo; } 5 | 6 | # Prompt for username and password 7 | echo -e "\nYou need to register at https://rich.is.tue.mpg.de/" 8 | read -p "Username: " username 9 | read -p "Password: " password 10 | 11 | # Encode credentials 12 | username=$(urle "$username") 13 | password=$(urle "$password") 14 | 15 | # Set save directory 16 | save_dir="data/RICH/data" 17 | mkdir -p "$save_dir" 18 | 19 | # ----------- Download Human-Scene Contact ----------- 20 | echo "Downloading train_hsc.zip..." 21 | wget --post-data "username=$username&password=$password" \ 22 | 'https://download.is.tue.mpg.de/download.php?domain=rich&resume=1&sfile=train_hsc.zip' \ 23 | -O "$save_dir/train_hsc.zip" \ 24 | --no-check-certificate --continue 25 | 26 | echo "Downloading test_hsc.zip..." 
27 | wget --post-data "username=$username&password=$password" \ 28 | 'https://download.is.tue.mpg.de/download.php?domain=rich&resume=1&sfile=test_hsc.zip' \ 29 | -O "$save_dir/test_hsc.zip" \ 30 | --no-check-certificate --continue 31 | 32 | # ----------- Download JPG Image Archives ----------- 33 | echo "Downloading JPG_images/train.tar.gz..." 34 | wget --post-data "username=$username&password=$password" \ 35 | 'https://download.is.tue.mpg.de/download.php?domain=rich&resume=1&sfile=JPG_images/train.tar.gz' \ 36 | -O "$save_dir/train.tar.gz" \ 37 | --no-check-certificate --continue 38 | 39 | echo "Downloading JPG_images/test.tar.gz..." 40 | wget --post-data "username=$username&password=$password" \ 41 | 'https://download.is.tue.mpg.de/download.php?domain=rich&resume=1&sfile=JPG_images/test.tar.gz' \ 42 | -O "$save_dir/test.tar.gz" \ 43 | --no-check-certificate --continue 44 | 45 | # ----------- Download Scan Calibration ----------- 46 | echo "Downloading scan_calibration.zip..." 47 | wget --post-data "username=$username&password=$password" \ 48 | 'https://download.is.tue.mpg.de/download.php?domain=rich&resume=1&sfile=scan_calibration.zip' \ 49 | -O "$save_dir/scan_calibration.zip" \ 50 | --no-check-certificate --continue 51 | 52 | # ----------- Download Multicam2World Info (no auth needed) ----------- 53 | echo "Downloading multicam2world.zip..." 54 | wget 'https://rich.is.tue.mpg.de/media/upload/multicam2world.zip' \ 55 | -O "$save_dir/multicam2world.zip" \ 56 | --continue 57 | 58 | # ----------- Unzip / Untar ----------- 59 | echo "Extracting scan_calibration.zip..." 60 | unzip "$save_dir/scan_calibration.zip" -d "$save_dir" 61 | 62 | echo "Extracting multicam2world.zip..." 63 | unzip "$save_dir/multicam2world.zip" -d "$save_dir" 64 | 65 | echo "Extracting train_hsc.zip..." 66 | mkdir -p "$save_dir/hsc" 67 | unzip "$save_dir/train_hsc.zip" -d "$save_dir" 68 | mv "$save_dir/train_hsc" "$save_dir/hsc/train" 69 | 70 | echo "Extracting test_hsc.zip..." 71 | unzip "$save_dir/test_hsc.zip" -d "$save_dir/hsc" 72 | 73 | echo "Extracting train.tar.gz..." 74 | tar -xzf "$save_dir/train.tar.gz" -C "$image_dir" 75 | 76 | echo "Extracting test.tar.gz..." 
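# NOTE (assumption): $image_dir is never set in this script. Per the layout in
# docs/data_eval.md, the JPG archives belong under data/RICH/data/images_jpg_subset,
# so one reasonable choice is to define it before the two tar commands, e.g.:
#   image_dir="$save_dir/images_jpg_subset" && mkdir -p "$image_dir"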
77 | tar -xzf "$save_dir/test.tar.gz" -C "$image_dir" 78 | 79 | echo "All RICH files downloaded and extracted to $save_dir" -------------------------------------------------------------------------------- /lib/utils/func_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import numpy as np 4 | 5 | 6 | def load_img(path, order='RGB'): 7 | img = cv2.imread(path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 8 | if not isinstance(img, np.ndarray): 9 | raise IOError("Fail to read %s" % path) 10 | 11 | if order=='RGB': img = img[:,:,::-1] 12 | img = img.astype(np.float32) 13 | return img 14 | 15 | 16 | def get_bbox(joint_img, joint_valid, expansion_factor=1.0): 17 | x_img, y_img = joint_img[:,0], joint_img[:,1] 18 | x_img = x_img[joint_valid==1]; y_img = y_img[joint_valid==1]; 19 | xmin = min(x_img); ymin = min(y_img); xmax = max(x_img); ymax = max(y_img); 20 | 21 | x_center = (xmin+xmax)/2.; width = (xmax-xmin)*expansion_factor; 22 | xmin = x_center - 0.5*width 23 | xmax = x_center + 0.5*width 24 | 25 | y_center = (ymin+ymax)/2.; height = (ymax-ymin)*expansion_factor; 26 | ymin = y_center - 0.5*height 27 | ymax = y_center + 0.5*height 28 | 29 | bbox = np.array([xmin, ymin, xmax - xmin, ymax - ymin]).astype(np.float32) 30 | return bbox 31 | 32 | 33 | def process_bbox(bbox, target_shape, original_img_shape): 34 | 35 | # aspect ratio preserving bbox 36 | w = bbox[2] 37 | h = bbox[3] 38 | c_x = bbox[0] + w/2. 39 | c_y = bbox[1] + h/2. 40 | aspect_ratio = target_shape[1]/target_shape[0] 41 | if w > aspect_ratio * h: 42 | h = w / aspect_ratio 43 | elif w < aspect_ratio * h: 44 | w = h * aspect_ratio 45 | bbox[2] = w*1.25 46 | bbox[3] = h*1.25 47 | bbox[0] = c_x - bbox[2]/2. 48 | bbox[1] = c_y - bbox[3]/2. 49 | 50 | return bbox 51 | 52 | 53 | def pca_to_axis_angle(pca_pose): 54 | """ 55 | Converts the PCA pose representation from ManoLayer (use_pca=True) 56 | to full axis-angle pose (use_pca=False). 57 | 58 | Args: 59 | - pca_pose: The PCA components (batch_size x num_pca_comps). 60 | 61 | Returns: 62 | - full_pose: The full 48D axis-angle pose (batch_size x 48). 63 | """ 64 | # Ensure pca_pose is a torch tensor 65 | if isinstance(pca_pose, np.ndarray): 66 | pca_pose = torch.tensor(pca_pose, dtype=torch.float32) 67 | 68 | global_rotation, hand_pose = pca_pose[:, :3], pca_pose[:, 3:] # This should be a placeholder, adjust as needed. 
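    # Layout assumption (matching ManoLayer output with use_pca=True): the first 3 entries of
    # pca_pose are the global wrist rotation in axis-angle form, and the remaining entries are
    # the PCA coefficients that are projected back to the full 45-D finger pose below.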
69 | 70 | # Multiply the PCA components by the PCA basis to get the hand pose (45D) 71 | mano_th_selected_comps = get_mano_pca_basis(ncomps=45, use_pca=True, side='right', mano_root='data/base_data/human_models/mano') 72 | hand_pose = torch.mm(hand_pose, mano_th_selected_comps) 73 | 74 | # Add the mean hand pose to the result (broadcasting over the batch dimension) 75 | full_hand_pose = hand_pose 76 | 77 | # Concatenate the global rotation with the full hand pose 78 | full_pose = torch.cat([global_rotation, full_hand_pose], dim=1) # Shape: (batch_size, 48) 79 | 80 | return full_pose 81 | 82 | 83 | import re 84 | def atoi(text): 85 | return int(text) if text.isdigit() else text 86 | def natural_keys(text): 87 | return [atoi(c) for c in re.split(r'(\d+)', text)] 88 | 89 | 90 | # Load config 91 | import yaml 92 | def load_config(cfg_path): 93 | with open(cfg_path, 'r') as f: 94 | cfg = yaml.safe_load(f) 95 | return cfg -------------------------------------------------------------------------------- /lib/utils/smplx/tools/merge_smplh_mano.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import print_function 19 | 20 | import os 21 | import os.path as osp 22 | import pickle 23 | 24 | import argparse 25 | 26 | import numpy as np 27 | 28 | 29 | def merge_models(smplh_fn, mano_left_fn, mano_right_fn, 30 | output_folder='output'): 31 | 32 | with open(smplh_fn, 'rb') as body_file: 33 | body_data = pickle.load(body_file) 34 | 35 | with open(mano_left_fn, 'rb') as lhand_file: 36 | lhand_data = pickle.load(lhand_file) 37 | 38 | with open(mano_right_fn, 'rb') as rhand_file: 39 | rhand_data = pickle.load(rhand_file) 40 | 41 | out_fn = osp.split(smplh_fn)[1] 42 | 43 | output_data = body_data.copy() 44 | output_data['hands_componentsl'] = lhand_data['hands_components'] 45 | output_data['hands_componentsr'] = rhand_data['hands_components'] 46 | 47 | output_data['hands_coeffsl'] = lhand_data['hands_coeffs'] 48 | output_data['hands_coeffsr'] = rhand_data['hands_coeffs'] 49 | 50 | output_data['hands_meanl'] = lhand_data['hands_mean'] 51 | output_data['hands_meanr'] = rhand_data['hands_mean'] 52 | 53 | for key, data in output_data.iteritems(): 54 | if 'chumpy' in str(type(data)): 55 | output_data[key] = np.array(data) 56 | else: 57 | output_data[key] = data 58 | 59 | out_path = osp.join(output_folder, out_fn) 60 | print(out_path) 61 | print('Saving to {}'.format(out_path)) 62 | with open(out_path, 'wb') as output_file: 63 | pickle.dump(output_data, output_file) 64 | 65 | 66 | if __name__ == '__main__': 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument('--smplh-fn', dest='smplh_fn', required=True, 69 | type=str, help='The path to the SMPLH model') 70 | parser.add_argument('--mano-left-fn', dest='mano_left_fn', required=True, 71 | type=str, help='The path to the left hand MANO model') 72 | parser.add_argument('--mano-right-fn', dest='mano_right_fn', required=True, 73 | type=str, help='The path to the right hand MANO model') 74 | parser.add_argument('--output-folder', dest='output_folder', 75 | required=True, type=str, 76 | help='The path to the output folder') 77 | 78 | args = parser.parse_args() 79 | 80 | smplh_fn = args.smplh_fn 81 | mano_left_fn = args.mano_left_fn 82 | mano_right_fn = args.mano_right_fn 83 | output_folder = args.output_folder 84 | 85 | if not osp.exists(output_folder): 86 | print('Creating directory: {}'.format(output_folder)) 87 | os.makedirs(output_folder) 88 | 89 | merge_models(smplh_fn, mano_left_fn, mano_right_fn, output_folder) 90 | -------------------------------------------------------------------------------- /lib/utils/smplx/examples/vis_flame_vertices.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import os.path as osp 18 | import argparse 19 | import pickle 20 | 21 | import numpy as np 22 | import torch 23 | import open3d as o3d 24 | 25 | import smplx 26 | 27 | 28 | def main(model_folder, corr_fname, ext='npz', 29 | head_color=(0.3, 0.3, 0.6), 30 | gender='neutral'): 31 | 32 | head_idxs = np.load(corr_fname) 33 | 34 | model = smplx.create(model_folder, model_type='smplx', 35 | gender=gender, 36 | ext=ext) 37 | betas = torch.zeros([1, 10], dtype=torch.float32) 38 | expression = torch.zeros([1, 10], dtype=torch.float32) 39 | 40 | output = model(betas=betas, expression=expression, 41 | return_verts=True) 42 | vertices = output.vertices.detach().cpu().numpy().squeeze() 43 | joints = output.joints.detach().cpu().numpy().squeeze() 44 | 45 | print('Vertices shape =', vertices.shape) 46 | print('Joints shape =', joints.shape) 47 | 48 | mesh = o3d.geometry.TriangleMesh() 49 | mesh.vertices = o3d.utility.Vector3dVector(vertices) 50 | mesh.triangles = o3d.utility.Vector3iVector(model.faces) 51 | mesh.compute_vertex_normals() 52 | 53 | colors = np.ones_like(vertices) * [0.3, 0.3, 0.3] 54 | colors[head_idxs] = head_color 55 | 56 | mesh.vertex_colors = o3d.utility.Vector3dVector(colors) 57 | 58 | o3d.visualization.draw_geometries([mesh]) 59 | 60 | 61 | if __name__ == '__main__': 62 | parser = argparse.ArgumentParser(description='SMPL-X Demo') 63 | 64 | parser.add_argument('--model-folder', required=True, type=str, 65 | help='The path to the model folder') 66 | parser.add_argument('--corr-fname', required=True, type=str, 67 | dest='corr_fname', 68 | help='Filename with the head correspondences') 69 | parser.add_argument('--gender', type=str, default='neutral', 70 | help='The gender of the model') 71 | parser.add_argument('--ext', type=str, default='npz', 72 | help='Which extension to use for loading') 73 | parser.add_argument('--head', default='right', 74 | choices=['right', 'left'], 75 | type=str, help='Which head to plot') 76 | parser.add_argument('--head-color', type=float, nargs=3, dest='head_color', 77 | default=(0.3, 0.3, 0.6), 78 | help='Color for the head vertices') 79 | 80 | args = parser.parse_args() 81 | 82 | model_folder = osp.expanduser(osp.expandvars(args.model_folder)) 83 | corr_fname = args.corr_fname 84 | gender = args.gender 85 | ext = args.ext 86 | head = args.head 87 | head_color = args.head_color 88 | 89 | main(model_folder, corr_fname, ext=ext, 90 | head_color=head_color, 91 | gender=gender 92 | ) 93 | -------------------------------------------------------------------------------- /lib/utils/human_models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import os.path as osp 4 | import pickle 5 | 6 | from lib.core.config import cfg 7 | from lib.utils.transforms import transform_joint_to_other_db 8 | from lib.utils.smplx import smplx 9 | 10 | 11 | 12 | class MANO(object): 13 | def __init__(self): 14 | self.layer_arg = {'create_global_orient': False, 'create_hand_pose': False, 'create_betas': False, 'create_transl': False} 15 | self.layer = {'right': smplx.create(cfg.MODEL.human_model_path, 'mano', is_rhand=True, use_pca=False, flat_hand_mean=False, **self.layer_arg), 'left': smplx.create(cfg.MODEL.human_model_path, 'mano', is_rhand=False, use_pca=False, flat_hand_mean=False, **self.layer_arg)} 16 | self.vertex_num = 778 17 | self.face = {'right': self.layer['right'].faces, 'left': self.layer['left'].faces} 18 | self.add_watertight_face = 
{'right': np.array([[92,38,122], [234,92,122], [239,234,122], [279,239,122], [215,279,122], [215,122,118], [215,118,117], [215,117,119], [215,119,120], [215,120,108], [215,108,79], [215,79,78], [215,78,121], [214,215,121]])} 19 | self.watertight_face = {'right': np.concatenate((self.layer['right'].faces, self.add_watertight_face['right']), axis=0)} 20 | self.shape_param_dim = 10 21 | 22 | if torch.sum(torch.abs(self.layer['left'].shapedirs[:,0,:] - self.layer['right'].shapedirs[:,0,:])) < 1: 23 | print('Fix shapedirs bug of MANO') 24 | self.layer['left'].shapedirs[:,0,:] *= -1 25 | 26 | # original MANO joint set 27 | self.orig_joint_num = 16 28 | self.orig_joints_name = ('Wrist', 'Index_1', 'Index_2', 'Index_3', 'Middle_1', 'Middle_2', 'Middle_3', 'Pinky_1', 'Pinky_2', 'Pinky_3', 'Ring_1', 'Ring_2', 'Ring_3', 'Thumb_1', 'Thumb_2', 'Thumb_3') 29 | self.orig_root_joint_idx = self.orig_joints_name.index('Wrist') 30 | self.orig_flip_pairs = () 31 | self.orig_joint_regressor = self.layer['right'].J_regressor.numpy() # same for the right and left hands 32 | 33 | # changed MANO joint set 34 | self.joint_num = 21 # manually added fingertips 35 | self.joints_name = ('Wrist', 'Thumb_1', 'Thumb_2', 'Thumb_3', 'Thumb_4', 'Index_1', 'Index_2', 'Index_3', 'Index_4', 'Middle_1', 'Middle_2', 'Middle_3', 'Middle_4', 'Ring_1', 'Ring_2', 'Ring_3', 'Ring_4', 'Pinky_1', 'Pinky_2', 'Pinky_3', 'Pinky_4') 36 | self.skeleton = ( (0,1), (0,5), (0,9), (0,13), (0,17), (1,2), (2,3), (3,4), (5,6), (6,7), (7,8), (9,10), (10,11), (11,12), (13,14), (14,15), (15,16), (17,18), (18,19), (19,20) ) 37 | self.root_joint_idx = self.joints_name.index('Wrist') 38 | self.flip_pairs = () 39 | # add fingertips to joint_regressor 40 | self.joint_regressor = transform_joint_to_other_db(self.orig_joint_regressor, self.orig_joints_name, self.joints_name) 41 | self.joint_regressor[self.joints_name.index('Thumb_4')] = np.array([1 if i == 745 else 0 for i in range(self.joint_regressor.shape[1])], dtype=np.float32).reshape(1,-1) 42 | self.joint_regressor[self.joints_name.index('Index_4')] = np.array([1 if i == 317 else 0 for i in range(self.joint_regressor.shape[1])], dtype=np.float32).reshape(1,-1) 43 | self.joint_regressor[self.joints_name.index('Middle_4')] = np.array([1 if i == 445 else 0 for i in range(self.joint_regressor.shape[1])], dtype=np.float32).reshape(1,-1) 44 | self.joint_regressor[self.joints_name.index('Ring_4')] = np.array([1 if i == 556 else 0 for i in range(self.joint_regressor.shape[1])], dtype=np.float32).reshape(1,-1) 45 | self.joint_regressor[self.joints_name.index('Pinky_4')] = np.array([1 if i == 673 else 0 for i in range(self.joint_regressor.shape[1])], dtype=np.float32).reshape(1,-1) 46 | 47 | 48 | 49 | mano = MANO() -------------------------------------------------------------------------------- /data/MOW/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import json 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.transforms import Normalize 9 | 10 | from lib.core.config import cfg 11 | from lib.utils.human_models import mano 12 | from lib.utils.func_utils import load_img 13 | from lib.utils.preprocessing import augmentation_contact, mask2bbox 14 | 15 | 16 | 17 | # Main dataloader code for MOW dataset 18 | class MOW(Dataset): 19 | def __init__(self, transform, data_split): 20 | super(MOW, self).__init__() 21 | self.__dict__.update(locals()) 22 | 23 | self.transfrom = transform 24 | 
dataset_name = 'mow' 25 | 26 | self.data_split = data_split 27 | self.root_path = root_path = 'data/MOW' 28 | 29 | self.data_dir = os.path.join(self.root_path, 'data') 30 | self.split_dir = os.path.join(self.root_path, 'splits') 31 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 32 | 33 | with open(os.path.join(self.data_dir, 'poses.json')) as f: 34 | self.db = {a['image_id']: a for a in json.load(f)} 35 | 36 | # Organize db id based on split 37 | db_split_path = os.path.join(self.split_dir, f'{self.data_split}.json') 38 | with open(db_split_path, 'r') as f: 39 | self.split = json.load(f) 40 | self.length = len(self.split) 41 | 42 | 43 | def __len__(self): 44 | return self.length 45 | 46 | 47 | def __getitem__(self, index): 48 | sample_id = self.split[index] 49 | ann = self.db[sample_id] 50 | image_id = ann['image_id'] 51 | 52 | img_path = os.path.join(self.data_dir, 'images', f'{image_id}.jpg') 53 | orig_img = load_img(img_path) 54 | 55 | mano_valid = np.ones((1), dtype=np.float32) 56 | 57 | 58 | ################################## LOAD ANNOTATION DATA ##################################### 59 | mask_ho_path = os.path.join(self.data_dir, 'masks/both', f'{image_id}.jpg') 60 | mask_ho = (cv2.imread(mask_ho_path) > 128)[:, :, 0] 61 | bbox_ho = mask2bbox(mask_ho, expansion_factor=cfg.DATASET.ho_bbox_expand_ratio) 62 | 63 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 64 | contact_data = dict(contact_h=contact_h) 65 | ################################## LOAD ANNOTATION DATA ##################################### 66 | 67 | 68 | ############################### PROCESS CROP AND AUGMENTATION ############################### 69 | # Crop image 70 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_ho, self.data_split, enforce_flip=False) 71 | crop_img = img.copy() 72 | 73 | # Transform for 3D HMR 74 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 75 | img = self.transform(img.astype(np.float32)/255.0) 76 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 77 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 78 | img = img.transpose(2, 0, 1) / 255.0 79 | img = normalize_img(torch.from_numpy(img)).float() 80 | else: 81 | raise NotImplementedError 82 | ############################### PROCESS CROP AND AUGMENTATION ############################### 83 | 84 | 85 | input_data = dict(image=img) 86 | targets_data = dict(contact_data=contact_data) 87 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 88 | 89 | 90 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /docs/data_eval.md: -------------------------------------------------------------------------------- 1 | ## Data 2 | You need to follow directory structure of the `data` as below. 
3 | ``` 4 | ${ROOT} 5 | |-- data 6 | | |-- base_data 7 | | |-- MOW 8 | | | |-- data 9 | | | |-- preprocessed_data 10 | | | |-- splits 11 | | | |-- dataset.py 12 | | |-- HIC 13 | | | |-- data 14 | | | |-- preprocessed_data 15 | | | |-- splits 16 | | | |-- dataset.py 17 | | |-- RICH 18 | | | |-- data 19 | | | |-- preprocessed_data 20 | | | |-- splits 21 | | | |-- dataset.py 22 | | |-- Hi4D 23 | | | |-- data 24 | | | |-- preprocessed_data 25 | | | |-- splits 26 | | | |-- dataset.py 27 | ``` 28 | * Download `base_data` from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data/resolve/main/train/data/base_data.tar.gz) by running: 29 | ``` 30 | bash scripts/download_train_base_data.sh 31 | ``` 32 | #### preprocessed_data 33 | * Download `preprocessed_data` from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data) by running: 34 | ``` 35 | bash scripts/download_train_preprocessed_data.sh 36 | ``` 37 | #### MOW dataset 38 | ``` 39 | ${ROOT} 40 | |-- data 41 | | |-- MOW 42 | | | |-- data 43 | | | | |-- images 44 | | | | |-- masks 45 | | | | |-- poses.json 46 | | | |-- preprocessed_data 47 | | | | |-- test 48 | | | | | |-- contact_data 49 | | | |-- splits 50 | | | | |-- test.json 51 | | | |-- dataset.py 52 | ``` 53 | * Download `images`, `poses.json` by running: 54 | ``` 55 | bash scripts/download_official_mow.sh 56 | ``` 57 | * `masks`, `preprocessed_data`, `splits` are already downloaded from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data). 58 | #### HIC dataset 59 | ``` 60 | ${ROOT} 61 | |-- data 62 | | |-- HIC 63 | | | |-- data 64 | | | | |-- 01 65 | | | | |-- ... 66 | | | | |-- 12 67 | | | | |-- IJCV16___Results_MANO___parms_for___joints21 68 | | | | |-- HIC.json 69 | | | |-- preprocessed_data 70 | | | | |-- test 71 | | | | | |-- annot_data 72 | | | | | |-- contact_data 73 | | | |-- splits 74 | | | | |-- test.json 75 | | | |-- dataset.py 76 | ``` 77 | * Download `data` by running: 78 | ``` 79 | bash scripts/download_official_hic.sh 80 | ``` 81 | * `preprocessed_data`, `splits` are already downloaded from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data). 82 | #### RICH dataset 83 | ``` 84 | ${ROOT} 85 | |-- data 86 | | |-- RICH 87 | | | |-- data 88 | | | | |-- hsc 89 | | | | |-- images_jpg_subset 90 | | | | |-- multicam2world 91 | | | | |-- scan_calibration 92 | | | |-- preprocessed_data 93 | | | | |-- test 94 | | | | | |-- annot_data 95 | | | | | |-- contact_data 96 | | | |-- splits 97 | | | | |-- test.json 98 | | | |-- dataset.py 99 | ``` 100 | * Download `data` by running: 101 | ``` 102 | bash scripts/download_official_rich.sh 103 | ``` 104 | * `preprocessed_data`, `splits` are already downloaded from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data). 105 | #### Hi4D dataset 106 | ``` 107 | ${ROOT} 108 | |-- data 109 | | |-- Hi4D 110 | | | |-- data 111 | | | | |-- pair00 112 | | | | |-- ... 113 | | | | |-- pair37 114 | | | |-- preprocessed_data 115 | | | | |-- test 116 | | | | | |-- annot_data 117 | | | | | |-- contact_data 118 | | | |-- splits 119 | | | | |-- test.json 120 | | | | |-- test_pid.json 121 | | | |-- dataset.py 122 | ``` 123 | * Download `data` by running after download request from [official website](https://yifeiyin04.github.io/Hi4D): 124 | ``` 125 | bash scripts/download_official_hi4d.sh 126 | bash scripts/extract_official_hi4d.sh 127 | ``` 128 | * `preprocessed_data`, `splits` are already downloaded from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data). 
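#### Sanity check
After running the download scripts above, a minimal sketch like the one below (not part of the official scripts; paths taken from the directory trees on this page) can confirm that the expected folders are in place:
```
for d in data/base_data \
         data/MOW/data data/MOW/preprocessed_data data/MOW/splits \
         data/HIC/data data/HIC/preprocessed_data data/HIC/splits \
         data/RICH/data data/RICH/preprocessed_data data/RICH/splits \
         data/Hi4D/data data/Hi4D/preprocessed_data data/Hi4D/splits; do
    [ -d "$d" ] && echo "OK      $d" || echo "MISSING $d"
done
```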
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # custom 12 | # data/*/*.json 13 | # data/*/*/*.json 14 | # data/*/images 15 | # data/*/data 16 | # data/*/sequences 17 | # data/*/annotation 18 | # data/*/annotations 19 | # data/base_data 20 | # data/preprocessed_data 21 | # experiment 22 | 23 | # Distribution / packaging 24 | .Python 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *.cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # pyenv 90 | .python-version 91 | 92 | # celery beat schedule file 93 | celerybeat-schedule 94 | 95 | # SageMath parsed files 96 | *.sage.py 97 | 98 | # Environments 99 | .env 100 | .venv 101 | env/ 102 | venv/ 103 | ENV/ 104 | env.bak/ 105 | venv.bak/ 106 | 107 | # Spyder project settings 108 | .spyderproject 109 | .spyproject 110 | 111 | # Rope project settings 112 | .ropeproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | ### macOS template 120 | # General 121 | .DS_Store 122 | .AppleDouble 123 | .LSOverride 124 | 125 | # Icon must end with two \r 126 | Icon 127 | 128 | # Thumbnails 129 | ._* 130 | 131 | # Files that might appear in the root of a volume 132 | .DocumentRevisions-V100 133 | .fseventsd 134 | .Spotlight-V100 135 | .TemporaryItems 136 | .Trashes 137 | .VolumeIcon.icns 138 | .com.apple.timemachine.donotpresent 139 | 140 | # Directories potentially created on remote AFP share 141 | .AppleDB 142 | .AppleDesktop 143 | Network Trash Folder 144 | Temporary Items 145 | .apdisk 146 | ### JetBrains template 147 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 148 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 149 | 150 | # User-specific stuff 151 | .idea/**/workspace.xml 152 | .idea/**/tasks.xml 153 | .idea/**/dictionaries 154 | .idea/**/shelf 155 | 156 | # Sensitive or high-churn files 157 | .idea/**/dataSources/ 158 | .idea/**/dataSources.ids 159 | .idea/**/dataSources.local.xml 160 | .idea/**/sqlDataSources.xml 161 | .idea/**/dynamic.xml 162 | .idea/**/uiDesigner.xml 163 | .idea/**/dbnavigator.xml 164 | 165 | # Gradle 166 | .idea/**/gradle.xml 167 | .idea/**/libraries 168 | 169 | # CMake 170 | cmake-build-debug/ 171 | cmake-build-release/ 
172 | 173 | # Mongo Explorer plugin 174 | .idea/**/mongoSettings.xml 175 | 176 | # File-based project format 177 | *.iws 178 | 179 | # IntelliJ 180 | out/ 181 | 182 | # mpeltonen/sbt-idea plugin 183 | .idea_modules/ 184 | 185 | # JIRA plugin 186 | atlassian-ide-plugin.xml 187 | 188 | # Cursive Clojure plugin 189 | .idea/replstate.xml 190 | 191 | # Crashlytics plugin (for Android Studio and IntelliJ) 192 | com_crashlytics_export_strings.xml 193 | crashlytics.properties 194 | crashlytics-build.properties 195 | fabric.properties 196 | 197 | # Editor-based Rest Client 198 | .idea/httpRequests -------------------------------------------------------------------------------- /lib/utils/smplx/examples/vis_mano_vertices.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import os.path as osp 18 | import argparse 19 | import pickle 20 | 21 | import numpy as np 22 | import torch 23 | import open3d as o3d 24 | 25 | import smplx 26 | 27 | 28 | def main(model_folder, corr_fname, ext='npz', 29 | hand_color=(0.3, 0.3, 0.6), 30 | gender='neutral', hand='right'): 31 | 32 | with open(corr_fname, 'rb') as f: 33 | idxs_data = pickle.load(f) 34 | if hand == 'both': 35 | hand_idxs = np.concatenate( 36 | [idxs_data['left_hand'], idxs_data['right_hand']] 37 | ) 38 | else: 39 | hand_idxs = idxs_data[f'{hand}_hand'] 40 | 41 | model = smplx.create(model_folder, model_type='smplx', 42 | gender=gender, 43 | ext=ext) 44 | betas = torch.zeros([1, 10], dtype=torch.float32) 45 | expression = torch.zeros([1, 10], dtype=torch.float32) 46 | 47 | output = model(betas=betas, expression=expression, 48 | return_verts=True) 49 | vertices = output.vertices.detach().cpu().numpy().squeeze() 50 | joints = output.joints.detach().cpu().numpy().squeeze() 51 | 52 | print('Vertices shape =', vertices.shape) 53 | print('Joints shape =', joints.shape) 54 | 55 | mesh = o3d.geometry.TriangleMesh() 56 | mesh.vertices = o3d.utility.Vector3dVector(vertices) 57 | mesh.triangles = o3d.utility.Vector3iVector(model.faces) 58 | mesh.compute_vertex_normals() 59 | 60 | colors = np.ones_like(vertices) * [0.3, 0.3, 0.3] 61 | colors[hand_idxs] = hand_color 62 | 63 | mesh.vertex_colors = o3d.utility.Vector3dVector(colors) 64 | 65 | o3d.visualization.draw_geometries([mesh]) 66 | 67 | 68 | if __name__ == '__main__': 69 | parser = argparse.ArgumentParser(description='SMPL-X Demo') 70 | 71 | parser.add_argument('--model-folder', required=True, type=str, 72 | help='The path to the model folder') 73 | parser.add_argument('--corr-fname', required=True, type=str, 74 | dest='corr_fname', 75 | help='Filename with the hand correspondences') 76 | parser.add_argument('--gender', type=str, default='neutral', 77 | help='The gender of the model') 78 | parser.add_argument('--ext', type=str, 
default='npz', 79 | help='Which extension to use for loading') 80 | parser.add_argument('--hand', default='right', 81 | choices=['right', 'left', 'both'], 82 | type=str, help='Which hand to plot') 83 | parser.add_argument('--hand-color', type=float, nargs=3, dest='hand_color', 84 | default=(0.3, 0.3, 0.6), 85 | help='Color for the hand vertices') 86 | 87 | args = parser.parse_args() 88 | 89 | model_folder = osp.expanduser(osp.expandvars(args.model_folder)) 90 | corr_fname = args.corr_fname 91 | gender = args.gender 92 | ext = args.ext 93 | hand = args.hand 94 | hand_color = args.hand_color 95 | 96 | main(model_folder, corr_fname, ext=ext, 97 | hand_color=hand_color, 98 | gender=gender, hand=hand 99 | ) 100 | -------------------------------------------------------------------------------- /lib/utils/smplx/smplx/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import NewType, Union, Optional 18 | from dataclasses import dataclass, asdict, fields 19 | import numpy as np 20 | import torch 21 | 22 | Tensor = NewType('Tensor', torch.Tensor) 23 | Array = NewType('Array', np.ndarray) 24 | 25 | 26 | @dataclass 27 | class ModelOutput: 28 | vertices: Optional[Tensor] = None 29 | joints: Optional[Tensor] = None 30 | full_pose: Optional[Tensor] = None 31 | global_orient: Optional[Tensor] = None 32 | transl: Optional[Tensor] = None 33 | 34 | def __getitem__(self, key): 35 | return getattr(self, key) 36 | 37 | def get(self, key, default=None): 38 | return getattr(self, key, default) 39 | 40 | def __iter__(self): 41 | return self.keys() 42 | 43 | def keys(self): 44 | keys = [t.name for t in fields(self)] 45 | return iter(keys) 46 | 47 | def values(self): 48 | values = [getattr(self, t.name) for t in fields(self)] 49 | return iter(values) 50 | 51 | def items(self): 52 | data = [(t.name, getattr(self, t.name)) for t in fields(self)] 53 | return iter(data) 54 | 55 | 56 | @dataclass 57 | class SMPLOutput(ModelOutput): 58 | betas: Optional[Tensor] = None 59 | body_pose: Optional[Tensor] = None 60 | 61 | 62 | @dataclass 63 | class SMPLHOutput(SMPLOutput): 64 | left_hand_pose: Optional[Tensor] = None 65 | right_hand_pose: Optional[Tensor] = None 66 | transl: Optional[Tensor] = None 67 | 68 | 69 | @dataclass 70 | class SMPLXOutput(SMPLHOutput): 71 | expression: Optional[Tensor] = None 72 | jaw_pose: Optional[Tensor] = None 73 | 74 | 75 | @dataclass 76 | class MANOOutput(ModelOutput): 77 | betas: Optional[Tensor] = None 78 | hand_pose: Optional[Tensor] = None 79 | 80 | 81 | @dataclass 82 | class FLAMEOutput(ModelOutput): 83 | betas: Optional[Tensor] = None 84 | expression: Optional[Tensor] = None 85 | jaw_pose: Optional[Tensor] = None 86 | neck_pose: Optional[Tensor] = None 87 | 88 | 89 | def 
find_joint_kin_chain(joint_id, kinematic_tree): 90 | kin_chain = [] 91 | curr_idx = joint_id 92 | while curr_idx != -1: 93 | kin_chain.append(curr_idx) 94 | curr_idx = kinematic_tree[curr_idx] 95 | return kin_chain 96 | 97 | 98 | def to_tensor( 99 | array: Union[Array, Tensor], dtype=torch.float32 100 | ) -> Tensor: 101 | if torch.is_tensor(array): 102 | return array 103 | else: 104 | return torch.tensor(array, dtype=dtype) 105 | 106 | 107 | class Struct(object): 108 | def __init__(self, **kwargs): 109 | for key, val in kwargs.items(): 110 | setattr(self, key, val) 111 | 112 | 113 | def to_np(array, dtype=np.float32): 114 | if 'scipy.sparse' in str(type(array)): 115 | array = array.todense() 116 | return np.array(array, dtype=dtype) 117 | 118 | 119 | def rot_mat_to_euler(rot_mats): 120 | # Calculates rotation matrix to euler angles 121 | # Careful for extreme cases of eular angles like [0.0, pi, 0.0] 122 | 123 | sy = torch.sqrt(rot_mats[:, 0, 0] * rot_mats[:, 0, 0] + 124 | rot_mats[:, 1, 0] * rot_mats[:, 1, 0]) 125 | return torch.atan2(-rot_mats[:, 2, 0], sy) 126 | -------------------------------------------------------------------------------- /lib/core/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from easydict import EasyDict as edict 5 | 6 | from lib.core.logger import ColorLogger 7 | from lib.utils.log_utils import init_dirs 8 | 9 | 10 | cfg = edict() 11 | 12 | 13 | """ Dataset """ 14 | cfg.DATASET = edict() 15 | cfg.DATASET.train_name = ['ObMan', 'DexYCB', 'HO3D', 'MOW', 'H2O3D', 'HOI4D', 'H2O', 'ARCTIC', 'InterHand26M', 'HIC', 'PROX', 'RICH', 'Decaf', 'Hi4D'] 16 | cfg.DATASET.test_name = 'MOW' # ONLY TEST ONE DATASET AT A TIME. Currently support ['MOW', 'HIC', 'RICH', 'Hi4D'] 17 | cfg.DATASET.workers = 2 18 | cfg.DATASET.ho_bbox_expand_ratio = 1.3 19 | cfg.DATASET.hand_bbox_expand_ratio = 1.3 20 | cfg.DATASET.ho_big_bbox_expand_ratio = 2.0 21 | cfg.DATASET.hand_scene_bbox_expand_ratio = 2.5 22 | cfg.DATASET.obj_bbox_expand_ratio = 1.5 23 | 24 | 25 | """ Model - HMR """ 26 | cfg.MODEL = edict() 27 | cfg.MODEL.input_img_shape = (256, 256) 28 | cfg.MODEL.img_mean = (0.485, 0.456, 0.406) 29 | cfg.MODEL.img_std = (0.229, 0.224, 0.225) 30 | # MANO 31 | cfg.MODEL.human_model_path = 'data/base_data/human_models' 32 | # Contact 33 | cfg.MODEL.c_thres_ih = 0.005 # Reasonable for InterHand26M, HIC dataset 34 | cfg.MODEL.c_thres = 0.01 # Reasonable for ObMan, DexYCB, HO3D, H2O3D, ARCTIC, HOI4D, H2O, PROX dataset 35 | cfg.MODEL.c_thres_in_the_wild = 0.035 # Reasonable for MOW dataset 36 | cfg.MODEL.contact_data_path = 'data/base_data/contact_data/all/contact_data_all.npy' 37 | cfg.MODEL.contact_means_path = 'data/base_data/contact_data/all/contact_means_all.npy' 38 | # Backbone 39 | cfg.MODEL.backbone_type = 'hamer' 40 | cfg.MODEL.hamer_backbone_pretrained_path = 'data/base_data/pretrained_models/hamer/hamer.ckpt' 41 | cfg.MODEL.hrnet_w32_backbone_config_path = 'data/base_data/pretrained_models/hrnet/cls_hrnet_w32_sgd_lr5e-2_wd1e-4_bs32_x100.yaml' 42 | cfg.MODEL.hrnet_w32_backbone_pretrained_path = 'data/base_data/pretrained_models/hrnet/hrnet_w32-36af842e.pth' 43 | cfg.MODEL.hrnet_w48_backbone_config_path = 'data/base_data/pretrained_models/hrnet/cls_hrnet_w48_sgd_lr5e-2_wd1e-4_bs32_x100.yaml' 44 | cfg.MODEL.hrnet_w48_backbone_pretrained_path = 'data/base_data/pretrained_models/hrnet/hrnet_w48-8ef0771d.pth' 45 | cfg.MODEL.handoccnet_backbone_pretrained_path = 
'data/base_data/pretrained_models/handoccnet/snapshot_demo.pth.tar' 46 | # Multi-level joint regressor 47 | cfg.MODEL.V_regressor_336_path = 'data/base_data/human_models/mano/V_regressor_336.npy' 48 | cfg.MODEL.V_regressor_84_path = 'data/base_data/human_models/mano/V_regressor_84.npy' 49 | # Hand Detector 50 | cfg.MODEL.hand_landmarker_path = 'data/base_data/demo_data/hand_landmarker.task' 51 | # Balanced Sampling 52 | cfg.MODEL.balanced_sampling = True 53 | # Loss 54 | cfg.MODEL.loss_type = 'vcb+reg+smooth' 55 | 56 | 57 | """ Train Detail """ 58 | cfg.TRAIN = edict() 59 | cfg.TRAIN.batch = 24 60 | cfg.TRAIN.epoch = 10 61 | cfg.TRAIN.lr = 1e-5 62 | cfg.TRAIN.weight_decay = 0.0001 63 | cfg.TRAIN.milestones = (5, 10) 64 | cfg.TRAIN.step_size = 10 65 | cfg.TRAIN.gamma = 0.9 66 | cfg.TRAIN.betas = (0.9, 0.95) 67 | cfg.TRAIN.print_freq = 1 68 | cfg.TRAIN.seed = 314 # for reproducibility 69 | cfg.TRAIN.loss_weight = 1.0 70 | 71 | 72 | """ Test Detail """ 73 | cfg.TEST = edict() 74 | cfg.TEST.batch = 1 75 | 76 | 77 | """ CAMERA """ 78 | cfg.CAMERA = edict() 79 | 80 | 81 | """ OTHERS """ 82 | torch.backends.cudnn.benchmark = True 83 | logger = None 84 | 85 | 86 | def update_config(backbone_type='', test_name='', exp_dir='', ckpt_path=''): 87 | if backbone_type == '': 88 | backbone_type = 'hamer' 89 | cfg.MODEL.backbone_type = backbone_type 90 | if test_name == '': 91 | test_name = 'MOW' 92 | cfg.DATASET.test_name = test_name 93 | 94 | global logger 95 | log_dir = os.path.join(exp_dir, 'log') 96 | try: 97 | init_dirs([log_dir]) 98 | logger = ColorLogger(log_dir) 99 | logger.info("Logger initialized successfully!") 100 | except Exception as e: 101 | print(f"Failed to initialize logger: {e}") 102 | logger = None -------------------------------------------------------------------------------- /lib/models/backbone/resnet.py: -------------------------------------------------------------------------------- 1 | # This code is from HandOccNet (https://github.com/mks0601/Hand4Whole_RELEASE/blob/main/common/nets/resnet.py) 2 | import torch 3 | import torch.nn as nn 4 | from torchvision.models.resnet import BasicBlock, Bottleneck 5 | 6 | 7 | class ResNetBackbone(nn.Module): 8 | def __init__(self, resnet_type): 9 | 10 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2], [64, 64, 128, 256, 512], 'resnet18'), 11 | 34: (BasicBlock, [3, 4, 6, 3], [64, 64, 128, 256, 512], 'resnet34'), 12 | 50: (Bottleneck, [3, 4, 6, 3], [64, 256, 512, 1024, 2048], 'resnet50'), 13 | 101: (Bottleneck, [3, 4, 23, 3], [64, 256, 512, 1024, 2048], 'resnet101'), 14 | 152: (Bottleneck, [3, 8, 36, 3], [64, 256, 512, 1024, 2048], 'resnet152')} 15 | block, layers, channels, name = resnet_spec[resnet_type] 16 | 17 | self.name = name 18 | self.inplanes = 64 19 | super(ResNetBackbone, self).__init__() 20 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 21 | bias=False) 22 | self.bn1 = nn.BatchNorm2d(64) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 25 | 26 | self.layer1 = self._make_layer(block, 64, layers[0]) 27 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 28 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 29 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 30 | 31 | for m in self.modules(): 32 | if isinstance(m, nn.Conv2d): 33 | nn.init.normal_(m.weight, mean=0, std=0.001) 34 | elif isinstance(m, nn.BatchNorm2d): 35 | nn.init.constant_(m.weight, 1) 36 | nn.init.constant_(m.bias, 0) 37 | 38 | def _make_layer(self, 
block, planes, blocks, stride=1): 39 | downsample = None 40 | if stride != 1 or self.inplanes != planes * block.expansion: 41 | downsample = nn.Sequential( 42 | nn.Conv2d(self.inplanes, planes * block.expansion, 43 | kernel_size=1, stride=stride, bias=False), 44 | nn.BatchNorm2d(planes * block.expansion), 45 | ) 46 | 47 | layers = [] 48 | layers.append(block(self.inplanes, planes, stride, downsample)) 49 | self.inplanes = planes * block.expansion 50 | for i in range(1, blocks): 51 | layers.append(block(self.inplanes, planes)) 52 | 53 | return nn.Sequential(*layers) 54 | 55 | def forward(self, x): 56 | x = self.conv1(x) 57 | x = self.bn1(x) 58 | x = self.relu(x) 59 | x = self.maxpool(x) 60 | 61 | x = self.layer1(x) 62 | x = self.layer2(x) 63 | x = self.layer3(x) 64 | x = self.layer4(x) 65 | return x 66 | 67 | def init_weights(self): 68 | import torchvision.models as models 69 | 70 | if self.name == 'resnet18': 71 | org_resnet = models.resnet18(pretrained=True) 72 | elif self.name == 'resnet34': 73 | org_resnet = models.resnet34(pretrained=True) 74 | elif self.name == 'resnet50': 75 | org_resnet = models.resnet50(pretrained=True) 76 | elif self.name == 'resnet101': 77 | org_resnet = models.resnet101(pretrained=True) 78 | elif self.name == 'resnet152': 79 | org_resnet = models.resnet152(pretrained=True) 80 | else: 81 | raise ValueError(f"Unsupported model name: {self.name}") 82 | 83 | # Drop the original fully connected layer 84 | org_resnet.fc = None # Or you can set it to nn.Identity() 85 | 86 | # If you're loading weights manually, extract the state_dict 87 | org_resnet_state = org_resnet.state_dict() 88 | 89 | # Remove FC layer weights to avoid mismatch 90 | org_resnet_state.pop('fc.weight', None) 91 | org_resnet_state.pop('fc.bias', None) 92 | 93 | # Load into your model 94 | self.load_state_dict(org_resnet_state, strict=False) 95 | print("Initialized ResNet from torchvision with pretrained=True") -------------------------------------------------------------------------------- /lib/utils/smplx/smplx/joint_names.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | JOINT_NAMES = [ 18 | 'pelvis', 19 | 'left_hip', 20 | 'right_hip', 21 | 'spine1', 22 | 'left_knee', 23 | 'right_knee', 24 | 'spine2', 25 | 'left_ankle', 26 | 'right_ankle', 27 | 'spine3', 28 | 'left_foot', 29 | 'right_foot', 30 | 'neck', 31 | 'left_collar', 32 | 'right_collar', 33 | 'head', 34 | 'left_shoulder', 35 | 'right_shoulder', 36 | 'left_elbow', 37 | 'right_elbow', 38 | 'left_wrist', 39 | 'right_wrist', 40 | 'jaw', 41 | 'left_eye_smplhf', 42 | 'right_eye_smplhf', 43 | 'left_index1', 44 | 'left_index2', 45 | 'left_index3', 46 | 'left_middle1', 47 | 'left_middle2', 48 | 'left_middle3', 49 | 'left_pinky1', 50 | 'left_pinky2', 51 | 'left_pinky3', 52 | 'left_ring1', 53 | 'left_ring2', 54 | 'left_ring3', 55 | 'left_thumb1', 56 | 'left_thumb2', 57 | 'left_thumb3', 58 | 'right_index1', 59 | 'right_index2', 60 | 'right_index3', 61 | 'right_middle1', 62 | 'right_middle2', 63 | 'right_middle3', 64 | 'right_pinky1', 65 | 'right_pinky2', 66 | 'right_pinky3', 67 | 'right_ring1', 68 | 'right_ring2', 69 | 'right_ring3', 70 | 'right_thumb1', 71 | 'right_thumb2', 72 | 'right_thumb3', 73 | 'nose', 74 | 'right_eye', 75 | 'left_eye', 76 | 'right_ear', 77 | 'left_ear', 78 | 'left_big_toe', 79 | 'left_small_toe', 80 | 'left_heel', 81 | 'right_big_toe', 82 | 'right_small_toe', 83 | 'right_heel', 84 | 'left_thumb', 85 | 'left_index', 86 | 'left_middle', 87 | 'left_ring', 88 | 'left_pinky', 89 | 'right_thumb', 90 | 'right_index', 91 | 'right_middle', 92 | 'right_ring', 93 | 'right_pinky', 94 | 'right_eye_brow1', 95 | 'right_eye_brow2', 96 | 'right_eye_brow3', 97 | 'right_eye_brow4', 98 | 'right_eye_brow5', 99 | 'left_eye_brow5', 100 | 'left_eye_brow4', 101 | 'left_eye_brow3', 102 | 'left_eye_brow2', 103 | 'left_eye_brow1', 104 | 'nose1', 105 | 'nose2', 106 | 'nose3', 107 | 'nose4', 108 | 'right_nose_2', 109 | 'right_nose_1', 110 | 'nose_middle', 111 | 'left_nose_1', 112 | 'left_nose_2', 113 | 'right_eye1', 114 | 'right_eye2', 115 | 'right_eye3', 116 | 'right_eye4', 117 | 'right_eye5', 118 | 'right_eye6', 119 | 'left_eye4', 120 | 'left_eye3', 121 | 'left_eye2', 122 | 'left_eye1', 123 | 'left_eye6', 124 | 'left_eye5', 125 | 'right_mouth_1', 126 | 'right_mouth_2', 127 | 'right_mouth_3', 128 | 'mouth_top', 129 | 'left_mouth_3', 130 | 'left_mouth_2', 131 | 'left_mouth_1', 132 | 'left_mouth_5', # 59 in OpenPose output 133 | 'left_mouth_4', # 58 in OpenPose output 134 | 'mouth_bottom', 135 | 'right_mouth_4', 136 | 'right_mouth_5', 137 | 'right_lip_1', 138 | 'right_lip_2', 139 | 'lip_top', 140 | 'left_lip_2', 141 | 'left_lip_1', 142 | 'left_lip_3', 143 | 'lip_bottom', 144 | 'right_lip_3', 145 | # Face contour 146 | 'right_contour_1', 147 | 'right_contour_2', 148 | 'right_contour_3', 149 | 'right_contour_4', 150 | 'right_contour_5', 151 | 'right_contour_6', 152 | 'right_contour_7', 153 | 'right_contour_8', 154 | 'contour_middle', 155 | 'left_contour_8', 156 | 'left_contour_7', 157 | 'left_contour_6', 158 | 'left_contour_5', 159 | 'left_contour_4', 160 | 'left_contour_3', 161 | 'left_contour_2', 162 | 'left_contour_1', 163 | ] 164 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | from tqdm import tqdm 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | from torch.utils.data import DataLoader 9 | import torchvision.transforms as transforms 10 | 11 | from 
lib.core.config import cfg, update_config 12 | from lib.models.model import HACO 13 | from lib.utils.contact_utils import get_contact_thres 14 | from lib.utils.train_utils import get_transform, worker_init_fn 15 | from lib.utils.eval_utils import evaluation 16 | 17 | 18 | parser = argparse.ArgumentParser(description='Test HACO') 19 | parser.add_argument('--backbone', type=str, default='hamer', choices=['hamer', 'vit-l-16', 'vit-b-16', 'vit-s-16', 'handoccnet', 'hrnet-w48', 'hrnet-w32', 'resnet-152', 'resnet-101', 'resnet-50', 'resnet-34', 'resnet-18'], help='backbone model') 20 | parser.add_argument('--test_name', type=str, default='MOW', help='dataset name for evaluation') 21 | parser.add_argument('--checkpoint', type=str, default='', help='model path for evaluation') 22 | args = parser.parse_args() 23 | 24 | 25 | # Import dataset 26 | exec(f'from data.{args.test_name}.dataset import {args.test_name}') 27 | 28 | 29 | # Set device as CUDA 30 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 31 | torch.set_num_threads(cfg.DATASET.workers) # Limit Torch 32 | os.environ["OMP_NUM_THREADS"] = "4" # Limit OpenMP (NumPy, MKL) 33 | os.environ["MKL_NUM_THREADS"] = "4" # Limit MKL operations 34 | 35 | 36 | # Initialize directories 37 | experiment_dir = f'experiments_test_{args.test_name.lower()}' 38 | checkpoint_dir = os.path.join(experiment_dir, 'full', 'checkpoints') 39 | os.makedirs(checkpoint_dir, exist_ok=True) 40 | 41 | 42 | # Load config 43 | update_config(backbone_type=args.backbone, test_name=args.test_name, exp_dir=experiment_dir) 44 | 45 | 46 | ############## Dataset ############### 47 | transform = get_transform(args.backbone) 48 | 49 | test_dataset = eval(f'{cfg.DATASET.test_name}')(transform, 'test') 50 | ############## Dataset ############### 51 | 52 | 53 | ############# Dataloader ############# 54 | test_dataloader = DataLoader(test_dataset, batch_size=cfg.TEST.batch, shuffle=False, num_workers=cfg.DATASET.workers, pin_memory=True, drop_last=False, worker_init_fn=worker_init_fn) 55 | ############# Dataloader ############# 56 | 57 | 58 | from lib.core.config import logger 59 | logger.info(f"# of test samples: {len(test_dataset)}") 60 | 61 | 62 | ############# Model ############# 63 | model = HACO().to(device) 64 | model.eval() 65 | ############# Model ############# 66 | 67 | 68 | # Load model checkpoint if provided 69 | if args.checkpoint: 70 | checkpoint = torch.load(args.checkpoint, map_location=device) 71 | model.load_state_dict(checkpoint['state_dict']) 72 | 73 | 74 | ############################### Test Loop ############################### 75 | eval_result = { 76 | 'cont_pre': [None for _ in range(len(test_dataset))], 77 | 'cont_rec': [None for _ in range(len(test_dataset))], 78 | 'cont_f1': [None for _ in range(len(test_dataset))], 79 | } 80 | 81 | test_iterator = tqdm(enumerate(test_dataloader), total=len(test_dataloader), leave=False) 82 | model.eval() 83 | 84 | 85 | for idx, data in test_iterator: 86 | ############# Run model ############# 87 | with torch.no_grad(): 88 | outputs = model({'input': data['input_data'], 'target': data['targets_data'], 'meta_info': data['meta_info']}, mode="test") 89 | ############# Run model ############# 90 | 91 | 92 | ############## Evaluation ############### 93 | # Compute evaluation metrics 94 | eval_thres = get_contact_thres(args.backbone) 95 | eval_out = evaluation(outputs, data['targets_data'], data['meta_info'], mode='test', thres=eval_thres) 96 | for key in [*eval_out]: 97 | eval_result[key][idx] = eval_out[key] 98 | 99 | # Hand 
Contact Estimator (HCE) 100 | total_cont_pre = np.mean([x if x is not None else 0.0 for x in eval_result['cont_pre'][:idx+1]]) 101 | total_cont_rec = np.mean([x if x is not None else 0.0 for x in eval_result['cont_rec'][:idx+1]]) 102 | total_cont_f1 = np.mean([x if x is not None else 0.0 for x in eval_result['cont_f1'][:idx+1]]) 103 | ############## Evaluation ############### 104 | 105 | 106 | logger.info(f"C-Pre: {total_cont_pre:.3f} | C-Rec: {total_cont_rec:.3f} | C-F1: {total_cont_f1:.3f}") 107 | ############################### Test Loop ############################### 108 | 109 | 110 | logger.info('Test finished!!!!') 111 | logger.info(f"Final Results --- C-Pre: {total_cont_pre:.3f} | C-Rec: {total_cont_rec:.3f} | C-F1: {total_cont_f1:.3f}") -------------------------------------------------------------------------------- /lib/models/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from lib.core.config import cfg 6 | 7 | 8 | 9 | class HACO(nn.Module): 10 | def __init__(self): 11 | super(HACO, self).__init__() 12 | if torch.cuda.is_available(): 13 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 14 | self.to(self.device) 15 | 16 | # Load modules 17 | self.backbone = get_backbone_network(type=cfg.MODEL.backbone_type) 18 | self.decoder = get_decoder_network(type=cfg.MODEL.backbone_type) 19 | 20 | def forward(self, inputs, mode='test'): 21 | image = inputs['input']['image'].to(self.device) 22 | 23 | if 'vit' in cfg.MODEL.backbone_type: 24 | image = F.interpolate(image, size=(224, 224), mode='bilinear', align_corners=False) 25 | 26 | img_feat = self.backbone(image) 27 | contact_out, contact_336_out, contact_84_out, contact_joint_out = self.decoder(img_feat) 28 | 29 | return dict(contact_out=contact_out, contact_336_out=contact_336_out, contact_84_out=contact_84_out, contact_joint_out=contact_joint_out) 30 | 31 | 32 | 33 | def get_backbone_network(type='hamer'): 34 | if type in ['hamer']: 35 | from lib.models.backbone.backbone_hamer_style import ViT_HaMeR 36 | backbone = ViT_HaMeR() 37 | checkpoint = torch.load(cfg.MODEL.hamer_backbone_pretrained_path, map_location='cuda')['state_dict'] 38 | filtered_state_dict = {k[len("backbone."):]: v for k, v in checkpoint.items() if k.startswith("backbone.")} 39 | backbone.load_state_dict(filtered_state_dict) 40 | elif type in ['resnet-18']: 41 | from lib.models.backbone.resnet import ResNetBackbone 42 | backbone = ResNetBackbone(18) # ResNet 43 | backbone.init_weights() 44 | elif type in ['resnet-34']: 45 | from lib.models.backbone.resnet import ResNetBackbone 46 | backbone = ResNetBackbone(34) # ResNet 47 | backbone.init_weights() 48 | elif type in ['resnet-50']: 49 | from lib.models.backbone.resnet import ResNetBackbone 50 | backbone = ResNetBackbone(50) # ResNet 51 | backbone.init_weights() 52 | elif type in ['resnet-101']: 53 | from lib.models.backbone.resnet import ResNetBackbone 54 | backbone = ResNetBackbone(101) # ResNet 55 | backbone.init_weights() 56 | elif type in ['resnet-152']: 57 | from lib.models.backbone.resnet import ResNetBackbone 58 | backbone = ResNetBackbone(152) # ResNet 59 | backbone.init_weights() 60 | elif type in ['hrnet-w32']: 61 | from lib.models.backbone.hrnet import HighResolutionNet 62 | from lib.utils.func_utils import load_config 63 | config = load_config(cfg.MODEL.hrnet_w32_backbone_config_path) 64 | pretrained = cfg.MODEL.hrnet_w32_backbone_pretrained_path 65 | 
backbone = HighResolutionNet(config) 66 | backbone.init_weights(pretrained=pretrained) 67 | elif type in ['hrnet-w48']: 68 | from lib.models.backbone.hrnet import HighResolutionNet 69 | from lib.utils.func_utils import load_config 70 | config = load_config(cfg.MODEL.hrnet_w48_backbone_config_path) 71 | pretrained = cfg.MODEL.hrnet_w48_backbone_pretrained_path 72 | backbone = HighResolutionNet(config) 73 | backbone.init_weights(pretrained=pretrained) 74 | elif type in ['handoccnet']: 75 | from lib.models.backbone.fpn import FPN 76 | backbone = FPN(pretrained=False) 77 | pretrained = cfg.MODEL.handoccnet_backbone_pretrained_path 78 | state_dict = {k[len('module.backbone.'):]: v for k, v in torch.load(pretrained)['network'].items() if k.startswith('module.backbone.')} 79 | backbone.load_state_dict(state_dict, strict=True) 80 | elif type in ['vit-s-16']: 81 | from lib.models.backbone.vit import ViTBackbone 82 | backbone = ViTBackbone(model_name='vit_small_patch16_224', pretrained=True) 83 | elif type in ['vit-b-16']: 84 | from lib.models.backbone.vit import ViTBackbone 85 | backbone = ViTBackbone(model_name='vit_base_patch16_224', pretrained=True) 86 | elif type in ['vit-l-16']: 87 | from lib.models.backbone.vit import ViTBackbone 88 | backbone = ViTBackbone(model_name='vit_large_patch16_224', pretrained=True) 89 | else: 90 | raise NotImplementedError 91 | 92 | return backbone 93 | 94 | 95 | 96 | def get_decoder_network(type='hamer'): 97 | from lib.models.decoder.decoder_hamer_style import ContactTransformerDecoderHead 98 | decoder = ContactTransformerDecoderHead() 99 | 100 | return decoder -------------------------------------------------------------------------------- /data/PROX/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import json 4 | import pickle 5 | import numpy as np 6 | 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision.transforms import Normalize 10 | 11 | from lib.core.config import cfg 12 | from lib.utils.human_models import mano 13 | from lib.utils.func_utils import load_img 14 | from lib.utils.preprocessing import augmentation_contact 15 | 16 | 17 | 18 | # Main dataloader code for PROX dataset 19 | class PROX(Dataset): 20 | def __init__(self, transform, data_split): 21 | super(PROX, self).__init__() 22 | self.__dict__.update(locals()) 23 | 24 | self.transform = transform 25 | dataset_name = 'prox' 26 | 27 | self.data_split = data_split 28 | self.root_path = root_path = os.path.join('data', 'PROX') 29 | self.data_dir = os.path.join(self.root_path, 'data') 30 | 31 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 32 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 33 | os.makedirs(self.annot_data_path, exist_ok=True) 34 | os.makedirs(self.contact_data_path, exist_ok=True) 35 | 36 | # SMPL-X to MANO mapping 37 | smplx_mano_mapping_path = os.path.join('data', 'base_data', 'conversions', 'smplx_to_mano.pkl') 38 | 39 | with open(smplx_mano_mapping_path, 'rb') as f: 40 | self.smplx_to_mano_mapping = pickle.load(f) 41 | self.smplx_to_mano_mapping_r = self.smplx_to_mano_mapping["right_hand"] 42 | 43 | # Camera 44 | with open(os.path.join(self.data_dir, 'quantitative/calibration/Color.json'), 'r') as f: 45 | calibration = json.load(f) 46 | self.cam_param = {'focal': calibration['f'], 'princpt': calibration['c']} 47 | 48 | # Organize db id based on split 49 | db_split_path = 
os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 50 | with open(db_split_path, 'r') as f: 51 | self.db = json.load(f) 52 | 53 | 54 | def __len__(self): 55 | return len(self.db) 56 | 57 | 58 | def __getitem__(self, index): 59 | aid = self.db[index] 60 | seq_name = aid.split('/')[-3] 61 | img_name = aid.split('/')[-1] 62 | sample_id = f'{seq_name}-{img_name}' 63 | 64 | orig_img_path = os.path.join(os.path.join(self.data_dir, 'quantitative', 'recordings', seq_name, 'Color', f'{img_name}.jpg')) 65 | 66 | orig_img = load_img(orig_img_path) 67 | orig_img = cv2.flip(orig_img, 1) # only for PROX dataset 68 | img_shape = orig_img.shape[:2] 69 | img_h, img_w = img_shape 70 | 71 | mano_valid = np.ones((1), dtype=np.float32) 72 | 73 | 74 | ################################## LOAD ANNOTATION DATA ##################################### 75 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 76 | 77 | annot_data = np.load(annot_data_path, allow_pickle=True) 78 | bbox_hand_r = annot_data['bbox_ho'] 79 | 80 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 81 | contact_data = dict(contact_h=contact_h) 82 | ################################## LOAD ANNOTATION DATA ##################################### 83 | 84 | 85 | ############################### PROCESS CROP AND AUGMENTATION ################################ 86 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 87 | crop_img = img.copy() 88 | 89 | # Transform for 3D HMR 90 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 91 | img = self.transform(img.astype(np.float32)/255.0) 92 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 93 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 94 | img = img.transpose(2, 0, 1) / 255.0 95 | img = normalize_img(torch.from_numpy(img)).float() 96 | else: 97 | raise NotImplementedError 98 | ############################### PROCESS CROP AND AUGMENTATION ################################ 99 | 100 | 101 | input_data = dict(image=img) 102 | targets_data = dict(contact_data=contact_data) 103 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 104 | 105 | 106 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /lib/utils/mano_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2017 Javier Romero, Dimitrios Tzionas, Michael J Black and the Max Planck Gesellschaft. All rights reserved. 3 | This software is provided for research purposes only. 4 | By using this software you agree to the terms of the MANO/SMPL+H Model license here http://mano.is.tue.mpg.de/license 5 | 6 | More information about MANO/SMPL+H is available at http://mano.is.tue.mpg.de. 7 | For comments or questions, please email us at: mano@tue.mpg.de 8 | 9 | 10 | About this file: 11 | ================ 12 | This file defines a wrapper for the loading functions of the MANO model. 13 | 14 | Modules included: 15 | - load_model: 16 | loads the MANO model from a given file location (i.e. a .pkl file location), 17 | or a dictionary object. 
18 | 19 | ''' 20 | import os 21 | import cv2 22 | import torch 23 | import numpy as np 24 | import pickle 25 | import chumpy as ch 26 | from chumpy.ch import MatVecMult 27 | 28 | 29 | class Rodrigues(ch.Ch): 30 | dterms = 'rt' 31 | 32 | def compute_r(self): 33 | return cv2.Rodrigues(self.rt.r)[0] 34 | 35 | def compute_dr_wrt(self, wrt): 36 | if wrt is self.rt: 37 | return cv2.Rodrigues(self.rt.r)[1].T 38 | 39 | 40 | def lrotmin(p): 41 | if isinstance(p, np.ndarray): 42 | p = p.ravel()[3:] 43 | return np.concatenate( 44 | [(cv2.Rodrigues(np.array(pp))[0] - np.eye(3)).ravel() 45 | for pp in p.reshape((-1, 3))]).ravel() 46 | if p.ndim != 2 or p.shape[1] != 3: 47 | p = p.reshape((-1, 3)) 48 | p = p[1:] 49 | return ch.concatenate([(Rodrigues(pp) - ch.eye(3)).ravel() 50 | for pp in p]).ravel() 51 | 52 | 53 | def posemap(s): 54 | if s == 'lrotmin': 55 | return lrotmin 56 | else: 57 | raise Exception('Unknown posemapping: %s' % (str(s), )) 58 | 59 | 60 | def ready_arguments(fname_or_dict, posekey4vposed='pose'): 61 | if not isinstance(fname_or_dict, dict): 62 | dd = pickle.load(open(fname_or_dict, 'rb'), encoding='latin1') 63 | else: 64 | dd = fname_or_dict 65 | 66 | want_shapemodel = 'shapedirs' in dd 67 | nposeparms = dd['kintree_table'].shape[1] * 3 68 | 69 | if 'trans' not in dd: 70 | dd['trans'] = np.zeros(3) 71 | if 'pose' not in dd: 72 | dd['pose'] = np.zeros(nposeparms) 73 | if 'shapedirs' in dd and 'betas' not in dd: 74 | dd['betas'] = np.zeros(dd['shapedirs'].shape[-1]) 75 | 76 | for s in [ 77 | 'v_template', 'weights', 'posedirs', 'pose', 'trans', 'shapedirs', 78 | 'betas', 'J' 79 | ]: 80 | if (s in dd) and not hasattr(dd[s], 'dterms'): 81 | dd[s] = ch.array(dd[s]) 82 | 83 | assert (posekey4vposed in dd) 84 | if want_shapemodel: 85 | dd['v_shaped'] = dd['shapedirs'].dot(dd['betas']) + dd['v_template'] 86 | v_shaped = dd['v_shaped'] 87 | J_tmpx = MatVecMult(dd['J_regressor'], v_shaped[:, 0]) 88 | J_tmpy = MatVecMult(dd['J_regressor'], v_shaped[:, 1]) 89 | J_tmpz = MatVecMult(dd['J_regressor'], v_shaped[:, 2]) 90 | dd['J'] = ch.vstack((J_tmpx, J_tmpy, J_tmpz)).T 91 | pose_map_res = posemap(dd['bs_type'])(dd[posekey4vposed]) 92 | dd['v_posed'] = v_shaped + dd['posedirs'].dot(pose_map_res) 93 | else: 94 | pose_map_res = posemap(dd['bs_type'])(dd[posekey4vposed]) 95 | dd_add = dd['posedirs'].dot(pose_map_res) 96 | dd['v_posed'] = dd['v_template'] + dd_add 97 | 98 | return dd 99 | 100 | 101 | 102 | def get_mano_pca_basis(ncomps=45, use_pca=True, side='right', mano_root='data/base_data/human_models/mano'): 103 | if use_pca: 104 | ncomps = ncomps 105 | else: 106 | ncomps = 45 107 | 108 | if side == 'right': 109 | mano_path = os.path.join(mano_root, 'MANO_RIGHT.pkl') 110 | elif side == 'left': 111 | mano_path = os.path.join(mano_root, 'MANO_LEFT.pkl') 112 | smpl_data = ready_arguments(mano_path) 113 | hands_components = smpl_data['hands_components'] 114 | selected_components = hands_components[:ncomps] 115 | th_selected_comps = selected_components 116 | 117 | return torch.tensor(th_selected_comps, dtype=torch.float32) 118 | 119 | 120 | 121 | def change_flat_hand_mean(hand_pose, remove=True, side='right', mano_root='data/base_data/human_models/mano'): 122 | if side == 'right': 123 | mano_path = os.path.join(mano_root, 'MANO_RIGHT.pkl') 124 | elif side == 'left': 125 | mano_path = os.path.join(mano_root, 'MANO_LEFT.pkl') 126 | smpl_data = ready_arguments(mano_path) 127 | 128 | # Get hand mean 129 | hands_mean = smpl_data['hands_mean'] 130 | hands_mean = hands_mean.copy() # hands_mean: (45) 131 | 
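    # Note (added descriptive comment): remove=True subtracts the MANO mean hand pose
    # ('hands_mean', 45 values) from the articulation parameters hand_pose[3:], and
    # remove=False adds it back; the first three values (global orientation) are left
    # untouched. For array inputs this modifies hand_pose in place as well as returning it.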
132 | if remove: 133 | hand_pose[3:] = hand_pose[3:] - hands_mean 134 | else: 135 | hand_pose[3:] = hand_pose[3:] + hands_mean 136 | return hand_pose -------------------------------------------------------------------------------- /lib/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from collections import defaultdict, deque 4 | 5 | import mediapipe as mp 6 | 7 | 8 | from lib.utils.vis_utils import draw_landmarks_on_image, draw_landmarks_on_image_simple 9 | 10 | 11 | def smooth_bbox(prev_bbox, curr_bbox, alpha=0.8): 12 | if prev_bbox is None: 13 | return curr_bbox 14 | return [alpha * p + (1 - alpha) * c for p, c in zip(prev_bbox, curr_bbox)] 15 | 16 | 17 | def smooth_contact_mask(prev_mask, curr_mask, alpha=0.8): 18 | if prev_mask is None: 19 | return curr_mask.astype(np.float32) 20 | return alpha * prev_mask + (1 - alpha) * curr_mask.astype(np.float32) 21 | 22 | 23 | def remove_small_contact_components(contact_mask, faces, min_size=20): 24 | vertex_to_faces = defaultdict(list) 25 | for i, f in enumerate(faces): 26 | for v in f: 27 | vertex_to_faces[v].append(i) 28 | 29 | visited = np.zeros(len(contact_mask), dtype=bool) 30 | filtered_mask = np.zeros_like(contact_mask, dtype=bool) 31 | 32 | for v in range(len(contact_mask)): 33 | if visited[v] or not contact_mask[v]: 34 | continue 35 | 36 | queue = deque([v]) 37 | component = [] 38 | while queue: 39 | curr = queue.popleft() 40 | if visited[curr] or not contact_mask[curr]: 41 | continue 42 | visited[curr] = True 43 | component.append(curr) 44 | for f_idx in vertex_to_faces[curr]: 45 | for neighbor in faces[f_idx]: 46 | if not visited[neighbor] and contact_mask[neighbor]: 47 | queue.append(neighbor) 48 | 49 | if len(component) >= min_size: 50 | filtered_mask[component] = True 51 | 52 | return filtered_mask 53 | 54 | 55 | def initialize_video_writer(output_path, fps, frame_size): 56 | tried_codecs = ['avc1', 'H264', 'X264', 'MJPG', 'mp4v'] # we recommend using 'MJPG' 57 | for codec in tried_codecs: 58 | fourcc = cv2.VideoWriter_fourcc(*codec) 59 | writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size) 60 | if writer.isOpened(): 61 | print(f"Using codec '{codec}' for {output_path}") 62 | return writer 63 | writer.release() 64 | raise RuntimeError(f"Failed to initialize VideoWriter for {output_path}") 65 | 66 | 67 | def run_wilor_hand_detector(orig_img, detector): 68 | conf = 0.3 69 | IoU_threshold = 0.3 70 | 71 | detections = detector(orig_img, conf=conf, verbose=False, iou=IoU_threshold)[0] 72 | 73 | img_h, img_w, _ = orig_img.shape 74 | 75 | right_hand_bbox = [0, 0, img_w, img_h] # [x_min_expand, y_min_expand, bb_width_expand, bb_height_expand] 76 | best_conf = 0. 77 | 78 | # Find the most confident right hand 79 | for det in detections: 80 | Bbox = det.boxes.data.cpu().detach().squeeze().numpy() 81 | Conf = det.boxes.conf.data.cpu().detach()[0].numpy().reshape(-1).astype(np.float16) 82 | Side = det.boxes.cls.data.cpu().detach() 83 | 84 | if (Side.item() == 1.) 
and (Conf.item() > best_conf): 85 | right_hand_bbox = [Bbox[0], Bbox[1], Bbox[2]-Bbox[0], Bbox[3]-Bbox[1]] 86 | 87 | return right_hand_bbox 88 | 89 | 90 | def extract_frames_with_hand(cap, detector, detector_type='wilor'): 91 | frames_with_hand = [] 92 | frame_idx = 0 93 | 94 | while cap.isOpened(): 95 | ret, frame = cap.read() 96 | if not ret: 97 | break 98 | 99 | orig_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 100 | if detector_type == 'wilor': 101 | right_hand_bbox = run_wilor_hand_detector(orig_img, detector) 102 | _, right_hand_bbox = draw_landmarks_on_image_simple(orig_img.copy(), right_hand_bbox) 103 | elif detector_type == 'mediapipe': 104 | mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=orig_img) 105 | detection_result = detector.detect(mp_image) 106 | _, right_hand_bbox = draw_landmarks_on_image(orig_img.copy(), detection_result) 107 | 108 | if right_hand_bbox is not None: 109 | frames_with_hand.append((frame_idx, frame, right_hand_bbox)) 110 | 111 | frame_idx += 1 112 | 113 | cap.release() 114 | return frames_with_hand 115 | 116 | 117 | def find_longest_continuous_segment(frames_with_hand): 118 | longest_segment = [] 119 | current_segment = [] 120 | 121 | for i in range(len(frames_with_hand)): 122 | if i == 0 or frames_with_hand[i][0] == frames_with_hand[i - 1][0] + 1: 123 | current_segment.append(frames_with_hand[i]) 124 | else: 125 | if len(current_segment) > len(longest_segment): 126 | longest_segment = current_segment 127 | current_segment = [frames_with_hand[i]] 128 | 129 | if len(current_segment) > len(longest_segment): 130 | longest_segment = current_segment 131 | 132 | return longest_segment -------------------------------------------------------------------------------- /data/HIC/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torchvision.transforms import Normalize 8 | 9 | from lib.core.config import cfg 10 | from lib.utils.human_models import mano 11 | from lib.utils.func_utils import load_img 12 | from lib.utils.preprocessing import augmentation_contact 13 | 14 | 15 | 16 | # Main dataloader code for HIC dataset 17 | class HIC(Dataset): 18 | def __init__(self, transform, data_split): 19 | super(HIC, self).__init__() 20 | self.__dict__.update(locals()) 21 | 22 | self.transform = transform 23 | dataset_name = 'hic' 24 | 25 | self.data_split = data_split 26 | self.root_path = root_path = os.path.join('data', 'HIC') 27 | self.data_dir = os.path.join(self.root_path, 'data') 28 | 29 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 30 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 31 | 32 | # inter: 01, 02, 03, 04, 05, 06, 07 | single: 08, 09, 10, 11 | Single-HOI: 15, 19, 20, 21 | Inter-HOI: 16, 17, 18 33 | self.inter_seq_names = ['01', '02', '03', '04', '05', '06', '07'] 34 | self.single_seq_names = ['08', '09', '10', '11'] 35 | self.single_hoi_seq_names = ['15', '19', '20', '21'] 36 | self.inter_hoi_seq_names = ['16', '17', '18'] 37 | 38 | # Split train/test set (we only use inter seq) 39 | self.train_seq_names = ['01', '02', '03', '04', '05', '06'] 40 | self.test_seq_names = ['07'] 41 | 42 | self.cam_param = {'focal': [525.0, 525.0], 'princpt': [319.5, 239.5]} # this is fixed for HIC dataset 43 | 44 | # Organize db id based on split 45 | db_split_path = os.path.join(self.root_path, 'splits', 
f'{self.data_split}.json') 46 | with open(db_split_path, 'r') as f: 47 | self.db = json.load(f) 48 | 49 | 50 | def __len__(self): 51 | return len(self.db['images']) 52 | 53 | 54 | def __getitem__(self, index): 55 | images_info = self.db['images'][index] 56 | annotations = self.db['annotations'][index] 57 | 58 | aid = annotations['id'] 59 | image_id = annotations['image_id'] 60 | seq_name = images_info['seq_name'] 61 | file_name = images_info['file_name'] 62 | img_w, img_h = images_info['width'], images_info['height'] 63 | 64 | bbox = annotations['bbox'] 65 | hand_type = annotations['hand_type'] 66 | right_mano_path = annotations['right_mano_path'] 67 | left_mano_path = annotations['left_mano_path'] 68 | 69 | sample_id = image_id 70 | 71 | # Load image 72 | orig_img_path = os.path.join(self.data_dir, file_name) 73 | orig_img = load_img(orig_img_path) 74 | img_shape = orig_img.shape[:2] 75 | img_h, img_w = img_shape 76 | 77 | mano_valid = np.ones((1), dtype=np.float32) 78 | 79 | 80 | ################################## LOAD ANNOTATION DATA ##################################### 81 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 82 | 83 | annot_data = np.load(annot_data_path, allow_pickle=True) 84 | bbox_hand_r = annot_data['bbox_hand'] 85 | cam_param = annot_data['cam_param'] 86 | 87 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 88 | contact_data = dict(contact_h=contact_h) 89 | ################################## LOAD ANNOTATION DATA ##################################### 90 | 91 | 92 | ############################### PROCESS CROP AND AUGMENTATION ################################ 93 | img, img2bb_trans, bb2img_trans, rot, do_flip, _ = augmentation_contact(orig_img.copy(), bbox_hand_r, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 94 | crop_img = img.copy() 95 | 96 | # Transform for 3D HMR 97 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 98 | img = self.transform(img.astype(np.float32)/255.0) 99 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 100 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 101 | img = img.transpose(2, 0, 1) / 255.0 102 | img = normalize_img(torch.from_numpy(img)).float() 103 | else: 104 | raise NotImplementedError 105 | ############################### PROCESS CROP AND AUGMENTATION ################################ 106 | 107 | 108 | input_data = dict(image=img) 109 | targets_data = dict(contact_data=contact_data) 110 | meta_info = dict(sample_id=str(sample_id), mano_valid=mano_valid) 111 | 112 | 113 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /data/DexYCB/toolkit/layers/ycb_layer.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Layer to transform YCB mesh vertices with SE3 transformation.""" 6 | 7 | import os 8 | import torch 9 | 10 | from torch.nn import Module 11 | 12 | from ..obj import OBJ 13 | 14 | 15 | class YCBLayer(Module): 16 | """Layer to transform YCB mesh vertices with SE3 transformation.""" 17 | 18 | def __init__(self, i): 19 | """Constructor. 20 | 21 | Args: 22 | i: YCB object index. 
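           (For example, i = 1 selects '002_master_chef_can' from the class list defined below.)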
23 | """ 24 | super(YCBLayer, self).__init__() 25 | 26 | assert 'DEX_YCB_DIR' in os.environ, "environment variable 'DEX_YCB_DIR' is not set" 27 | self._path = os.environ['DEX_YCB_DIR'] + "/models" 28 | self._classes = ('__background__', '002_master_chef_can', '003_cracker_box', 29 | '004_sugar_box', '005_tomato_soup_can', 30 | '006_mustard_bottle', '007_tuna_fish_can', 31 | '008_pudding_box', '009_gelatin_box', 32 | '010_potted_meat_can', '011_banana', '019_pitcher_base', 33 | '021_bleach_cleanser', '024_bowl', '025_mug', 34 | '035_power_drill', '036_wood_block', '037_scissors', 35 | '040_large_marker', '051_large_clamp', 36 | '052_extra_large_clamp', '061_foam_brick') 37 | self._class_name = self._classes[i] 38 | self._obj_file = self._path + '/' + self._class_name + "/textured_simple.obj" 39 | self._obj = OBJ(self._obj_file) 40 | assert len(self._obj.mesh_list) == 1 41 | assert len(self._obj.mesh_list[0].groups) == 1 42 | g = self._obj.mesh_list[0].groups[0] 43 | 44 | self._material = g.material 45 | self._tex_coords = self._obj.t[g.f_t] 46 | 47 | v = torch.from_numpy(self._obj.v).t() 48 | n = torch.from_numpy(self._obj.n).t() 49 | assert (g.f_v == g.f_n).all() 50 | f = torch.from_numpy(g.f_v).view((-1, 3)) 51 | self.register_buffer('v', v) 52 | self.register_buffer('n', n) 53 | self.register_buffer('f', f) 54 | 55 | @property 56 | def obj_file(self): 57 | return self._obj_file 58 | 59 | @property 60 | def material(self): 61 | return self._material 62 | 63 | @property 64 | def tex_coords(self): 65 | return self._tex_coords 66 | 67 | def forward(self, r, t): 68 | """Forward function. 69 | 70 | Args: 71 | r: A tensor of shape [B, 3] containing the rotation in axis-angle. 72 | t: A tensor of shape [B, 3] containing the translation. 73 | 74 | Returns: 75 | v: A tensor of shape [B, N, 3] containing the transformed vertices. 76 | n: A tensor of shape [B, N, 3] containing the transformed normals. 77 | """ 78 | R = rv2dcm(r) 79 | v = torch.matmul(R, self.v).permute(0, 2, 1) + t.unsqueeze(1) 80 | n = torch.matmul(R, self.n).permute(0, 2, 1) 81 | return v, n 82 | 83 | 84 | # https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula#Matrix_notation 85 | def rv2dcm(rv): 86 | """Converts rotation vectors to direction cosine matrices. 87 | 88 | Args: 89 | rv: A tensor of shape [B, 3] containing the rotation vectors. 90 | 91 | Returns: 92 | A tensor of shape [B, 3, 3] containing the direction cosine matrices. 93 | """ 94 | angle = torch.norm(rv + 1e-8, p=2, dim=1) 95 | axis = rv / angle.unsqueeze(1) 96 | s = torch.sin(angle).unsqueeze(1).unsqueeze(2) 97 | c = torch.cos(angle).unsqueeze(1).unsqueeze(2) 98 | I = torch.eye(3, device=rv.device).expand(rv.size(0), -1, -1) 99 | z = torch.zeros_like(angle) 100 | K = torch.stack( 101 | (torch.stack((z, -axis[:, 2], axis[:, 1]), 102 | dim=1), torch.stack((axis[:, 2], z, -axis[:, 0]), dim=1), 103 | torch.stack((-axis[:, 1], axis[:, 0], z), dim=1)), 104 | dim=1) 105 | dcm = I + s * K + (1 - c) * torch.bmm(K, K) 106 | return dcm 107 | 108 | 109 | # https://en.wikipedia.org/wiki/Rotation_formalisms_in_three_dimensions#Rotation_matrix_%E2%86%94_Euler_axis/angle 110 | # https://github.com/kashif/ceres-solver/blob/087462a90dd1c23ac443501f3314d0fcedaea5f7/include/ceres/rotation.h#L178 111 | # S. Sarabandi and F. Thomas. A Survey on the Computation of Quaternions from Rotation Matrices. J MECH ROBOT, 2019. 112 | # https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula 113 | def dcm2rv(dcm): 114 | """Converts direction cosine matrices to rotation vectors. 
115 | 116 | Args: 117 | dcm: A tensor of shape [B, 3, 3] containing the direction cosine matrices. 118 | 119 | Returns: 120 | A tensor of shape [B, 3] containing the rotation vectors. 121 | """ 122 | X = torch.stack((dcm[:, 2, 1] - dcm[:, 1, 2], dcm[:, 0, 2] - dcm[:, 2, 0], 123 | dcm[:, 1, 0] - dcm[:, 0, 1]), 124 | dim=1) 125 | s = torch.norm(X, p=2, dim=1) / 2 126 | c = (dcm[:, 0, 0] + dcm[:, 1, 1] + dcm[:, 2, 2] - 1) / 2 127 | c = torch.clamp(c, -1, 1) 128 | angle = torch.atan2(s, c) 129 | Y = torch.stack((dcm[:, 0, 0], dcm[:, 1, 1], dcm[:, 2, 2]), dim=1) 130 | Y = torch.sqrt((Y - c.unsqueeze(1)) / (1 - c.unsqueeze(1))) 131 | rv = torch.zeros((dcm.size(0), 3), device=dcm.device) 132 | i1 = s > 1e-3 133 | i2 = (s <= 1e-3) & (c > 0) 134 | i3 = (s <= 1e-3) & (c < 0) 135 | rv[i1] = angle[i1].unsqueeze(1) * X[i1] / (2 * s[i1].unsqueeze(1)) 136 | rv[i2] = X[i2] / 2 137 | rv[i3] = angle[i3].unsqueeze(1) * torch.sign(X[i3]) * Y[i3] 138 | return rv 139 | -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from scipy.spatial.transform import Rotation as R 4 | 5 | 6 | def cam2pixel(cam_coord, f, c): 7 | x = cam_coord[:,0] / (cam_coord[:,2] + 1e-5) * f[0] + c[0] 8 | y = cam_coord[:,1] / (cam_coord[:,2] + 1e-5) * f[1] + c[1] 9 | z = cam_coord[:,2] + 1e-5 10 | return np.stack((x,y,z),1) 11 | 12 | 13 | def world2cam(world_coord, R, t): 14 | cam_coord = np.dot(R, world_coord.transpose(1,0)).transpose(1,0) + t.reshape(1,3) 15 | return cam_coord 16 | 17 | 18 | def transform_joint_to_other_db(src_joint, src_name, dst_name): 19 | src_joint_num = len(src_name) 20 | dst_joint_num = len(dst_name) 21 | 22 | new_joint = np.zeros(((dst_joint_num,) + src_joint.shape[1:]), dtype=np.float32) 23 | 24 | for src_idx in range(len(src_name)): 25 | name = src_name[src_idx] 26 | if name in dst_name: 27 | dst_idx = dst_name.index(name) 28 | new_joint[dst_idx] = src_joint[src_idx] 29 | 30 | return new_joint 31 | 32 | 33 | def apply_homogeneous_transformation(vertices, transform_matrix): 34 | # Convert vertices to homogeneous coordinates (add a column of ones) 35 | num_verts = vertices.shape[0] 36 | verts_homogeneous = torch.cat([vertices, torch.ones((num_verts, 1), dtype=vertices.dtype, device=vertices.device)], dim=1) # Shape (num_verts, 4) 37 | 38 | # Apply the homogeneous transformation 39 | transformed_homogeneous = torch.matmul(transform_matrix, verts_homogeneous.T).T # Shape (num_verts, 4) 40 | 41 | # Convert back to Cartesian coordinates (divide by the homogeneous component) 42 | transformed_vertices = transformed_homogeneous[:, :3] / transformed_homogeneous[:, 3][:, None] # Shape (num_verts, 3) 43 | 44 | return transformed_vertices 45 | 46 | 47 | def apply_homogeneous_transformation_np(vertices, transform_matrix): 48 | # Convert vertices to homogeneous coordinates (add a column of ones) 49 | num_verts = vertices.shape[0] 50 | verts_homogeneous = np.concatenate([vertices, np.ones((num_verts, 1), dtype=vertices.dtype)], axis=1) # Shape (num_verts, 4) 51 | 52 | # Apply the homogeneous transformation 53 | transformed_homogeneous = np.dot(transform_matrix, verts_homogeneous.T).T # Shape (num_verts, 4) 54 | 55 | # Convert back to Cartesian coordinates (divide by the homogeneous component) 56 | transformed_vertices = transformed_homogeneous[:, :3] / transformed_homogeneous[:, 3][:, None] # Shape (num_verts, 3) 57 | 58 | return transformed_vertices 
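# Usage sketch (illustrative annotation, not part of the original file): both helpers above
# expect an (N, 3) vertex tensor/array and a (4, 4) homogeneous matrix whose last row is
# [0, 0, 0, 1]. For instance, a pure translation of 10 cm along x:
#   T = np.eye(4); T[:3, 3] = [0.1, 0.0, 0.0]
#   moved = apply_homogeneous_transformation_np(verts, T)
# leaves moved[:, 1:] equal to verts[:, 1:] and shifts the x coordinates by 0.1.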
59 | 60 | 61 | # Revert MANO global rotation and translation 62 | def inv_mano_global_orient(mano_verts, mano_root, mano_global_orient, mano_trans): 63 | """ 64 | Reverts the global orientation and translation applied to MANO vertices 65 | (i.e., transforms them from the global coordinate space back to a local space). 66 | 67 | Args: 68 | mano_verts (Tensor): shape (num_verts, 3), the MANO vertices. 69 | mano_joints (Tensor): shape (num_joints, 3), the MANO joint positions. 70 | mano_global_orient (Tensor): shape (3,), global orientation in axis-angle format. 71 | mano_trans (Tensor): shape (3,), global translation. 72 | 73 | Returns: 74 | vertices_transformed (Tensor): shape (num_verts, 3), the locally transformed vertices. 75 | transform_matrix (Tensor): shape (4, 4), the homogeneous transformation matrix 76 | that undoes the global transform. 77 | transform_matrix_inv (Tensor): shape (4, 4), the inverse of transform_matrix 78 | (i.e., the forward transform). 79 | """ 80 | device = mano_verts.device 81 | 82 | # 1) Convert global orientation (axis-angle) -> rotation matrix 83 | R = axis_angle_to_rotation_matrix(mano_global_orient) # shape (3, 3) 84 | 85 | # 2) Invert rotation matrix 86 | # (for an orthonormal rotation, inverse is transpose) 87 | R_inv = invert_rotation_matrix(R) 88 | 89 | # 3) Identify the 'root' for the transform 90 | # Typically 'Wrist' in MANO 91 | wrist_position = mano_root 92 | adjust_root = wrist_position 93 | 94 | # 4) Build the matrix that undoes global transform (global -> local) 95 | transform_matrix = torch.eye(4, device=device) 96 | transform_matrix[:3, :3] = R_inv 97 | transform_matrix[:3, 3] = ( 98 | -torch.matmul(R_inv, adjust_root) 99 | - mano_trans 100 | + wrist_position 101 | ) 102 | 103 | # 5) Apply transform_matrix to vertices 104 | verts_hom = torch.cat( 105 | [mano_verts, torch.ones((mano_verts.shape[0], 1), device=device)], 106 | dim=1 107 | ) 108 | vertices_transformed = (transform_matrix @ verts_hom.T).T[:, :3] 109 | 110 | # 6) Manually invert transform_matrix without torch.linalg.inv 111 | # 112 | # If T = [[A, b], 113 | # [0, 1]], 114 | # then T^-1 = [[A^-1, -A^-1 b], 115 | # [0, 1 ]]. 116 | # 117 | # Here, A = R_inv, so A^-1 = R (the original rotation), 118 | # b = transform_matrix[:3, 3]. 
119 | # 120 | # So T^-1[:3, :3] = R 121 | # T^-1[:3, 3] = -R @ b 122 | # 123 | transform_matrix_inv = torch.eye(4, device=device) 124 | transform_matrix_inv[:3, :3] = R # because R is (R_inv)^-1 125 | transform_matrix_inv[:3, 3] = -R @ transform_matrix[:3, 3] 126 | 127 | return vertices_transformed, transform_matrix, transform_matrix_inv -------------------------------------------------------------------------------- /data/H2O/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torchvision.transforms import Normalize 8 | 9 | from lib.core.config import cfg 10 | from lib.utils.human_models import mano 11 | from lib.utils.func_utils import load_img 12 | from lib.utils.preprocessing import augmentation_contact 13 | from lib.utils.train_utils import get_contact_difficulty_sample_id 14 | 15 | 16 | def get_sample_id(split, index): 17 | aid = split[index] 18 | subject_name = aid.split('/')[0] + '_ego' # we only use ego split for H2O dataset 19 | seq_name = aid.split('/')[1] 20 | obj_id = aid.split('/')[2] 21 | cam_name = aid.split('/')[3] 22 | img_name = aid.split('/')[5] 23 | img_id = img_name.split('.png')[0] 24 | 25 | sample_id = f'{subject_name}-{seq_name}-{obj_id}-{cam_name}-{img_id}' 26 | return sample_id 27 | 28 | 29 | 30 | # Main dataloader code for H2O dataset 31 | class H2O(Dataset): 32 | def __init__(self, transform, data_split): 33 | super(H2O, self).__init__() 34 | self.__dict__.update(locals()) 35 | 36 | self.transform = transform 37 | dataset_name = 'h2o' 38 | 39 | self.data_split = data_split 40 | self.root_path = root_path = os.path.join('data', 'H2O') 41 | self.data_dir = os.path.join(self.root_path, 'data') 42 | 43 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 44 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 45 | 46 | # Organize db id based on split 47 | db_split_path = os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 48 | with open(db_split_path, 'r') as f: 49 | self.split = json.load(f) 50 | 51 | # Sort contact by difficulty (Balanced contact sampling) 52 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 53 | sample_id_to_split_id = {} 54 | for split_idx in range(len(self.split)): 55 | each_sample_id = get_sample_id(self.split, split_idx) 56 | if each_sample_id in sample_id_to_split_id: 57 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 58 | else: 59 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 60 | 61 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 62 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 63 | 64 | new_split = [sample_id_to_split_id[key] for key in sample_id_difficulty_list if key in [*sample_id_to_split_id]] 65 | self.split = new_split 66 | 67 | 68 | def __len__(self): 69 | return len(self.split) 70 | 71 | 72 | def __getitem__(self, index): 73 | aid = self.split[index] 74 | subject_name = aid.split('/')[0] + '_ego' # we only use ego split for H2O dataset 75 | seq_name = aid.split('/')[1] 76 | obj_id = aid.split('/')[2] 77 | cam_name = aid.split('/')[3] 78 | img_name = aid.split('/')[5] 79 | img_id = img_name.split('.png')[0] 80 | 81 | sample_id = 
f'{subject_name}-{seq_name}-{obj_id}-{cam_name}-{img_id}' 82 | 83 | orig_img_path = os.path.join(self.data_dir, subject_name, seq_name, obj_id, cam_name, 'rgb', f'{img_id}.png') 84 | 85 | orig_img = load_img(orig_img_path) 86 | img_shape = orig_img.shape[:2] 87 | img_h, img_w = img_shape 88 | 89 | mano_valid = np.ones((1), dtype=np.float32) 90 | 91 | 92 | ################################## LOAD ANNOTATION DATA ##################################### 93 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 94 | 95 | annot_data = np.load(annot_data_path, allow_pickle=True) 96 | bbox_hand_r = annot_data['bbox_hand_r'] 97 | 98 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 99 | contact_data = dict(contact_h=contact_h) 100 | ################################## LOAD ANNOTATION DATA ##################################### 101 | 102 | 103 | ############################### PROCESS CROP AND AUGMENTATION ################################ 104 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 105 | crop_img = img.copy() 106 | 107 | # Transform for 3D HMR 108 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 109 | img = self.transform(img.astype(np.float32)/255.0) 110 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 111 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 112 | img = img.transpose(2, 0, 1) / 255.0 113 | img = normalize_img(torch.from_numpy(img)).float() 114 | else: 115 | raise NotImplementedError 116 | ############################### PROCESS CROP AND AUGMENTATION ################################ 117 | 118 | 119 | input_data = dict(image=img) 120 | targets_data = dict(contact_data=contact_data) 121 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 122 | 123 | 124 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /data/HOI4D/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.transforms import Normalize 9 | 10 | from lib.core.config import cfg 11 | from lib.utils.human_models import mano 12 | from lib.utils.func_utils import load_img 13 | from lib.utils.preprocessing import augmentation_contact 14 | from lib.utils.train_utils import get_contact_difficulty_sample_id 15 | 16 | 17 | obj_cls_mapping = [ 18 | '', 'ToyCar', 'Mug', 'Laptop', 'StorageFurniture', 'Bottle', 19 | 'Safe', 'Bowl', 'Bucket', 'Scissors', '', 'Pliers', 'Kettle', 20 | 'Knife', 'TrashCan', '', '', 'Lamp', 'Stapler', '', 'Chair' 21 | ] 22 | 23 | 24 | 25 | # Main dataloader code for HOI4D dataset 26 | class HOI4D(Dataset): 27 | def __init__(self, transform, data_split): 28 | super(HOI4D, self).__init__() 29 | self.__dict__.update(locals()) 30 | 31 | self.transform = transform 32 | dataset_name = 'hoi4d' 33 | 34 | self.data_split = data_split 35 | self.root_path = root_path = os.path.join('data', 'HOI4D') 36 | self.data_dir = os.path.join(self.root_path, 'data') 37 | 38 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 39 | 
self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 40 | 41 | # Organize db id based on split 42 | db_split_path = os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 43 | with open(db_split_path, 'r') as f: 44 | db_split = json.load(f) 45 | 46 | self.split = db_split 47 | 48 | # Sort contact by difficulty (Balanced contact sampling) 49 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 50 | sample_id_to_split_id = {} 51 | for split_idx in range(len(self.split)): 52 | each_sample_id = self.split[split_idx] 53 | if each_sample_id in sample_id_to_split_id: 54 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 55 | else: 56 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 57 | 58 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 59 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 60 | 61 | new_split = [key for key in sample_id_difficulty_list] 62 | self.split = new_split 63 | 64 | 65 | def __len__(self): 66 | return len(self.split) 67 | 68 | 69 | def __getitem__(self, index): 70 | image_id = self.split[index] 71 | seq_name, image_name = re.match(r'(.+?)-(\d+)$', image_id).groups() 72 | seq_name = seq_name.replace('-', '/') 73 | 74 | camera_name = seq_name.split('/')[0] 75 | person_name = seq_name.split('/')[1] 76 | obj_class_name = seq_name.split('/')[2] 77 | obj_instance_name = seq_name.split('/')[3] 78 | room_name = seq_name.split('/')[4] 79 | room_layout_name = seq_name.split('/')[5] 80 | task_name = seq_name.split('/')[6] 81 | 82 | obj_cat = obj_cls_mapping[int(obj_class_name[1:])] 83 | obj_id = int(obj_instance_name[1:]) 84 | 85 | orig_img_path = os.path.join(self.data_dir, 'HOI4D_color', 'HOI4D_release', seq_name, 'align_rgb', f'{image_name}.jpg') 86 | 87 | sample_id = str(image_id) 88 | 89 | orig_img = load_img(orig_img_path) 90 | img_shape = orig_img.shape[:2] 91 | img_h, img_w = img_shape 92 | 93 | mano_valid = np.ones((1), dtype=np.float32) 94 | 95 | 96 | ################################## LOAD ANNOTATION DATA ##################################### 97 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 98 | contact_data_path = os.path.join(self.contact_data_path, f'{sample_id}.npy') 99 | 100 | annot_data = np.load(annot_data_path, allow_pickle=True) 101 | bbox_hand = annot_data['bbox_ho'] 102 | 103 | contact_h = np.load(contact_data_path).astype(np.float32) 104 | contact_data = dict(contact_h=contact_h) 105 | ################################## LOAD ANNOTATION DATA ##################################### 106 | 107 | 108 | ############################### PROCESS CROP AND AUGMENTATION ################################ 109 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 110 | crop_img = img.copy() 111 | 112 | # Transform for 3D HMR 113 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 114 | img = self.transform(img.astype(np.float32)/255.0) 115 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 116 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 117 | img = img.transpose(2, 0, 1) / 255.0 118 | img = normalize_img(torch.from_numpy(img)).float() 119 | else: 
120 | raise NotImplementedError 121 | ############################### PROCESS CROP AND AUGMENTATION ################################ 122 | 123 | 124 | input_data = dict(image=img) 125 | targets_data = dict(contact_data=contact_data) 126 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 127 | 128 | 129 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /data/ARCTIC/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from tqdm import tqdm 5 | from easydict import EasyDict 6 | 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision.transforms import Normalize 10 | 11 | from lib.core.config import cfg 12 | from lib.utils.human_models import mano 13 | from lib.utils.func_utils import load_img 14 | from lib.utils.preprocessing import augmentation_contact 15 | from lib.utils.train_utils import get_contact_difficulty_sample_id 16 | 17 | 18 | def get_sample_id(db, split, index): 19 | index = split[index] 20 | aid = db['imgnames'][index].split('./arctic_data/data/images/')[-1] 21 | subject_name = aid.split('/')[0] 22 | seq_name = aid.split('/')[1] 23 | obj_name, action_name = seq_name.split('_')[0], seq_name.split('_')[1] 24 | cam_name = aid.split('/')[2] 25 | img_name = aid.split('/')[3] 26 | img_id = img_name.split('.jpg')[0] 27 | 28 | sample_id = f'{subject_name}-{seq_name}-{cam_name}-{img_id}' 29 | return sample_id 30 | 31 | 32 | 33 | # Main dataloader code for ARCTIC dataset 34 | class ARCTIC(Dataset): 35 | def __init__(self, transform, data_split): 36 | super(ARCTIC, self).__init__() 37 | self.__dict__.update(locals()) 38 | 39 | self.transform = transform 40 | dataset_name = 'arctic' 41 | 42 | self.data_split = data_split 43 | self.root_path = root_path = os.path.join('data', 'ARCTIC') 44 | self.data_dir = os.path.join(self.root_path, 'data') 45 | 46 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 47 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 48 | 49 | # DB (we only use protocol 2) 50 | db_p2_path = os.path.join(self.data_dir, f'splits/p2_{self.data_split}.npy') 51 | db_p2 = np.load(db_p2_path, allow_pickle=True).item() # keys: ['data_dict', 'imgnames'] | egocentric 52 | 53 | self.db = db_p2 54 | 55 | # Organize db id based on split 56 | split_db_id_file_path = os.path.join(self.root_path, 'splits', f'{data_split}.json') 57 | with open(split_db_id_file_path, 'r') as f: 58 | self.split = json.load(f) 59 | self.split = [int(item) for item in self.split] 60 | 61 | # Sort contact by difficulty (Balanced contact sampling) 62 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 63 | sample_id_to_split_id = {} 64 | for split_idx in range(len(self.split)): 65 | each_sample_id = get_sample_id(self.db, self.split, split_idx) 66 | if each_sample_id in sample_id_to_split_id: 67 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 68 | else: 69 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 70 | 71 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 72 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 73 | 74 | new_split = [sample_id_to_split_id[key] for key in 
sample_id_difficulty_list] 75 | self.split = new_split 76 | 77 | 78 | def __len__(self): 79 | return len(self.split) 80 | 81 | 82 | def __getitem__(self, index): 83 | db_id = self.split[index] 84 | aid = self.db['imgnames'][db_id].split('./arctic_data/data/images/')[-1] 85 | subject_name = aid.split('/')[0] 86 | seq_name = aid.split('/')[1] 87 | obj_name, action_name = seq_name.split('_')[0], seq_name.split('_')[1] 88 | cam_name = aid.split('/')[2] 89 | img_name = aid.split('/')[3] 90 | img_id = img_name.split('.jpg')[0] 91 | sample_id = f'{subject_name}-{seq_name}-{cam_name}-{img_id}' 92 | 93 | orig_img_path = os.path.join(self.data_dir, 'cropped_images', subject_name, seq_name, cam_name, f'{img_id}.jpg') 94 | 95 | orig_img = load_img(orig_img_path) 96 | img_shape = orig_img.shape[:2] 97 | img_h, img_w = img_shape 98 | 99 | mano_valid = np.ones((1), dtype=np.float32) 100 | 101 | 102 | ################################## LOAD ANNOTATION DATA ##################################### 103 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 104 | 105 | annot_data = np.load(annot_data_path, allow_pickle=True) 106 | bbox_hand_r = annot_data['bbox_hand_r'] 107 | 108 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 109 | contact_data = dict(contact_h=contact_h) 110 | ################################## LOAD ANNOTATION DATA ##################################### 111 | 112 | 113 | ############################### PROCESS CROP AND AUGMENTATION ############################### 114 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 115 | crop_img = img.copy() 116 | 117 | # Transform for 3D HMR 118 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 119 | img = self.transform(img.astype(np.float32)/255.0) 120 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 121 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 122 | img = img.transpose(2, 0, 1) / 255.0 123 | img = normalize_img(torch.from_numpy(img)).float() 124 | else: 125 | raise NotImplementedError 126 | ############################### PROCESS CROP AND AUGMENTATION ############################### 127 | 128 | 129 | input_data = dict(image=img) 130 | targets_data = dict(contact_data=contact_data) 131 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 132 | 133 | 134 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /lib/utils/smplx/LICENSE: -------------------------------------------------------------------------------- 1 | License 2 | 3 | Software Copyright License for non-commercial scientific research purposes 4 | Please read carefully the following terms and conditions and any accompanying documentation before you download and/or use the SMPL-X/SMPLify-X model, data and software, (the "Model & Software"), including 3D meshes, blend weights, blend shapes, textures, software, scripts, and animations. By downloading and/or using the Model & Software (including downloading, cloning, installing, and any other use of this github repository), you acknowledge that you have read these terms and conditions, understand them, and agree to be bound by them. 
If you do not agree with these terms and conditions, you must not download and/or use the Model & Software. Any infringement of the terms of this agreement will automatically terminate your rights under this License 5 | 6 | Ownership / Licensees 7 | The Software and the associated materials has been developed at the 8 | 9 | Max Planck Institute for Intelligent Systems (hereinafter "MPI"). 10 | 11 | Any copyright or patent right is owned by and proprietary material of the 12 | 13 | Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (hereinafter “MPG”; MPI and MPG hereinafter collectively “Max-Planck”) 14 | 15 | hereinafter the “Licensor”. 16 | 17 | License Grant 18 | Licensor grants you (Licensee) personally a single-user, non-exclusive, non-transferable, free of charge right: 19 | 20 | To install the Model & Software on computers owned, leased or otherwise controlled by you and/or your organization; 21 | To use the Model & Software for the sole purpose of performing non-commercial scientific research, non-commercial education, or non-commercial artistic projects; 22 | Any other use, in particular any use for commercial purposes, is prohibited. This includes, without limitation, incorporation in a commercial product, use in a commercial service, or production of other artifacts for commercial purposes. The Model & Software may not be reproduced, modified and/or made available in any form to any third party without Max-Planck’s prior written permission. 23 | 24 | The Model & Software may not be used for pornographic purposes or to generate pornographic material whether commercial or not. This license also prohibits the use of the Model & Software to train methods/algorithms/neural networks/etc. for commercial use of any kind. By downloading the Model & Software, you agree not to reverse engineer it. 25 | 26 | No Distribution 27 | The Model & Software and the license herein granted shall not be copied, shared, distributed, re-sold, offered for re-sale, transferred or sub-licensed in whole or in part except that you may make one copy for archive purposes only. 28 | 29 | Disclaimer of Representations and Warranties 30 | You expressly acknowledge and agree that the Model & Software results from basic research, is provided “AS IS”, may contain errors, and that any use of the Model & Software is at your sole risk. LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE MODEL & SOFTWARE, NEITHER EXPRESS NOR IMPLIED, AND THE ABSENCE OF ANY LEGAL OR ACTUAL DEFECTS, WHETHER DISCOVERABLE OR NOT. Specifically, and not to limit the foregoing, licensor makes no representations or warranties (i) regarding the merchantability or fitness for a particular purpose of the Model & Software, (ii) that the use of the Model & Software will not infringe any patents, copyrights or other intellectual property rights of a third party, and (iii) that the use of the Model & Software will not cause any damage of any kind to you or a third party. 31 | 32 | Limitation of Liability 33 | Because this Model & Software License Agreement qualifies as a donation, according to Section 521 of the German Civil Code (Bürgerliches Gesetzbuch – BGB) Licensor as a donor is liable for intent and gross negligence only. If the Licensor fraudulently conceals a legal or material defect, they are obliged to compensate the Licensee for the resulting damage. 
34 | Licensor shall be liable for loss of data only up to the amount of typical recovery costs which would have arisen had proper and regular data backup measures been taken. For the avoidance of doubt Licensor shall be liable in accordance with the German Product Liability Act in the event of product liability. The foregoing applies also to Licensor’s legal representatives or assistants in performance. Any further liability shall be excluded. 35 | Patent claims generated through the usage of the Model & Software cannot be directed towards the copyright holders. 36 | The Model & Software is provided in the state of development the licensor defines. If modified or extended by Licensee, the Licensor makes no claims about the fitness of the Model & Software and is not responsible for any problems such modifications cause. 37 | 38 | No Maintenance Services 39 | You understand and agree that Licensor is under no obligation to provide either maintenance services, update services, notices of latent defects, or corrections of defects with regard to the Model & Software. Licensor nevertheless reserves the right to update, modify, or discontinue the Model & Software at any time. 40 | 41 | Defects of the Model & Software must be notified in writing to the Licensor with a comprehensible description of the error symptoms. The notification of the defect should enable the reproduction of the error. The Licensee is encouraged to communicate any use, results, modification or publication. 42 | 43 | Publications using the Model & Software 44 | You acknowledge that the Model & Software is a valuable scientific resource and agree to appropriately reference the following paper in any publication making use of the Model & Software. 45 | 46 | Citation: 47 | 48 | 49 | @inproceedings{SMPL-X:2019, 50 | title = {Expressive Body Capture: 3D Hands, Face, and Body from a Single Image}, 51 | author = {Pavlakos, Georgios and Choutas, Vasileios and Ghorbani, Nima and Bolkart, Timo and Osman, Ahmed A. A. and Tzionas, Dimitrios and Black, Michael J.}, 52 | booktitle = {Proceedings IEEE Conf. on Computer Vision and Pattern Recognition (CVPR)}, 53 | year = {2019} 54 | } 55 | Commercial licensing opportunities 56 | For commercial uses of the Software, please send email to ps-license@tue.mpg.de 57 | 58 | This Agreement shall be governed by the laws of the Federal Republic of Germany except for the UN Sales Convention. 
59 | -------------------------------------------------------------------------------- /data/HO3D/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from pycocotools.coco import COCO 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torchvision.transforms import Normalize 8 | 9 | from lib.core.config import cfg 10 | from lib.utils.human_models import mano 11 | from lib.utils.func_utils import load_img 12 | from lib.utils.preprocessing import augmentation_contact 13 | from lib.utils.train_utils import get_contact_difficulty_sample_id 14 | 15 | 16 | def get_sample_id(db, split, index): 17 | image_id = split[index] 18 | ann_ids = db.getAnnIds(imgIds=[image_id]) 19 | ann = db.loadAnns(ann_ids)[0] 20 | img = db.loadImgs(image_id)[0] 21 | sample_id = str(image_id) 22 | return sample_id 23 | 24 | 25 | 26 | # Main dataloader code for HO3D dataset 27 | class HO3D(Dataset): 28 | def __init__(self, transform, data_split): 29 | super(HO3D, self).__init__() 30 | self.__dict__.update(locals()) 31 | 32 | self.transform = transform 33 | dataset_name = 'ho3d' 34 | 35 | if data_split == 'train': 36 | self.data_split_name = 'train' 37 | elif data_split == 'test': 38 | self.data_split_name = 'evaluation' 39 | else: 40 | raise NotImplementedError 41 | 42 | self.data_split = data_split 43 | self.root_path = root_path = os.path.join('data', 'HO3D') 44 | self.data_dir = os.path.join(self.root_path, 'data') 45 | self.annot_dir = os.path.join(self.root_path, 'annotations') 46 | 47 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 48 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 49 | 50 | self.joints_name = ('wrist', 'index1', 'index2', 'index3', 'middle1', 'middle2', 'middle3', 'pinky1', 'pinky2', 'pinky3', 'ring1', 'ring2', 'ring3', 'thumb1', 'thumb2', 'thumb3', 'thumb4', 'index4', 'middle4', 'ring4', 'pinky4') 51 | self.mano_joints_name = ('wrist', 'thumb1', 'thumb2', 'thumb3', 'thumb4', 'index1', 'index2', 'index3', 'index4', 'middle1', 'middle2', 'middle3', 'middle4', 'ring1', 'ring2', 'ring3', 'ring4', 'pinky1', 'pinky2', 'pinky3', 'pinky4') 52 | 53 | # Organize db id based on split 54 | self.db = COCO(os.path.join(self.annot_dir, f"HO3D_{self.data_split_name}_data.json")) 55 | self.split = self.db.getImgIds() 56 | 57 | # Sort contact by difficulty (Balanced contact sampling) 58 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 59 | sample_id_to_split_id = {} 60 | for split_idx in range(len(self.split)): 61 | each_sample_id = get_sample_id(self.db, self.split, split_idx) 62 | if each_sample_id in sample_id_to_split_id: 63 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 64 | else: 65 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 66 | 67 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 68 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 69 | 70 | new_split = [sample_id_to_split_id[key] for key in sample_id_difficulty_list] 71 | self.split = new_split 72 | 73 | 74 | def __len__(self): 75 | return len(self.split) 76 | 77 | 78 | def __getitem__(self, index): 79 | image_id = self.split[index] 80 | ann_ids = self.db.getAnnIds(imgIds=[image_id]) 81 | ann = self.db.loadAnns(ann_ids)[0] 82 | img = 
self.db.loadImgs(image_id)[0] 83 | orig_img_path = os.path.join(self.data_dir, self.data_split_name, img['file_name']) 84 | meta_path = os.path.join(self.data_dir, self.data_split_name, img['file_name'].replace('/rgb/', '/meta/').replace('.png', '.pkl')) 85 | img_shape = (img['height'], img['width']) 86 | sample_id = str(image_id) 87 | 88 | orig_img = load_img(orig_img_path) 89 | img_shape = orig_img.shape[:2] 90 | img_h, img_w = img_shape 91 | 92 | mano_valid = np.ones((1), dtype=np.float32) 93 | 94 | 95 | ################################## LOAD ANNOTATION DATA ##################################### 96 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 97 | 98 | annot_data = np.load(annot_data_path, allow_pickle=True) 99 | bbox_ho = annot_data['bbox_ho'] 100 | cam_param = annot_data['cam_param'] 101 | 102 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 103 | contact_data = dict(contact_h=contact_h) 104 | ################################## LOAD ANNOTATION DATA ##################################### 105 | 106 | 107 | ############################### PROCESS CROP AND AUGMENTATION ################################ 108 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_ho, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 109 | crop_img = img.copy() 110 | 111 | # Transform for 3D HMR 112 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 113 | img = self.transform(img.astype(np.float32)/255.0) 114 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 115 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 116 | img = img.transpose(2, 0, 1) / 255.0 117 | img = normalize_img(torch.from_numpy(img)).float() 118 | else: 119 | raise NotImplementedError 120 | ############################### PROCESS CROP AND AUGMENTATION ################################ 121 | 122 | 123 | input_data = dict(image=img) 124 | targets_data = dict(contact_data=contact_data) 125 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 126 | 127 | 128 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import argparse 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | import mediapipe as mp 9 | from mediapipe.tasks.python import vision 10 | from mediapipe.tasks.python import BaseOptions 11 | 12 | from lib.core.config import cfg, update_config 13 | from lib.models.model import HACO 14 | from lib.utils.human_models import mano 15 | from lib.utils.contact_utils import get_contact_thres 16 | from lib.utils.vis_utils import ContactRenderer, draw_landmarks_on_image, draw_landmarks_on_image_simple 17 | from lib.utils.preprocessing import augmentation_contact 18 | from lib.utils.demo_utils import remove_small_contact_components, run_wilor_hand_detector 19 | 20 | 21 | parser = argparse.ArgumentParser(description='Demo HACO') 22 | parser.add_argument('--backbone', type=str, default='hamer', choices=['hamer', 'vit-l-16', 'vit-b-16', 'vit-s-16', 'handoccnet', 'hrnet-w48', 'hrnet-w32', 'resnet-152', 'resnet-101', 'resnet-50', 'resnet-34', 'resnet-18'], help='backbone model') 23 | 
parser.add_argument('--detector', type=str, default='wilor', choices=['wilor', 'mediapipe'], help='detector model') 24 | parser.add_argument('--checkpoint', type=str, default='', help='model path for demo') 25 | parser.add_argument('--input_path', type=str, default='asset/example_images', help='image path for demo') 26 | args = parser.parse_args() 27 | 28 | 29 | # Set device as CUDA 30 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 31 | 32 | 33 | # Initialize directories 34 | experiment_dir = 'experiments_demo_image' 35 | 36 | 37 | # Load config 38 | update_config(backbone_type=args.backbone, exp_dir=experiment_dir) 39 | 40 | 41 | # Initialize renderer 42 | contact_renderer = ContactRenderer() 43 | 44 | 45 | # Load demo images 46 | input_dir = args.input_path 47 | images = [f for f in os.listdir(input_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))] 48 | 49 | 50 | # Initialize MediaPipe HandLandmarker 51 | if args.detector == 'wilor': 52 | from ultralytics import YOLO 53 | detector_path = f'data/base_data/demo_data/wilor_detector.pt' 54 | detector = YOLO(detector_path) 55 | elif args.detector == 'mediapipe': 56 | base_options = BaseOptions(model_asset_path=cfg.MODEL.hand_landmarker_path) 57 | hand_options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=2) 58 | detector = vision.HandLandmarker.create_from_options(hand_options) 59 | else: 60 | raise NotImplementedError(f"Unsupported detector: {args.detector}") 61 | 62 | 63 | ############# Model ############# 64 | model = HACO().to(device) 65 | model.eval() 66 | ############# Model ############# 67 | 68 | 69 | # Load model checkpoint if provided 70 | if args.checkpoint: 71 | checkpoint = torch.load(args.checkpoint, map_location=device) 72 | model.load_state_dict(checkpoint['state_dict']) 73 | 74 | 75 | ############################### Demo Loop ############################### 76 | for i, frame_name in tqdm(enumerate(images), total=len(images)): 77 | print(f"Processing: {frame_name}") 78 | 79 | # Load and convert image 80 | frame_path = os.path.join(input_dir, frame_name) 81 | frame = cv2.imread(frame_path) 82 | orig_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 83 | frame_name_base = os.path.splitext(frame_name)[0] 84 | 85 | # Hand landmark detection 86 | if args.detector == 'wilor': 87 | right_hand_bbox = run_wilor_hand_detector(orig_img, detector) 88 | annotated_image, right_hand_bbox = draw_landmarks_on_image_simple(orig_img.copy(), right_hand_bbox) 89 | elif args.detector == 'mediapipe': 90 | mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=orig_img.copy()) 91 | detection_result = detector.detect(mp_image) 92 | annotated_image, right_hand_bbox = draw_landmarks_on_image(orig_img.copy(), detection_result) 93 | else: 94 | raise NotImplementedError(f"Unsupported detector: {args.detector}") 95 | 96 | 97 | if right_hand_bbox is None: 98 | print(f"Skipping {frame_name} - no hand detected.") 99 | continue 100 | 101 | print(f"Frame {i}: Right hand bbox: {right_hand_bbox}") 102 | 103 | # Image preprocessing 104 | crop_img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), right_hand_bbox, 'test', enforce_flip=False) 105 | 106 | # Convert to model input format 107 | if args.backbone in ['handoccnet'] or 'resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type: 108 | from torchvision import transforms 109 | img_tensor = transforms.ToTensor()(crop_img.astype(np.float32) / 255.0) 110 | elif args.backbone in ['hamer'] or 'vit' in 
cfg.MODEL.backbone_type: 111 | from torchvision.transforms import Normalize 112 | normalize = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 113 | img_tensor = crop_img.transpose(2, 0, 1) / 255.0 114 | img_tensor = normalize(torch.from_numpy(img_tensor)).float() 115 | else: 116 | raise NotImplementedError(f"Unsupported backbone: {args.backbone}") 117 | 118 | ############# Run model ############# 119 | with torch.no_grad(): 120 | outputs = model({'input': {'image': img_tensor[None].to(device)}}, mode="test") 121 | ############# Run model ############# 122 | 123 | # Save result 124 | os.makedirs('outputs', exist_ok=True) 125 | os.makedirs('outputs/detection', exist_ok=True) 126 | os.makedirs('outputs/crop_img', exist_ok=True) 127 | os.makedirs('outputs/contact', exist_ok=True) 128 | 129 | cv2.imwrite(f'outputs/detection/{frame_name_base}.png', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)) 130 | cv2.imwrite(f'outputs/crop_img/{frame_name_base}.png', crop_img[..., ::-1]) 131 | 132 | eval_thres = get_contact_thres(args.backbone) 133 | contact_mask = (outputs['contact_out'].sigmoid()[0] > eval_thres).detach().cpu().numpy() 134 | contact_mask = remove_small_contact_components(contact_mask, faces=mano.watertight_face['right'], min_size=20) 135 | contact_rendered = contact_renderer.render_contact(crop_img[..., ::-1], contact_mask) 136 | cv2.imwrite(f'outputs/contact/{frame_name_base}.png', contact_rendered) 137 | ############################### Demo Loop ############################### -------------------------------------------------------------------------------- /data/Decaf/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torchvision.transforms import Normalize 8 | 9 | from lib.core.config import cfg 10 | from lib.utils.human_models import mano 11 | from lib.utils.func_utils import load_img 12 | from lib.utils.preprocessing import augmentation_contact 13 | from lib.utils.train_utils import get_contact_difficulty_sample_id 14 | 15 | 16 | def get_sample_id(split, index): 17 | aid = split[index] 18 | seq_name = aid.split('_')[0] 19 | cam_name = aid.split('_')[1] 20 | img_name = aid.split('_')[2] 21 | sample_id = aid 22 | return sample_id 23 | 24 | 25 | 26 | # Main dataloader code for Decaf dataset 27 | class Decaf(Dataset): 28 | def __init__(self, transform, data_split): 29 | super(Decaf, self).__init__() 30 | self.__dict__.update(locals()) 31 | 32 | self.transfrom = transform 33 | dataset_name = 'decaf' 34 | 35 | self.data_split = data_split 36 | self.root_path = root_path = 'data/Decaf' 37 | self.data_dir = os.path.join(self.root_path, 'data') 38 | 39 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 40 | 41 | self.mano_joints_name = ('wrist', 'thumb1', 'thumb2', 'thumb3', 'thumb4', 'index1', 'index2', 'index3', 'index4', 'middle1', 'middle2', 'middle3', 'middle4', 'ring1', 'ring2', 'ring3', 'ring4', 'pinky1', 'pinky2', 'pinky3', 'pinky4') 42 | 43 | seq_list = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8'] 44 | cam_list = ['084', '100', '102', '108', '110', '111', '121', '122'] 45 | 46 | # Make db 47 | contact_rh_data = {seq: np.load(os.path.join(self.data_dir, self.data_split, 'contacts', seq, 'contacts_rh.npy')) for seq in seq_list} 48 | 49 | bb_rh_data = { 50 | f"{seq}_{cam}": np.load(os.path.join(self.data_dir, self.data_split, 'right_hand_bbs', seq, 
f'{cam}.npy')) 51 | for seq in seq_list 52 | for cam in cam_list 53 | } 54 | 55 | # Organize db id based on split 56 | db_split_path = os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 57 | with open(db_split_path, 'r') as f: 58 | db_keys = json.load(f) 59 | 60 | self.split = list(db_keys) 61 | 62 | # Sort contact by difficulty (Balanced contact sampling) 63 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 64 | sample_id_to_split_id = {} 65 | for split_idx in range(len(self.split)): 66 | each_sample_id = get_sample_id(self.split, split_idx) 67 | if each_sample_id in sample_id_to_split_id: 68 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 69 | else: 70 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 71 | 72 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 73 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 74 | 75 | new_split = [sample_id_to_split_id[key] for key in sample_id_difficulty_list if key in [*sample_id_to_split_id]] 76 | self.split = new_split 77 | 78 | 79 | def __len__(self): 80 | return len(self.split) 81 | 82 | 83 | def __getitem__(self, index): 84 | aid = self.split[index] 85 | seq_name = aid.split('_')[0] 86 | cam_name = aid.split('_')[1] 87 | img_name = aid.split('_')[2] 88 | sample_id = aid 89 | 90 | orig_img_path = os.path.join(self.data_dir, self.data_split, 'images', seq_name, cam_name, f'{img_name}.jpg') 91 | 92 | orig_img = load_img(orig_img_path) 93 | img_shape = orig_img.shape[:2] 94 | img_h, img_w = img_shape 95 | 96 | mano_valid = np.ones((1), dtype=np.float32) 97 | 98 | 99 | ################################## LOAD ANNOTATION DATA ##################################### 100 | db_contact_path = os.path.join(self.root_path, 'preprocessed_data', self.data_split, 'db_contact', f'{sample_id}.npy') 101 | db_bb_path = os.path.join(self.root_path, 'preprocessed_data', self.data_split, 'db_bb', f'{sample_id}.npy') 102 | db_contact = np.load(db_contact_path) 103 | db_bb = np.load(db_bb_path) 104 | 105 | contact_rh = db_contact.astype(np.float32) 106 | bbox_rh = db_bb.tolist() # GT bbox is in [x_min, y_min, x_max, y_max] 107 | bbox_rh = np.array([bbox_rh[0], bbox_rh[1], bbox_rh[2]-bbox_rh[0], bbox_rh[3]-bbox_rh[1]]) # Change from [x_min, y_min, x_max, y_max] to [x_min, y_min, width, height] 108 | contact_data = dict(contact_h=contact_rh) 109 | ################################## LOAD ANNOTATION DATA ##################################### 110 | 111 | 112 | ############################### PROCESS CROP AND AUGMENTATION ################################ 113 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_rh, self.data_split, enforce_flip=False) 114 | crop_img = img.copy() 115 | 116 | # Transform for 3D HMR 117 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 118 | img = self.transform(img.astype(np.float32)/255.0) 119 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 120 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 121 | img = img.transpose(2, 0, 1) / 255.0 122 | img = normalize_img(torch.from_numpy(img)).float() 123 | else: 124 | raise NotImplementedError 125 | ############################### PROCESS CROP AND AUGMENTATION ################################ 126 | 127 | 128 | 
input_data = dict(image=img) 129 | targets_data = dict(contact_data=contact_data) 130 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 131 | 132 | 133 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /data/RICH/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import pickle 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.transforms import Normalize 9 | 10 | from lib.core.config import cfg 11 | from lib.utils.human_models import mano 12 | from lib.utils.func_utils import load_img 13 | from lib.utils.preprocessing import augmentation_contact 14 | from lib.utils.train_utils import get_contact_difficulty_sample_id 15 | 16 | 17 | def get_sample_id(db, index): 18 | aid = db[index] 19 | seq_name = aid.split('/')[-3] 20 | seq_loc_name = seq_name.split('_')[0] 21 | annot_name = seq_name.split('_')[1] 22 | cam_name = aid.split('/')[-2] 23 | cam_id = int(cam_name.split('cam_')[-1]) 24 | img_name = aid.split('/')[-1].split('.jpeg')[0] # we used jpg version 25 | img_annot_name = img_name.split('_')[0] 26 | sample_id = f'{seq_name}-{cam_name}-{img_name}' 27 | return sample_id 28 | 29 | 30 | 31 | # Main dataloader code for RICH dataset 32 | class RICH(Dataset): 33 | def __init__(self, transform, data_split): 34 | super(RICH, self).__init__() 35 | self.__dict__.update(locals()) 36 | 37 | self.transform = transform 38 | dataset_name = 'rich' 39 | 40 | self.data_split = data_split 41 | self.root_path = root_path = os.path.join('data', 'RICH') 42 | self.data_dir = os.path.join(self.root_path, 'data') 43 | 44 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 45 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 46 | 47 | # SMPL, SMPLX, MANO conversion mappings 48 | smpl_to_smplx_mapping_path = os.path.join('data', 'base_data', 'conversions', 'smpl_to_smplx.pkl') 49 | smplx_mano_mapping_path = os.path.join('data', 'base_data', 'conversions', 'smplx_to_mano.pkl') 50 | 51 | with open(smpl_to_smplx_mapping_path, 'rb') as f: 52 | self.smpl_to_smplx_mapping = pickle.load(f) 53 | 54 | with open(smplx_mano_mapping_path, 'rb') as f: 55 | self.smplx_to_mano_mapping = pickle.load(f) 56 | self.smplx_to_mano_mapping_r = self.smplx_to_mano_mapping["right_hand"] 57 | 58 | # Organize db id based on split 59 | db_split_path = os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 60 | with open(db_split_path, 'r') as f: 61 | self.db = json.load(f) 62 | 63 | # Sort contact by difficulty (Balanced contact sampling) 64 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 65 | sample_id_to_db_id = {} 66 | for db_idx in range(len(self.db)): 67 | each_sample_id = get_sample_id(self.db, db_idx) 68 | if each_sample_id in sample_id_to_db_id: 69 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 70 | else: 71 | sample_id_to_db_id[each_sample_id] = self.db[db_idx] 72 | 73 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 74 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_db_id, self.contact_data_path, contact_means_path) 75 | 76 | new_db = [sample_id_to_db_id[key] for key in sample_id_difficulty_list] 77 | self.db = new_db 78 | 79 | 80 | def 
__len__(self): 81 | return len(self.db) 82 | 83 | 84 | def __getitem__(self, index): 85 | aid = self.db[index] 86 | seq_name = aid.split('/')[-3] 87 | seq_loc_name = seq_name.split('_')[0] 88 | annot_name = seq_name.split('_')[1] 89 | cam_name = aid.split('/')[-2] 90 | cam_id = int(cam_name.split('cam_')[-1]) 91 | img_name = aid.split('/')[-1].split('.jpeg')[0] # we used jpg version 92 | img_annot_name = img_name.split('_')[0] 93 | sample_id = f'{seq_name}-{cam_name}-{img_name}' 94 | 95 | orig_img_path = os.path.join(self.data_dir, 'images_jpg_subset', self.data_split, seq_name, cam_name, f'{img_name}.jpeg') 96 | 97 | orig_img = load_img(orig_img_path) 98 | img_shape = orig_img.shape[:2] 99 | img_h, img_w = img_shape 100 | 101 | mano_valid = np.ones((1), dtype=np.float32) 102 | 103 | 104 | ################################## LOAD ANNOTATION DATA ##################################### 105 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 106 | 107 | annot_data = np.load(annot_data_path, allow_pickle=True) 108 | bbox_hand_r = annot_data['bbox_ho'] 109 | 110 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 111 | contact_data = dict(contact_h=contact_h) 112 | ################################## LOAD ANNOTATION DATA ##################################### 113 | 114 | 115 | ############################### PROCESS CROP AND AUGMENTATION ############################### 116 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r, self.data_split, enforce_flip=False) 117 | crop_img = img.copy() 118 | 119 | # Transform for 3D HMR 120 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 121 | img = self.transform(img.astype(np.float32)/255.0) 122 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 123 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 124 | img = img.transpose(2, 0, 1) / 255.0 125 | img = normalize_img(torch.from_numpy(img)).float() 126 | else: 127 | raise NotImplementedError 128 | ############################### PROCESS CROP AND AUGMENTATION ############################### 129 | 130 | 131 | input_data = dict(image=img) 132 | targets_data = dict(contact_data=contact_data) 133 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 134 | 135 | 136 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /demo_video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import argparse 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | import mediapipe as mp 9 | from mediapipe.tasks.python import vision 10 | from mediapipe.tasks.python import BaseOptions 11 | 12 | from lib.core.config import cfg, update_config 13 | from lib.models.model import HACO 14 | from lib.utils.human_models import mano 15 | from lib.utils.contact_utils import get_contact_thres 16 | from lib.utils.vis_utils import ContactRenderer, draw_landmarks_on_image 17 | from lib.utils.preprocessing import augmentation_contact 18 | from lib.utils.demo_utils import smooth_bbox, smooth_contact_mask, remove_small_contact_components, initialize_video_writer, extract_frames_with_hand, find_longest_continuous_segment, run_wilor_hand_detector 19 | 20 | 21 | parser = 
argparse.ArgumentParser(description='Demo HACO') 22 | parser.add_argument('--backbone', type=str, default='hamer', choices=['hamer', 'vit-l-16', 'vit-b-16', 'vit-s-16', 'handoccnet', 'hrnet-w48', 'hrnet-w32', 'resnet-152', 'resnet-101', 'resnet-50', 'resnet-34', 'resnet-18'], help='backbone model') 23 | parser.add_argument('--detector', type=str, default='wilor', choices=['wilor', 'mediapipe'], help='detector model') 24 | parser.add_argument('--checkpoint', type=str, default='', help='model path for demo') 25 | parser.add_argument('--input_path', type=str, default='asset/example_videos', help='video path for demo') 26 | args = parser.parse_args() 27 | 28 | 29 | # Set device as CUDA 30 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 31 | 32 | 33 | # Initialize directories 34 | experiment_dir = 'experiments_demo_video' 35 | 36 | 37 | # Load config 38 | update_config(backbone_type=args.backbone, exp_dir=experiment_dir) 39 | 40 | 41 | # Initialize renderer 42 | contact_renderer = ContactRenderer() 43 | 44 | 45 | # Load demo videos 46 | input_dir = args.input_path 47 | video_files = [f for f in os.listdir(input_dir) if f.lower().endswith(('.mp4', '.avi', '.mov'))] 48 | 49 | 50 | # Initialize MediaPipe HandLandmarker 51 | if args.detector == 'wilor': 52 | from ultralytics import YOLO 53 | detector_path = f'data/base_data/demo_data/wilor_detector.pt' 54 | detector = YOLO(detector_path) 55 | elif args.detector == 'mediapipe': 56 | base_options = BaseOptions(model_asset_path=cfg.MODEL.hand_landmarker_path) 57 | hand_options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=2) 58 | detector = vision.HandLandmarker.create_from_options(hand_options) 59 | else: 60 | raise NotImplementedError 61 | 62 | 63 | ############# Model ############# 64 | model = HACO().to(device) 65 | model.eval() 66 | ############# Model ############# 67 | 68 | 69 | # Load model checkpoint if provided 70 | if args.checkpoint: 71 | checkpoint = torch.load(args.checkpoint, map_location=device) 72 | model.load_state_dict(checkpoint['state_dict']) 73 | 74 | 75 | ############################### Demo Loop ############################### 76 | for i, video_name in tqdm(enumerate(video_files), total=len(video_files)): 77 | print(f"Processing: {video_name}") 78 | 79 | # Organize input and output path 80 | video_path = os.path.join(input_dir, video_name) 81 | os.makedirs("outputs_video", exist_ok=True) 82 | output_path = os.path.join("outputs_video", f"{os.path.splitext(video_name)[0]}_out.mp4") 83 | 84 | # Load and convert video 85 | cap = cv2.VideoCapture(video_path) 86 | fps = cap.get(cv2.CAP_PROP_FPS) 87 | fps = 30 if fps == 0 or np.isnan(fps) else fps 88 | 89 | # Extract meaningful video segment 90 | frames_with_hand = extract_frames_with_hand(cap, detector, args.detector) 91 | longest_segment = find_longest_continuous_segment(frames_with_hand) 92 | 93 | if not longest_segment: 94 | print(f"No hand detected in any continuous segment for {video_name}") 95 | continue 96 | 97 | writer = None 98 | smoothed_bbox = None 99 | smoothed_contact = None 100 | 101 | for _, frame, bbox in longest_segment: 102 | # Image preprocessing 103 | smoothed_bbox = smooth_bbox(smoothed_bbox, bbox, alpha=0.8) 104 | orig_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 105 | crop_img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), smoothed_bbox, 'test', enforce_flip=False, bkg_color='white') 106 | 107 | # Convert to model input format 108 | if args.backbone in ['handoccnet'] or 'resnet' in 
cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type: 109 | from torchvision import transforms 110 | img_tensor = transforms.ToTensor()(crop_img.astype(np.float32) / 255.0) 111 | elif args.backbone in ['hamer'] or 'vit' in cfg.MODEL.backbone_type: 112 | from torchvision.transforms import Normalize 113 | normalize = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 114 | img_tensor = crop_img.transpose(2, 0, 1) / 255.0 115 | img_tensor = normalize(torch.from_numpy(img_tensor)).float() 116 | else: 117 | raise NotImplementedError(f"Unsupported backbone: {args.backbone}") 118 | 119 | ############# Run model ############# 120 | with torch.no_grad(): 121 | outputs = model({'input': {'image': img_tensor[None].to(device)}}, mode="test") 122 | ############# Run model ############# 123 | 124 | # Save result 125 | eval_thres = get_contact_thres(args.backbone) 126 | raw_contact = (outputs['contact_out'].sigmoid()[0] > eval_thres).detach().cpu().numpy() 127 | smoothed_contact = smooth_contact_mask(smoothed_contact, raw_contact, alpha=0.8) 128 | contact_mask = smoothed_contact > 0.5 129 | contact_mask = remove_small_contact_components(contact_mask, faces=mano.watertight_face['right'], min_size=20) 130 | contact_rendered = contact_renderer.render_contact(crop_img, contact_mask, mode='demo') 131 | 132 | if writer is None: 133 | ch, cw = contact_rendered.shape[:2] 134 | writer = initialize_video_writer(output_path, fps, (cw, ch)) 135 | 136 | writer.write(cv2.cvtColor(contact_rendered, cv2.COLOR_RGB2BGR)) 137 | 138 | if writer: 139 | writer.release() 140 | ############################### Demo Loop ############################### -------------------------------------------------------------------------------- /data/DexYCB/toolkit/obj.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Wavefront OBJ file loader. 6 | 7 | Functions and classes are largely derived from 8 | https://github.com/pyglet/pyglet/blob/f762169c9dd88c22c8d6d2399a129cc23654d99c/contrib/model/model/obj_batch.py 9 | """ 10 | 11 | import os 12 | import logging 13 | import numpy as np 14 | 15 | 16 | class Material: 17 | """Material.""" 18 | diffuse = [.8, .8, .8] 19 | ambient = [.2, .2, .2] 20 | specular = [0., 0., 0.] 21 | emission = [0., 0., 0.] 22 | shininess = 0. 23 | opacity = 1. 24 | texture_path = None 25 | 26 | def __init__(self, name): 27 | """Constructor. 28 | 29 | Args: 30 | name: Material name. 31 | """ 32 | self.name = name 33 | 34 | 35 | class MaterialGroup: 36 | """Material group.""" 37 | 38 | def __init__(self, material): 39 | """Constructor. 40 | 41 | Args: 42 | material: A Material object. 43 | """ 44 | self.material = material 45 | 46 | self.f_v = [] 47 | self.f_n = [] 48 | self.f_t = [] 49 | 50 | 51 | class Mesh: 52 | """Mesh.""" 53 | 54 | def __init__(self, name): 55 | """Constructor. 56 | 57 | Args: 58 | name: Mesh name. 59 | """ 60 | self.name = name 61 | self.groups = [] 62 | 63 | 64 | class OBJ: 65 | """3D data loaded from an OBJ file.""" 66 | 67 | def __init__(self, filename, file=None, path=None): 68 | """Constructor. 69 | 70 | Args: 71 | filename: Path to the OBJ file. 72 | file: An file object. 73 | path: Path to the directory storing the material files. 
74 | """ 75 | self.materials = {} 76 | self.meshes = {} 77 | self.mesh_list = [] 78 | 79 | if file is None: 80 | file = open(filename, 'r') 81 | 82 | if path is None: 83 | path = os.path.dirname(filename) 84 | self.path = path 85 | 86 | mesh = None 87 | group = None 88 | material = None 89 | 90 | self.v = [] 91 | self.n = [] 92 | self.t = [] 93 | 94 | for line in file: 95 | if line.startswith('#'): 96 | continue 97 | values = line.split() 98 | if not values: 99 | continue 100 | 101 | if values[0] == 'v': 102 | self.v.append(list(map(float, values[1:4]))) 103 | elif values[0] == 'vn': 104 | self.n.append(list(map(float, values[1:4]))) 105 | elif values[0] == 'vt': 106 | self.t.append(list(map(float, values[1:3]))) 107 | elif values[0] == 'mtllib': 108 | self._load_material_library(values[1]) 109 | elif values[0] in ('usemtl', 'usemat'): 110 | material = self.materials.get(values[1], None) 111 | if material is None: 112 | logging.warn('Unknown material: %s' % values[1]) 113 | if mesh is not None: 114 | group = MaterialGroup(material) 115 | mesh.groups.append(group) 116 | elif values[0] == 'o': 117 | mesh = Mesh(values[1]) 118 | self.meshes[mesh.name] = mesh 119 | self.mesh_list.append(mesh) 120 | group = None 121 | elif values[0] == 'f': 122 | if mesh is None: 123 | mesh = Mesh('') 124 | self.mesh_list.append(mesh) 125 | if material is None: 126 | material = Material("") 127 | if group is None: 128 | group = MaterialGroup(material) 129 | mesh.groups.append(group) 130 | 131 | for i, v in enumerate(values[1:]): 132 | v_index, t_index, n_index = \ 133 | (list(map(int, [j or 0 for j in v.split('/')])) + [0, 0])[:3] 134 | if v_index < 0: 135 | v_index += len(vertices) 136 | if t_index < 0: 137 | t_index += len(tex_coords) 138 | if n_index < 0: 139 | n_index += len(normals) 140 | if i < 3: 141 | group.f_v.append(v_index - 1) 142 | group.f_n.append(n_index - 1) 143 | group.f_t.append(t_index - 1) 144 | else: 145 | # Triangulate. 146 | group.f_v += [group.f_v[-3 * (i - 2)], group.f_v[-1], v_index - 1] 147 | group.f_n += [group.f_n[-3 * (i - 2)], group.f_n[-1], n_index - 1] 148 | group.f_t += [group.f_t[-3 * (i - 2)], group.f_t[-1], t_index - 1] 149 | 150 | self.v = np.array(self.v, dtype=np.float32) 151 | self.n = np.array(self.n, dtype=np.float32) 152 | self.t = np.array(self.t, dtype=np.float32) 153 | 154 | for mesh in self.mesh_list: 155 | for group in mesh.groups: 156 | group.f_v = np.array(group.f_v, dtype=np.int64).reshape(-1, 3) 157 | group.f_n = np.array(group.f_n, dtype=np.int64).reshape(-1, 3) 158 | group.f_t = np.array(group.f_t, dtype=np.int64).reshape(-1, 3) 159 | 160 | def _open_material_file(self, filename): 161 | """Opens a material file. 162 | 163 | Args: 164 | filename: Path to the material file. 165 | 166 | Returns: 167 | A file object. 168 | """ 169 | return open(os.path.join(self.path, filename), 'r') 170 | 171 | def _load_material_library(self, filename): 172 | """Loads the material from a material file. 173 | 174 | Args: 175 | filename: Path to the material file. 
176 | """ 177 | material = None 178 | file = self._open_material_file(filename) 179 | 180 | for line in file: 181 | if line.startswith('#'): 182 | continue 183 | values = line.split() 184 | if not values: 185 | continue 186 | 187 | if values[0] == 'newmtl': 188 | material = Material(values[1]) 189 | self.materials[material.name] = material 190 | elif material is None: 191 | logging.warn('Expected "newmtl" in %s' % filename) 192 | continue 193 | 194 | try: 195 | if values[0] == 'Kd': 196 | material.diffuse = list(map(float, values[1:])) 197 | elif values[0] == 'Ka': 198 | material.ambient = list(map(float, values[1:])) 199 | elif values[0] == 'Ks': 200 | material.specular = list(map(float, values[1:])) 201 | elif values[0] == 'Ke': 202 | material.emissive = list(map(float, values[1:])) 203 | elif values[0] == 'Ns': 204 | material.shininess = float(values[1]) 205 | elif values[0] == 'd': 206 | material.opacity = float(values[1]) 207 | elif values[0] == 'map_Kd': 208 | material.texture_path = os.path.abspath(self.path + '/' + values[1]) 209 | except BaseException as ex: 210 | logging.warning('Parse error in %s.' % (filename, ex)) 211 | -------------------------------------------------------------------------------- /data/ObMan/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from pycocotools.coco import COCO 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.transforms import Normalize 9 | 10 | from lib.core.config import cfg 11 | from lib.utils.human_models import mano 12 | from lib.utils.func_utils import load_img 13 | from lib.utils.preprocessing import augmentation_contact 14 | from lib.utils.train_utils import get_contact_difficulty_sample_id 15 | 16 | 17 | def get_sample_id(db, split, index): 18 | aid = split[index] 19 | ann = db.anns[aid] 20 | img_data = db.loadImgs(ann['image_id'])[0] 21 | sample_id = img_data['file_name'] 22 | return sample_id, img_data 23 | 24 | 25 | 26 | # Main dataloader code for ObMan dataset 27 | class ObMan(Dataset): 28 | def __init__(self, transform, data_split): 29 | super(ObMan, self).__init__() 30 | self.__dict__.update(locals()) 31 | 32 | self.transfrom = transform 33 | dataset_name = 'obman' 34 | 35 | if data_split == 'train': 36 | data_split_name = 'train_87k' 37 | elif data_split == 'test': 38 | data_split_name = 'test_6k' 39 | else: 40 | raise NotImplementedError 41 | 42 | self.data_split = data_split 43 | self.root_path = root_path = 'data/ObMan' 44 | 45 | self.data_dir = os.path.join(self.root_path, 'data') 46 | self.split_dir = os.path.join(self.root_path, 'splits') 47 | self.annot_dir = os.path.join(self.root_path, 'annotations') 48 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 49 | 50 | with open(os.path.join(self.split_dir , f'{data_split_name}.json'), 'r') as f: 51 | self.split = json.load(f) 52 | 53 | self.split = [int(idx) for idx in self.split] 54 | 55 | self.anno_file = os.path.join(self.annot_dir, f'{dataset_name}_{data_split}.json') 56 | self.img_source = os.path.join(self.data_dir, data_split, 'rgb') 57 | self.seg_source = os.path.join(self.data_dir, data_split, 'segm') 58 | self.input_img_shape = cfg.MODEL.input_img_shape # should be (256, 256) 59 | 60 | self.cam_intr = np.array([[480., 0., 128.], [0., 480., 128.], 61 | [0., 0., 1.]]).astype(np.float32) 62 | self.cam_extr = np.array([[1., 0., 0., 0.], [0., -1., 0., 0.], 63 | [0., 0., -1., 
0.]]).astype(np.float32) 64 | 65 | self.joint_set = {'hand': \ 66 | {'joint_num': 21, # single hand 67 | 'joints_name': ('Wrist', 'Thumb_1', 'Thumb_2', 'Thumb_3', 'Thumb_4', 'Index_1', 'Index_2', 'Index_3', 'Index_4', 'Middle_1', 'Middle_2', 'Middle_3', 'Middle_4', 'Ring_1', 'Ring_2', 'Ring_3', 'Ring_4', 'Pinky_1', 'Pinky_2', 'Pinky_3', 'Pinky_4'), 68 | 'flip_pairs': () 69 | } 70 | } 71 | self.joint_set['hand']['root_joint_idx'] = self.joint_set['hand']['joints_name'].index('Wrist') 72 | 73 | # Organize db id based on split 74 | self.db = COCO(self.anno_file) 75 | self.start_point = 0 76 | self.end_point = len(self.split) 77 | self.length = self.end_point - self.start_point 78 | 79 | # Sort contact by difficulty (Balanced contact sampling) 80 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 81 | sample_id_to_split_id = {} 82 | for split_idx in range(len(self.split)): 83 | each_sample_id, _ = get_sample_id(self.db, self.split, split_idx) 84 | if each_sample_id in sample_id_to_split_id: 85 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 86 | else: 87 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 88 | 89 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 90 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 91 | 92 | new_split = [sample_id_to_split_id[key] for key in sample_id_difficulty_list] 93 | self.split = new_split 94 | 95 | 96 | def __len__(self): 97 | return len(self.split) 98 | 99 | 100 | def __getitem__(self, index): 101 | sample_id, img_data = get_sample_id(self.db, self.split, index) 102 | 103 | # Base path 104 | img_path = os.path.join(self.img_source, img_data['file_name'] + '.jpg') 105 | seg_path = os.path.join(self.seg_source, img_data['file_name'] + '.png') 106 | 107 | # Full image 108 | orig_img = load_img(img_path) 109 | orig_img_shape = orig_img.shape[:2] 110 | 111 | mano_valid = np.ones((1), dtype=np.float32) 112 | 113 | 114 | ################################## LOAD ANNOTATION DATA ##################################### 115 | bbox_ho = np.array([0, 0, 256, 256]) 116 | 117 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 118 | contact_data = dict(contact_h=contact_h) 119 | contact_h = contact_data['contact_h'] 120 | ################################## LOAD ANNOTATION DATA ##################################### 121 | 122 | 123 | ############################### PROCESS CROP AND AUGMENTATION ################################ 124 | # Crop image 125 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_ho, self.data_split, enforce_flip=False) 126 | crop_img = img.copy() 127 | 128 | # Transform for 3D HMR 129 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 130 | img = self.transform(img.astype(np.float32)/255.0) 131 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 132 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 133 | img = img.transpose(2, 0, 1) / 255.0 134 | img = normalize_img(torch.from_numpy(img)).float() 135 | else: 136 | raise NotImplementedError 137 | ############################### PROCESS CROP AND AUGMENTATION ################################ 138 | 139 | 140 | input_data = dict(image=img) 141 | targets_data = 
dict(contact_data=contact_data) 142 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 143 | 144 | 145 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /data/Hi4D/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import pickle 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.transforms import Normalize 9 | 10 | from lib.core.config import cfg 11 | from lib.utils.human_models import mano 12 | from lib.utils.func_utils import load_img 13 | from lib.utils.preprocessing import augmentation_contact 14 | from lib.utils.train_utils import get_contact_difficulty_sample_id 15 | 16 | 17 | def get_sample_id(db, db_pid, index): 18 | aid = db[index] 19 | pid = db_pid[index] 20 | pair_name = aid.split('/')[-5] 21 | action_name = aid.split('/')[-4] 22 | cam_name = aid.split('/')[-2] 23 | img_name = aid.split('/')[-1].split('.jpg')[0] 24 | sample_id = f'{pair_name}-{action_name}-{cam_name}-{img_name}-{pid}' 25 | return sample_id 26 | 27 | 28 | 29 | # Main dataloader code for Hi4D dataset 30 | class Hi4D(Dataset): 31 | def __init__(self, transform, data_split): 32 | super(Hi4D, self).__init__() 33 | self.__dict__.update(locals()) 34 | 35 | self.transform = transform 36 | dataset_name = 'hi4d' 37 | 38 | self.data_split = data_split 39 | self.root_path = root_path = os.path.join('data', 'Hi4D') 40 | self.data_dir = os.path.join(self.root_path, 'data') 41 | 42 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 43 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 44 | 45 | # SMPL-X to MANO mapping 46 | smpl_smplx_mapping_path = os.path.join('data', 'base_data', 'conversions', 'smpl_to_smplx.pkl') 47 | smplx_mano_mapping_path = os.path.join('data', 'base_data', 'conversions', 'smplx_to_mano.pkl') 48 | 49 | with open(smpl_smplx_mapping_path, 'rb') as f: 50 | self.smpl_to_smplx_mapping = pickle.load(f) 51 | 52 | with open(smplx_mano_mapping_path, 'rb') as f: 53 | self.smplx_to_mano_mapping = pickle.load(f) 54 | self.smplx_to_mano_mapping_r = self.smplx_to_mano_mapping["right_hand"] 55 | 56 | # Organize db id based on split 57 | split_db_id_file_path = os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 58 | with open(split_db_id_file_path, 'r') as f: 59 | self.db = json.load(f) 60 | split_db_pid_file_path = os.path.join(self.root_path, 'splits', f'{self.data_split}_pid.json') 61 | with open(split_db_pid_file_path, 'r') as f: 62 | self.db_pid = json.load(f) 63 | 64 | # Sort contact by difficulty (Balanced contact sampling) 65 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 66 | sample_id_to_db_id = {} 67 | for db_idx in range(len(self.db)): 68 | each_sample_id = get_sample_id(self.db, self.db_pid, db_idx) 69 | if each_sample_id in sample_id_to_db_id: 70 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 71 | else: 72 | sample_id_to_db_id[each_sample_id] = self.db[db_idx] 73 | 74 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 75 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_db_id, self.contact_data_path, contact_means_path) 76 | 77 | new_db = [sample_id_to_db_id[key] for key in sample_id_difficulty_list] 78 | self.db = 
new_db 79 | 80 | 81 | def __len__(self): 82 | return len(self.db) 83 | 84 | 85 | def __getitem__(self, index): 86 | aid = self.db[index] 87 | pid = self.db_pid[index] 88 | pair_name = aid.split('/')[-5] 89 | action_name = aid.split('/')[-4] 90 | cam_name = aid.split('/')[-2] 91 | img_name = aid.split('/')[-1].split('.jpg')[0] 92 | sample_id = f'{pair_name}-{action_name}-{cam_name}-{img_name}-{pid}' 93 | 94 | orig_img_path = os.path.join(self.data_dir, pair_name, action_name, 'images', cam_name, f'{img_name}.jpg') 95 | 96 | orig_img = load_img(orig_img_path) 97 | img_shape = orig_img.shape[:2] 98 | img_h, img_w = img_shape 99 | 100 | mano_valid = np.ones((1), dtype=np.float32) 101 | 102 | 103 | ################################## LOAD ANNOTATION DATA ##################################### 104 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 105 | 106 | annot_data = np.load(annot_data_path, allow_pickle=True) 107 | mano_r_contact_0 = annot_data['mano_r_contact_0'] 108 | mano_r_contact_1 = annot_data['mano_r_contact_1'] 109 | bbox_hand_r_0 = annot_data['bbox_hand_r_0'] 110 | bbox_hand_r_1 = annot_data['bbox_hand_r_1'] 111 | 112 | if pid == 0: 113 | contact_h = mano_r_contact_0.astype(np.float32) 114 | else: 115 | contact_h = mano_r_contact_1.astype(np.float32) 116 | 117 | contact_data = dict(contact_h=contact_h) 118 | ################################## LOAD ANNOTATION DATA ##################################### 119 | 120 | 121 | ############################### PROCESS CROP AND AUGMENTATION ################################ 122 | if pid == 0: 123 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r_0, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 124 | else: 125 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r_1, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 126 | crop_img = img.copy() 127 | 128 | # Transform for 3D HMR 129 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 130 | img = self.transform(img.astype(np.float32)/255.0) 131 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 132 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 133 | img = img.transpose(2, 0, 1) / 255.0 134 | img = normalize_img(torch.from_numpy(img)).float() 135 | else: 136 | raise NotImplementedError 137 | ############################### PROCESS CROP AND AUGMENTATION ################################ 138 | 139 | 140 | input_data = dict(image=img) 141 | targets_data = dict(contact_data=contact_data) 142 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 143 | 144 | 145 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) --------------------------------------------------------------------------------
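
Usage note (not part of the repository sources above): every dataset class under data/*/dataset.py follows the same interface. Each is constructed with (transform, data_split), indexes into a per-dataset split, and its __getitem__ returns a dict with input_data (the normalized hand crop), targets_data (per-vertex hand contact labels under contact_data/contact_h), and meta_info (sample_id and mano_valid). The sketch below shows how such a dataset would typically be wrapped in a standard PyTorch DataLoader. It is a minimal illustration under stated assumptions, not code from the repository: the import path data.Hi4D.dataset, the backbone and experiment-directory arguments, and the batch size are placeholders, and the config must be initialized through update_config (as demo.py does) before any dataset is built, since the constructors read cfg.MODEL.* fields.

from torch.utils.data import DataLoader
from torchvision import transforms

from lib.core.config import update_config
from data.Hi4D.dataset import Hi4D  # assumed import path; any data/*/dataset.py class behaves the same way

# Load the config first: the dataset constructors read cfg.MODEL.* (backbone type, balanced sampling, etc.).
update_config(backbone_type='hamer', exp_dir='experiments_example')  # placeholder experiment directory

# ToTensor() is only consumed by the ResNet/HRNet/HandOccNet branch of __getitem__;
# the HaMeR/ViT branch normalizes the crop internally and ignores this transform.
dataset = Hi4D(transform=transforms.ToTensor(), data_split='train')
loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)

for batch in loader:
    images = batch['input_data']['image']                             # (B, 3, H, W) float tensor
    contact_gt = batch['targets_data']['contact_data']['contact_h']   # (B, V) per-vertex hand contact labels
    sample_ids = batch['meta_info']['sample_id']                      # list of B sample-id strings
    break  # one batch is enough to illustrate the returned structure

The same pattern applies to HO3D, Decaf, RICH, ObMan, and the other datasets shown above; only the on-disk layout they read from differs.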