├── asset ├── teaser.png ├── logo_cvlab.png └── example_images │ ├── using_pen1.jpg │ ├── holding_cup1.jpg │ ├── holding_cup2.jpg │ ├── holding_hammer1.jpg │ ├── squidgame_demo2.png │ └── holding_scissors2.jpg ├── scripts ├── download_official_decaf.sh ├── download_official_ycb_models.sh ├── download_official_hoi4d.sh ├── download_demo_base_data.sh ├── download_train_base_data.sh ├── download_demo_example_videos.sh ├── download_official_mow.sh ├── download_official_prox.sh ├── download_official_hi4d.sh ├── download_gsdf_obman.sh ├── download_train_preprocessed_data.sh ├── download_haco_checkpoints.sh ├── download_official_hic.sh ├── download_initial_checkpoints.sh ├── extract_official_hi4d.sh ├── download_official_interhand26m.sh ├── download_official_dexycb.sh └── download_official_rich.sh ├── requirements.txt ├── docs ├── data_demo.md └── data_eval.md ├── lib ├── utils │ ├── log_utils.py │ ├── smplx │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── README.md │ │ │ ├── clean_ch.py │ │ │ └── merge_smplh_mano.py │ │ ├── smplx │ │ │ ├── __init__.py │ │ │ ├── vertex_ids.py │ │ │ ├── vertex_joint_selector.py │ │ │ ├── utils.py │ │ │ └── joint_names.py │ │ ├── setup.py │ │ ├── examples │ │ │ ├── vis_flame_vertices.py │ │ │ └── vis_mano_vertices.py │ │ └── LICENSE │ ├── eval_utils.py │ ├── contact_utils.py │ ├── func_utils.py │ ├── human_models.py │ ├── mano_utils.py │ ├── demo_utils.py │ └── transforms.py ├── models │ ├── backbone │ │ ├── vit.py │ │ └── resnet.py │ └── model.py └── core │ ├── logger.py │ ├── base.py │ └── config.py ├── data ├── DexYCB │ └── toolkit │ │ ├── factory.py │ │ ├── logging.py │ │ ├── layers │ │ ├── mano_layer.py │ │ ├── mano_group_layer.py │ │ ├── ycb_group_layer.py │ │ └── ycb_layer.py │ │ └── obj.py ├── dataset.py ├── MOW │ └── dataset.py ├── PROX │ └── dataset.py ├── HIC │ └── dataset.py ├── H2O │ └── dataset.py ├── HOI4D │ └── dataset.py ├── ARCTIC │ └── dataset.py ├── HO3D │ └── dataset.py ├── Decaf │ └── dataset.py ├── RICH │ └── dataset.py ├── ObMan │ └── dataset.py └── Hi4D │ └── dataset.py ├── .gitignore ├── test.py ├── demo.py └── demo_video.py /asset/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/teaser.png -------------------------------------------------------------------------------- /asset/logo_cvlab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/logo_cvlab.png -------------------------------------------------------------------------------- /asset/example_images/using_pen1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/using_pen1.jpg -------------------------------------------------------------------------------- /asset/example_images/holding_cup1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/holding_cup1.jpg -------------------------------------------------------------------------------- /asset/example_images/holding_cup2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/holding_cup2.jpg -------------------------------------------------------------------------------- 
/asset/example_images/holding_hammer1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/holding_hammer1.jpg
--------------------------------------------------------------------------------
/asset/example_images/squidgame_demo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/squidgame_demo2.png
--------------------------------------------------------------------------------
/asset/example_images/holding_scissors2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dqj5182/HACO_RELEASE/HEAD/asset/example_images/holding_scissors2.jpg
--------------------------------------------------------------------------------
/scripts/download_official_decaf.sh:
--------------------------------------------------------------------------------
1 | mkdir -p data/Decaf
2 | wget -P data/Decaf https://vcai.mpi-inf.mpg.de/projects/Decaf/static/DecafDataset.zip
3 | unzip data/Decaf/DecafDataset.zip -d data/Decaf
4 | mv data/Decaf/DecafDataset data/Decaf/data
5 | rm -f data/Decaf/DecafDataset.zip
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | easydict
2 | tqdm
3 | pytz
4 | opencv-python
5 | trimesh
6 | rtree
7 | point-cloud-utils
8 | chumpy
9 | pyyaml
10 | plyfile
11 | einops
12 | timm==1.0.14
13 | smplx
14 | gdown
15 | mediapipe
16 | pyrender==0.1.33
17 | huggingface_hub
18 | pycocotools
19 | ultralytics
--------------------------------------------------------------------------------
/docs/data_demo.md:
--------------------------------------------------------------------------------
1 | ## Data
2 | You need to follow the directory structure of `data` as shown below.
3 | ``` 4 | ${ROOT} 5 | |-- data 6 | | |-- base_data 7 | ``` 8 | * Download `base_data` from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data/blob/main/demo/data/base_data.tar.gz) by running: 9 | ``` 10 | bash scripts/download_demo_base_data.sh 11 | ``` -------------------------------------------------------------------------------- /lib/utils/log_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | from datetime import datetime 5 | from pytz import timezone 6 | 7 | 8 | def get_datetime(timezone_name='Asia/Seoul'): 9 | datetime_out = datetime.now(timezone(timezone_name)).strftime('%Y-%m-%d_%H:%M:%S') 10 | return datetime_out 11 | 12 | 13 | def init_dirs(dir_list): 14 | for dir in dir_list: 15 | if os.path.exists(dir) and os.path.isdir(dir): 16 | shutil.rmtree(dir) 17 | os.makedirs(dir) -------------------------------------------------------------------------------- /scripts/download_official_ycb_models.sh: -------------------------------------------------------------------------------- 1 | # Create target directory for DexYCB 2 | mkdir -p data/DexYCB/data 3 | gdown https://drive.google.com/uc?id=1gmcDD-5bkJfcMKLZb3zGgH_HUFbulQWu -O data/DexYCB/data/YCB_Video_Models.zip 4 | 5 | # Unzip in DexYCB 6 | unzip data/DexYCB/data/YCB_Video_Models.zip -d data/DexYCB/data 7 | rm data/DexYCB/data/YCB_Video_Models.zip 8 | 9 | # Copy to H2O3D 10 | mkdir -p data/H2O3D/YCB_object_models 11 | cp -r data/DexYCB/data/models data/H2O3D/YCB_object_models/models -------------------------------------------------------------------------------- /scripts/download_official_hoi4d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Target directory 4 | target_dir="data/HOI4D/data/datalist" 5 | mkdir -p "$target_dir" 6 | 7 | # Download files 8 | wget https://raw.githubusercontent.com/leolyliu/HOI4D-Instructions/main/prepare_4Dseg/datalists/train_all.txt -O "$target_dir/train_all.txt" 9 | wget https://raw.githubusercontent.com/leolyliu/HOI4D-Instructions/main/prepare_4Dseg/datalists/test_all.txt -O "$target_dir/test_all.txt" 10 | 11 | echo "Download complete: Files saved to $target_dir" -------------------------------------------------------------------------------- /scripts/download_demo_base_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TARGET_DIR="data" 4 | FILE_URL="https://huggingface.co/datasets/dqj5182/haco-data/resolve/main/demo/data/base_data.tar.gz" 5 | ARCHIVE_NAME="$TARGET_DIR/base_data.tar.gz" 6 | 7 | mkdir -p "$TARGET_DIR" 8 | 9 | echo "Downloading base_data.tar.gz..." 10 | wget -c "$FILE_URL" -O "$ARCHIVE_NAME" 11 | 12 | echo "Decompressing into $TARGET_DIR..." 13 | tar -xvzf "$ARCHIVE_NAME" -C "$TARGET_DIR" 14 | 15 | echo "Removing archive..." 16 | rm "$ARCHIVE_NAME" 17 | 18 | echo "Done. Extracted to $TARGET_DIR" -------------------------------------------------------------------------------- /scripts/download_train_base_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TARGET_DIR="data" 4 | FILE_URL="https://huggingface.co/datasets/dqj5182/haco-data/resolve/main/train/data/base_data.tar.gz" 5 | ARCHIVE_NAME="$TARGET_DIR/base_data.tar.gz" 6 | 7 | mkdir -p "$TARGET_DIR" 8 | 9 | echo "Downloading base_data.tar.gz..." 10 | wget -c "$FILE_URL" -O "$ARCHIVE_NAME" 11 | 12 | echo "Decompressing into $TARGET_DIR..." 
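# The archive is expected to unpack as $TARGET_DIR/base_data/, matching the data/base_data layout described in docs/data_demo.md
# (-xvzf extracts the gzipped tar verbosely; -C places the contents under $TARGET_DIR)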
13 | tar -xvzf "$ARCHIVE_NAME" -C "$TARGET_DIR" 14 | 15 | echo "Removing archive..." 16 | rm "$ARCHIVE_NAME" 17 | 18 | echo "Done. Extracted to $TARGET_DIR" -------------------------------------------------------------------------------- /scripts/download_demo_example_videos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TARGET_DIR="asset" 4 | FILE_URL="https://huggingface.co/datasets/dqj5182/haco-data/resolve/main/demo/asset/example_videos.zip" 5 | ARCHIVE_NAME="$TARGET_DIR/example_videos.zip" 6 | 7 | mkdir -p "$TARGET_DIR" 8 | 9 | echo "Downloading example_videos.zip..." 10 | wget -c "$FILE_URL" -O "$ARCHIVE_NAME" 11 | 12 | echo "Unzipping into $TARGET_DIR..." 13 | unzip -o "$ARCHIVE_NAME" -d "$TARGET_DIR" 14 | 15 | # Remove zip after extraction 16 | rm "$ARCHIVE_NAME" 17 | 18 | echo "Done. Extracted to $TARGET_DIR" -------------------------------------------------------------------------------- /scripts/download_official_mow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Download and extract MOW dataset 5 | mkdir -p data/MOW 6 | wget --show-progress -P data/MOW https://zhec.github.io/rhoi/mow.zip 7 | unzip -q data/MOW/mow.zip -d data/MOW 8 | mkdir -p data/MOW/data 9 | mv data/MOW/mow/images data/MOW/data/ 10 | mv data/MOW/mow/models data/MOW/data/ 11 | rm -rf data/MOW/__MACOSX data/MOW/mow data/MOW/mow.zip 12 | 13 | # Download poses.json 14 | wget --show-progress -O data/MOW/data/poses.json https://raw.githubusercontent.com/ZheC/MOW/b2acbb4fac40acc4c286833da895fc9f23e58bb6/poses.json -------------------------------------------------------------------------------- /lib/utils/smplx/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | import clean_ch 19 | import merge_smplh_mano 20 | -------------------------------------------------------------------------------- /data/DexYCB/toolkit/factory.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Factory method for easily getting datasets by name.""" 6 | 7 | from .dex_ycb import DexYCBDataset 8 | 9 | _sets = {} 10 | 11 | for setup in ('s0', 's1', 's2', 's3'): 12 | for split in ('train', 'val', 'test'): 13 | name = '{}_{}'.format(setup, split) 14 | _sets[name] = (lambda setup=setup, split=split: DexYCBDataset(setup, split)) 15 | 16 | 17 | def get_dataset(name): 18 | """Gets a dataset by name. 19 | 20 | Args: 21 | name: Dataset name. E.g., 's0_test'. 
22 | 23 | Returns: 24 | A dataset. 25 | 26 | Raises: 27 | KeyError: If name is not supported. 28 | """ 29 | if name not in _sets: 30 | raise KeyError('Unknown dataset name: {}'.format(name)) 31 | return _sets[name]() 32 | -------------------------------------------------------------------------------- /scripts/download_official_prox.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | urle () { [[ "${1}" ]] || return 1; local LANG=C i x; for (( i = 0; i < ${#1}; i++ )); do x="${1:i:1}"; [[ "${x}" == [a-zA-Z0-9.~-] ]] && echo -n "${x}" || printf '%%%02X' "'${x}"; done; echo; } 4 | 5 | # username and password input 6 | echo -e "\nYou need to register at https://prox.is.tue.mpg.de/" 7 | read -p "Username: " username 8 | read -p "Password: " password 9 | 10 | # Set save directory (hardcoded) 11 | save_dir="data/PROX/data" 12 | 13 | username=$(urle $username) 14 | password=$(urle $password) 15 | 16 | mkdir -p "$save_dir" 17 | 18 | # Download 19 | wget --post-data "username=$username&password=$password" \ 20 | 'https://download.is.tue.mpg.de/download.php?domain=prox&resume=1&sfile=quantitative.zip' \ 21 | -O "$save_dir/quantitative.zip" \ 22 | --no-check-certificate --continue 23 | 24 | unzip data/PROX/data/quantitative.zip -d data/PROX/data 25 | rm -f data/PROX/data/quantitative.zip -------------------------------------------------------------------------------- /lib/utils/smplx/tools/README.md: -------------------------------------------------------------------------------- 1 | ## Removing Chumpy objects 2 | 3 | In a Python 2 virtual environment with [Chumpy](https://github.com/mattloper/chumpy) installed run the following to remove any Chumpy objects from the model data: 4 | 5 | ```bash 6 | python tools/clean_ch.py --input-models path-to-models/*.pkl --output-folder output-folder 7 | ``` 8 | 9 | ## Merging SMPL-H and MANO parameters 10 | 11 | In order to use the given PyTorch SMPL-H module we first need to merge the SMPL-H and MANO parameters in a single file. After agreeing to the license and downloading the models, run the following command: 12 | 13 | ```bash 14 | python tools/merge_smplh_mano.py --smplh-fn SMPLH_FOLDER/SMPLH_GENDER.pkl \ 15 | --mano-left-fn MANO_FOLDER/MANO_LEFT.pkl \ 16 | --mano-right-fn MANO_FOLDER/MANO_RIGHT.pkl \ 17 | --output-folder OUTPUT_FOLDER 18 | ``` 19 | 20 | where SMPLH_FOLDER is the folder with the SMPL-H files and MANO_FOLDER the one for the MANO files. 21 | -------------------------------------------------------------------------------- /scripts/download_official_hi4d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Ask user for the personalized Hi4D URL (usually https://hi4d.ait.ethz.ch/download.php?dt=...) 4 | read -p "Enter your personal Hi4D download URL: " URL 5 | 6 | # Set target directory 7 | TARGET_DIR="data/Hi4D/data" 8 | mkdir -p "$TARGET_DIR" 9 | cd "$TARGET_DIR" || exit 1 10 | 11 | BASE_URL="https://hi4d.ait.ethz.ch" 12 | 13 | echo "Fetching page content from $URL..." 14 | html=$(curl -s "$URL") 15 | 16 | echo "Extracting .tar.gz links..." 17 | echo "$html" | grep -oP 'href="\K[^"]+\.tar\.gz' > hrefs.txt 18 | 19 | if [ ! -s hrefs.txt ]; then 20 | echo "No .tar.gz links found. Please check your URL or access permissions." 21 | exit 1 22 | fi 23 | 24 | echo "Downloading files into $TARGET_DIR..." 25 | while read -r href; do 26 | filename=$(basename "$href") 27 | full_url="$BASE_URL/$href" 28 | echo "Downloading $filename..." 
29 | wget -c "$full_url" -O "$filename" 30 | done < hrefs.txt 31 | 32 | echo "Done. Files downloaded to $TARGET_DIR" -------------------------------------------------------------------------------- /data/DexYCB/toolkit/logging.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Functions for logging.""" 6 | 7 | import logging 8 | import sys 9 | 10 | 11 | def get_logger(log_file): 12 | """Gets a logger given the path to the log file. 13 | 14 | Args: 15 | name: Path to the log file. 16 | 17 | Returns: 18 | A logger. 19 | """ 20 | logger = logging.getLogger() 21 | logger.setLevel(logging.INFO) 22 | 23 | formatter = logging.Formatter('%(asctime)s: %(message)s', '%Y-%m-%d %H:%M:%S') 24 | 25 | stdout_handler = logging.StreamHandler(sys.stdout) 26 | stdout_handler.setLevel(logging.INFO) 27 | stdout_handler.setFormatter(formatter) 28 | logger.addHandler(stdout_handler) 29 | 30 | file_handler = logging.FileHandler(log_file, mode='w') 31 | file_handler.setLevel(logging.INFO) 32 | file_handler.setFormatter(formatter) 33 | logger.addHandler(file_handler) 34 | 35 | return logger 36 | -------------------------------------------------------------------------------- /lib/utils/smplx/smplx/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from .body_models import ( 18 | create, 19 | SMPL, 20 | SMPLH, 21 | SMPLX, 22 | MANO, 23 | FLAME, 24 | build_layer, 25 | SMPLLayer, 26 | SMPLHLayer, 27 | SMPLXLayer, 28 | MANOLayer, 29 | FLAMELayer, 30 | ) 31 | -------------------------------------------------------------------------------- /scripts/download_gsdf_obman.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ############ Download ObMan Annotations and Splits ############ 4 | # Set target directories 5 | ann_dir="data/ObMan/annotations" 6 | splits_dir="data/ObMan/splits" 7 | 8 | # Create directories 9 | mkdir -p "$ann_dir" 10 | mkdir -p "$splits_dir" 11 | 12 | # Download annotations folder from Google Drive 13 | echo "Downloading annotations to $ann_dir ..." 14 | gdown --folder https://drive.google.com/drive/folders/1DBzG9J0uLzCy4A6W6Uq6Aq4JNAHiiNJQ -O "$ann_dir" 15 | 16 | # Download split JSON files from GitHub 17 | echo "Downloading train/test split files to $splits_dir ..." 
18 | wget -c https://raw.githubusercontent.com/zerchen/gSDF/05101b5bde6765e9168026cff853b74a1412c125/datasets/obman/splits/train_87k.json -O "$splits_dir/train_87k.json" 19 | wget -c https://raw.githubusercontent.com/zerchen/gSDF/05101b5bde6765e9168026cff853b74a1412c125/datasets/obman/splits/test_6k.json -O "$splits_dir/test_6k.json" 20 | 21 | echo "ObMan annotations and splits successfully downloaded." 22 | ############ End of ObMan setup ############ -------------------------------------------------------------------------------- /scripts/download_train_preprocessed_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set directories 4 | TEMP_DIR="temp_haco_train_data" 5 | TARGET_DIR="data" 6 | 7 | # Initialize git-lfs and clone dataset repo 8 | git lfs install 9 | git clone https://huggingface.co/datasets/dqj5182/haco-data "$TEMP_DIR" 10 | 11 | # --------- Extract all .tar.gz in TEMP_DIR BEFORE moving --------- 12 | echo "Extracting .tar.gz files inside $TEMP_DIR..." 13 | 14 | find "$TEMP_DIR" -type f -name "*.tar.gz" | while read -r file; do 15 | echo "Extracting: $file" 16 | dir=$(dirname "$file") 17 | tar -xzf "$file" -C "$dir" 18 | if [ $? -eq 0 ]; then 19 | echo "Successfully extracted: $file" 20 | rm "$file" 21 | else 22 | echo "Failed to extract: $file" 23 | fi 24 | done 25 | 26 | # Create target directory if needed 27 | mkdir -p "$TARGET_DIR" 28 | 29 | # Now sync only the extracted contents (excluding .tar.gz) 30 | rsync -av --exclude='*.tar.gz' "$TEMP_DIR/train/data/" "$TARGET_DIR/" 31 | 32 | # Clean up temporary cloned repo 33 | rm -rf "$TEMP_DIR" 34 | 35 | echo "All extracted data moved to $TARGET_DIR" -------------------------------------------------------------------------------- /scripts/download_haco_checkpoints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Target directory 4 | TARGET_DIR="release_checkpoint" 5 | mkdir -p "$TARGET_DIR" 6 | 7 | # Base URL of the Hugging Face dataset repo (using 'resolve/main') 8 | BASE_URL="https://huggingface.co/datasets/dqj5182/haco-checkpoints/resolve/main/NeurIPS2025" 9 | 10 | # List of files to download (add more as needed) 11 | FILES=( 12 | "haco_neurips_hamer_checkpoint.ckpt" 13 | "haco_neurips_handoccnet_checkpoint.ckpt" 14 | "haco_neurips_vit_l_checkpoint.ckpt" 15 | "haco_neurips_vit_b_checkpoint.ckpt" 16 | "haco_neurips_vit_s_checkpoint.ckpt" 17 | "haco_neurips_hrnet_w48_checkpoint.ckpt" 18 | "haco_neurips_hrnet_w32_checkpoint.ckpt" 19 | "haco_neurips_resnet_152_checkpoint.ckpt" 20 | "haco_neurips_resnet_50_checkpoint.ckpt" 21 | "haco_neurips_resnet_101_checkpoint.ckpt" 22 | "haco_neurips_resnet_34_checkpoint.ckpt" 23 | "haco_neurips_resnet_18_checkpoint.ckpt" 24 | ) 25 | 26 | # Download each file directly to the target directory 27 | for file in "${FILES[@]}"; do 28 | echo "Downloading $file to $TARGET_DIR..." 
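    # wget -c resumes partial downloads, so re-running this script only re-fetches checkpoints that are missing or incomplete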
29 | wget -c "$BASE_URL/$file" -O "$TARGET_DIR/$file" 30 | done 31 | 32 | echo "All files downloaded to $TARGET_DIR" -------------------------------------------------------------------------------- /lib/models/backbone/vit.py: -------------------------------------------------------------------------------- 1 | import timm 2 | import torch.nn as nn 3 | 4 | 5 | class ViTBackbone(nn.Module): 6 | def __init__(self, model_name='vit_base_patch16_224', pretrained=True, return_cls=False): 7 | """ 8 | Args: 9 | model_name (str): 'vit_base_patch16_224' or 'vit_large_patch16_224' 10 | pretrained (bool): load pretrained weights from timm 11 | return_cls (bool): if True, return CLS token instead of patch tokens 12 | """ 13 | super().__init__() 14 | self.return_cls = return_cls 15 | 16 | # Load model with no classification head 17 | self.vit = timm.create_model(model_name, pretrained=pretrained, num_classes=0) 18 | 19 | # Get dimensions 20 | self.embed_dim = self.vit.embed_dim # 768 for B/16, 1024 for L/16 21 | self.patch_size = self.vit.patch_embed.patch_size 22 | 23 | def forward(self, x): 24 | # Features includes CLS + patch tokens: [B, 1 + N, D] 25 | x = self.vit.forward_features(x) 26 | 27 | if self.return_cls: 28 | return x[:, 0] # [B, D] – CLS token 29 | else: 30 | patch_tokens = x[:, 1:] # [B, N, D] 31 | B, N, D = patch_tokens.shape 32 | H = W = int(N ** 0.5) 33 | return patch_tokens.view(B, D, H, W) # [B, H, W, D] -------------------------------------------------------------------------------- /scripts/download_official_hic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set target directory 4 | save_dir="data/HIC/data" 5 | mkdir -p "$save_dir" 6 | cd "$save_dir" || exit 7 | 8 | # Download and unzip Hand_Hand sequences 9 | for seq_idx in 01 02 03 04 05 06 07 08 09 10 11; do 10 | echo "Downloading Hand_Hand sequence $seq_idx..." 11 | wget http://files.is.tue.mpg.de/dtzionas/Hand-Object-Capture/Dataset/Hand_Hand___All_Files/"$seq_idx".zip 12 | unzip "$seq_idx".zip 13 | rm "$seq_idx".zip 14 | done 15 | 16 | # Download and unzip Hand_Object sequences 17 | for seq_idx in 15 16 17 18 19 20 21; do 18 | echo "Downloading Hand_Object sequence $seq_idx..." 19 | wget http://files.is.tue.mpg.de/dtzionas/Hand-Object-Capture/Dataset/Hand_Object___All_Files/"$seq_idx".zip 20 | unzip "$seq_idx".zip 21 | rm "$seq_idx".zip 22 | done 23 | 24 | # Download the MANO-compatible parameter file 25 | echo "Downloading MANO-compatible parameter file..." 26 | wget http://files.is.tue.mpg.de/dtzionas/Hand-Object-Capture/Dataset/MANO_compatible/IJCV16___Results_MANO___parms_for___joints21.zip 27 | unzip IJCV16___Results_MANO___parms_for___joints21.zip 28 | rm IJCV16___Results_MANO___parms_for___joints21.zip 29 | 30 | echo "All files downloaded, unzipped, and cleaned up in $save_dir." 
31 | 32 | # Download HIC.json 33 | gdown https://drive.google.com/uc?id=1oqquzJ7DY728M8zQoCYvvuZEBh8L8zkQ -O data/HIC/data/HIC.json -------------------------------------------------------------------------------- /scripts/download_initial_checkpoints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # HaMeR 5 | mkdir -p data/base_data/pretrained_models/hamer 6 | wget -P data/base_data/pretrained_models/hamer https://www.cs.utexas.edu/~pavlakos/hamer/data/hamer_demo_data.tar.gz 7 | tar -xzf data/base_data/pretrained_models/hamer/hamer_demo_data.tar.gz -C data/base_data/pretrained_models/hamer 8 | mv data/base_data/pretrained_models/hamer/_DATA/hamer_ckpts/checkpoints/hamer.ckpt data/base_data/pretrained_models/hamer/hamer.ckpt 9 | rm -rf data/base_data/pretrained_models/hamer/hamer_demo_data.tar.gz data/base_data/pretrained_models/hamer/_DATA 10 | 11 | # HandOccNet 12 | mkdir -p data/base_data/pretrained_models/handoccnet 13 | gdown https://drive.google.com/uc?id=1JXOcWgn6Bx173BhDH99EH6sZ7oOW05Hh -O data/base_data/pretrained_models/handoccnet/snapshot_demo.pth.tar 14 | 15 | # HRNet 16 | mkdir -p data/base_data/pretrained_models/hrnet 17 | gdown https://drive.google.com/uc?id=1aTXmxKAJVLsXbvM-TmQ0ZjJxP868G73q -O data/base_data/pretrained_models/hrnet/hrnet_w32-36af842e.pth 18 | gdown https://drive.google.com/uc?id=1qm5-QfHTz5Ia71ByZ1Haq5zJpyEbZRoc -O data/base_data/pretrained_models/hrnet/hrnet_w48-8ef0771d.pth 19 | 20 | # Pose2Pose 21 | mkdir -p data/base_data/pretrained_models/pose2pose/hand 22 | gdown https://drive.google.com/uc?id=15wYR8psO2U3ZhFYQEH1-DWc81XkWvK2Y -O data/base_data/pretrained_models/pose2pose/hand/snapshot_12.pth.tar 23 | -------------------------------------------------------------------------------- /data/dataset.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from torch.utils.data.dataset import Dataset 4 | 5 | 6 | class MultipleDatasets(Dataset): 7 | def __init__(self, dbs, make_same_len=True): 8 | self.dbs = dbs 9 | self.db_num = len(self.dbs) 10 | self.max_db_data_num = max([len(db) for db in dbs]) 11 | self.db_len_cumsum = np.cumsum([len(db) for db in dbs]) 12 | self.make_same_len = make_same_len 13 | 14 | def __len__(self): 15 | # all dbs have the same length 16 | if self.make_same_len: 17 | return self.max_db_data_num * self.db_num 18 | # each db has different length 19 | else: 20 | return sum([len(db) for db in self.dbs]) 21 | 22 | def __getitem__(self, index): 23 | if self.make_same_len: 24 | db_idx = index // self.max_db_data_num 25 | data_idx = index % self.max_db_data_num 26 | if data_idx >= len(self.dbs[db_idx]) * (self.max_db_data_num // len(self.dbs[db_idx])): # last batch: random sampling 27 | data_idx = random.randint(0,len(self.dbs[db_idx])-1) 28 | else: # before last batch: use modular 29 | data_idx = data_idx % len(self.dbs[db_idx]) 30 | else: 31 | for i in range(self.db_num): 32 | if index < self.db_len_cumsum[i]: 33 | db_idx = i 34 | break 35 | if db_idx == 0: 36 | data_idx = index 37 | else: 38 | data_idx = index - self.db_len_cumsum[db_idx-1] 39 | 40 | return self.dbs[db_idx][data_idx] -------------------------------------------------------------------------------- /scripts/extract_official_hi4d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Usage: 4 | # ./extract_tar_gz.sh /path/to/input_dir /path/to/destination_dir 5 | # If 
destination_dir is omitted, files are extracted to the input_dir 6 | 7 | # List of tar.gz files that do NOT have a root directory 8 | no_root_path_pair_list=("pair00_1" "pair19_1" "pair19_2" "pair32_1" "pair32_2") 9 | 10 | input_dir="data/Hi4D/data" 11 | destination_dir="data/Hi4D/data" 12 | 13 | # If destination is not specified, use input directory 14 | if [ -z "$destination_dir" ]; then 15 | destination_dir="$input_dir" 16 | fi 17 | 18 | # Check input directory exists 19 | if [ ! -d "$input_dir" ]; then 20 | echo "Error: Input directory '$input_dir' does not exist." 21 | exit 1 22 | fi 23 | 24 | # Create destination directory if it doesn't exist 25 | mkdir -p "$destination_dir" 26 | 27 | # Loop through all .tar.gz files in the input directory 28 | for file_path in "$input_dir"/*.tar.gz; do 29 | [ -e "$file_path" ] || continue # Skip if no files found 30 | 31 | file_name=$(basename "$file_path") 32 | pair_name="${file_name%.tar.gz}" 33 | 34 | echo "Processing $file_name..." 35 | 36 | # Check if this file is in the no_root_path list 37 | if [[ " ${no_root_path_pair_list[@]} " =~ " $pair_name " ]]; then 38 | extract_path="$destination_dir/$pair_name" 39 | mkdir -p "$extract_path" 40 | echo " Extracting to $extract_path..." 41 | tar -xzf "$file_path" -C "$extract_path" 42 | else 43 | echo " Extracting to $destination_dir..." 44 | tar -xzf "$file_path" -C "$destination_dir" 45 | fi 46 | 47 | if [ $? -eq 0 ]; then 48 | echo " Successfully extracted $file_name" 49 | else 50 | echo " Failed to extract $file_name" 51 | fi 52 | done -------------------------------------------------------------------------------- /lib/core/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os.path as osp 3 | import warnings 4 | 5 | 6 | warnings.filterwarnings("ignore") 7 | 8 | OK = '\033[92m' 9 | WARNING = '\033[93m' 10 | FAIL = '\033[91m' 11 | END = '\033[0m' 12 | 13 | PINK = '\033[95m' 14 | BLUE = '\033[94m' 15 | GREEN = OK 16 | RED = FAIL 17 | WHITE = END 18 | YELLOW = WARNING 19 | 20 | 21 | class ColorLogger(): 22 | def __init__(self, log_dir, log_name='log.txt'): 23 | # set log 24 | self._logger = logging.getLogger(log_name) 25 | self._logger.setLevel(logging.INFO) 26 | log_file = osp.join(log_dir, log_name) 27 | file_log = logging.FileHandler(log_file, mode='a') 28 | file_log.setLevel(logging.INFO) 29 | console_log = logging.StreamHandler() 30 | console_log.setLevel(logging.INFO) 31 | file_formatter = logging.Formatter( 32 | "%(asctime)s %(message)s", 33 | "%m-%d %H:%M:%S") 34 | console_formatter = logging.Formatter( 35 | "{}%(asctime)s{} %(message)s".format(GREEN, END), 36 | "%m-%d %H:%M:%S") 37 | file_log.setFormatter(file_formatter) 38 | console_log.setFormatter(console_formatter) 39 | self._logger.addHandler(file_log) 40 | self._logger.addHandler(console_log) 41 | 42 | def debug(self, msg): 43 | self._logger.debug(str(msg)) 44 | 45 | def info(self, msg): 46 | self._logger.info(str(msg)) 47 | 48 | def warning(self, msg): 49 | self._logger.warning(WARNING + 'WRN: ' + str(msg) + END) 50 | 51 | def critical(self, msg): 52 | self._logger.critical(RED + 'CRI: ' + str(msg) + END) 53 | 54 | def error(self, msg): 55 | self._logger.error(RED + 'ERR: ' + str(msg) + END) -------------------------------------------------------------------------------- /lib/utils/eval_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def evaluation(outputs, targets_data, 
meta_info, mode='val', thres=0.5): 6 | eval_out = {} 7 | 8 | # GT 9 | mesh_valid = meta_info['mano_valid'] is not None 10 | 11 | # Pred 12 | contact_pred = outputs['contact_out'].sigmoid()[0].detach().cpu().numpy() 13 | 14 | # Error Calculate 15 | if mesh_valid: 16 | # Contact Metrics 17 | cont_pre, cont_rec, cont_f1 = compute_contact_metrics(targets_data['contact_data']['contact_h'][0].detach().cpu().numpy(), contact_pred, mesh_valid, thres=thres) 18 | eval_out['cont_pre'] = cont_pre 19 | eval_out['cont_rec'] = cont_rec 20 | eval_out['cont_f1'] = cont_f1 21 | 22 | return eval_out 23 | 24 | 25 | def compute_contact_metrics(gt, pred, valid, thres=0.5): 26 | """ 27 | Compute precision, recall, and f1 using NumPy 28 | """ 29 | if valid: 30 | # True Positives 31 | tp_num = np.sum(gt[pred >= thres]) 32 | 33 | # Denominators for precision and recall 34 | precision_denominator = np.sum(pred >= thres) 35 | recall_denominator = np.sum(gt) 36 | 37 | # Compute precision, recall, and F1 score 38 | precision_ = tp_num / precision_denominator if precision_denominator > 0 else None 39 | recall_ = tp_num / recall_denominator if recall_denominator > 0 else None 40 | if precision_ is not None and recall_ is not None and (precision_ + recall_) > 0: 41 | f1_ = 2 * precision_ * recall_ / (precision_ + recall_) 42 | else: 43 | f1_ = None 44 | else: 45 | # If not valid, return None for metrics 46 | precision_ = None 47 | recall_ = None 48 | f1_ = None 49 | 50 | return precision_, recall_, f1_ -------------------------------------------------------------------------------- /data/DexYCB/toolkit/layers/mano_layer.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Wrapper layer for manopth ManoLayer.""" 6 | 7 | import torch 8 | 9 | from torch.nn import Module 10 | from manopth.manolayer import ManoLayer 11 | 12 | 13 | class MANOLayer(Module): 14 | """Wrapper layer for manopth ManoLayer.""" 15 | 16 | def __init__(self, side, betas): 17 | """Constructor. 18 | 19 | Args: 20 | side: MANO hand type. 'right' or 'left'. 21 | betas: A numpy array of shape [10] containing the betas. 22 | """ 23 | super(MANOLayer, self).__init__() 24 | 25 | self._side = side 26 | self._betas = betas 27 | self._mano_layer = ManoLayer(flat_hand_mean=False, 28 | ncomps=45, 29 | side=self._side, 30 | mano_root='manopth/mano/models', 31 | use_pca=True) 32 | 33 | b = torch.from_numpy(self._betas).unsqueeze(0) 34 | f = self._mano_layer.th_faces 35 | self.register_buffer('b', b) 36 | self.register_buffer('f', f) 37 | 38 | v = torch.matmul(self._mano_layer.th_shapedirs, self.b.transpose( 39 | 0, 1)).permute(2, 0, 1) + self._mano_layer.th_v_template 40 | r = torch.matmul(self._mano_layer.th_J_regressor[0], v) 41 | self.register_buffer('root_trans', r) 42 | 43 | def forward(self, p, t): 44 | """Forward function. 45 | 46 | Args: 47 | p: A tensor of shape [B, 48] containing the pose. 48 | t: A tensor of shape [B, 3] containing the trans. 49 | 50 | Returns: 51 | v: A tensor of shape [B, 778, 3] containing the vertices. 52 | j: A tensor of shape [B, 21, 3] containing the joints. 
53 | """ 54 | v, j = self._mano_layer(p, self.b.expand(p.size(0), -1), t) 55 | v /= 1000 56 | j /= 1000 57 | return v, j 58 | -------------------------------------------------------------------------------- /lib/utils/contact_utils.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import torch 3 | import numpy as np 4 | from trimesh.proximity import ProximityQuery 5 | 6 | from lib.utils.human_models import mano 7 | 8 | 9 | def get_ho_contact_and_offset(mesh_hand, mesh_obj, c_thres): 10 | # Make sure that meshes are watertight and do not comntain inverted faces 11 | # Typically canonical space meshes are more stable 12 | 13 | pq = ProximityQuery(mesh_obj) 14 | obj_coord_c, dist, obj_coord_c_idx = pq.on_surface(mesh_hand.vertices.astype(np.float32)) 15 | 16 | is_contact_h = (dist < c_thres) 17 | contact_h = (1. * is_contact_h).astype(np.float32) 18 | 19 | contact_valid = np.ones((mano.vertex_num, 1)) 20 | inter_coord_valid = np.ones((mano.vertex_num)) 21 | 22 | # Explicit cleanup 23 | del pq 24 | gc.collect() 25 | 26 | return np.array(contact_h), np.array(obj_coord_c), contact_valid, inter_coord_valid 27 | 28 | 29 | def get_contact_thres(backbone_type='hamer'): 30 | # We select contact threshold that has best balance between precision and recall. 31 | if backbone_type == 'hamer': # seed 314 32 | return 0.52 33 | elif backbone_type == 'vit-l-16': # seed 327 34 | return 0.52 35 | elif backbone_type == 'vit-b-16': # seed 327 36 | return 0.5 37 | elif backbone_type == 'vit-s-16': # seed 314 38 | return 0.6 39 | elif backbone_type == 'handoccnet': # seed 314 40 | return 0.6 41 | elif backbone_type == 'hrnet-w48': # seed 314 42 | return 0.58 43 | elif backbone_type == 'hrnet-w32': # seed 314 44 | return 0.57 45 | elif backbone_type == 'resnet-152': # seed 314 46 | return 0.54 47 | elif backbone_type == 'resnet-101': # seed 314 48 | return 0.52 49 | elif backbone_type == 'resnet-50': # seed 314 50 | return 0.55 51 | elif backbone_type == 'resnet-34': # seed 314 52 | return 0.55 53 | elif backbone_type == 'resnet-18': # seed 314 54 | return 0.62 55 | else: 56 | raise NotImplementedError -------------------------------------------------------------------------------- /scripts/download_official_interhand26m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ########### Download InterHand2.6M images ############ 4 | # Set target directory for images 5 | save_images_dir="data/InterHand26M/images" 6 | mkdir -p "$save_images_dir" 7 | cd "$save_images_dir" || exit 1 8 | 9 | base_url="https://fb-baas-f32eacb9-8abb-11eb-b2b8-4857dd089e15.s3.amazonaws.com/InterHand2.6M/InterHand2.6M.images.5.fps.v1.0/" 10 | 11 | # Download all part files 12 | for part1 in a b; do 13 | for part2 in {a..z}; do 14 | if [[ "$part1" == "b" && "$part2" == "s" ]]; then 15 | break 16 | fi 17 | filename="InterHand2.6M.images.5.fps.v1.0.tar.part${part1}${part2}" 18 | echo "Downloading $filename ..." 19 | wget -c "${base_url}${filename}" 20 | done 21 | done 22 | 23 | # Download CHECKSUM and helper scripts 24 | wget -c "${base_url}InterHand2.6M.images.5.fps.v1.0.tar.CHECKSUM" 25 | wget -c "${base_url}unzip.sh" 26 | wget -c "${base_url}verify_download.py" 27 | 28 | # Run verification 29 | echo "Running verify_download.py..." 30 | python3 verify_download.py || { echo "Checksum verification failed"; exit 1; } 31 | 32 | # Run unzip 33 | echo "Running unzip.sh..." 
34 | bash unzip.sh || { echo "Unzip failed"; exit 1; } 35 | 36 | cd "../../.." || exit 1 37 | 38 | # Move extracted images into the target directory root 39 | extracted_subdir="$save_images_dir/InterHand2.6M_5fps_batch1/images" 40 | if [ -d "$extracted_subdir" ]; then 41 | echo "Moving images to $save_images_dir ..." 42 | mv "$extracted_subdir"/* "$save_images_dir" 43 | rm -r "$save_images_dir/InterHand2.6M_5fps_batch1" 44 | else 45 | echo "Expected directory $extracted_subdir not found." 46 | exit 1 47 | fi 48 | 49 | echo "InterHand2.6M image data downloaded and extracted to $save_images_dir" 50 | ########### End of image download ############ 51 | 52 | 53 | 54 | ############ Download InterHand2.6M annotations ############ 55 | save_ann_dir="data/InterHand26M/annotations" 56 | mkdir -p "$save_ann_dir" 57 | 58 | echo "Downloading annotations to $save_ann_dir ..." 59 | gdown --folder https://drive.google.com/drive/folders/12RNG9slv9i_TsXSoZ6pQAq-Fa98eGLoy -O "$save_ann_dir" 60 | 61 | # Move contents up if nested under 'annotations' 62 | if [ -d "$save_ann_dir/annotations" ]; then 63 | mv "$save_ann_dir/annotations/"* "$save_ann_dir" 64 | rmdir "$save_ann_dir/annotations" 65 | fi 66 | 67 | echo "InterHand2.6M annotations downloaded to $save_ann_dir" 68 | ############ End of annotations download ############ -------------------------------------------------------------------------------- /lib/utils/smplx/tools/clean_ch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import print_function 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | 22 | import argparse 23 | import os 24 | import os.path as osp 25 | 26 | import pickle 27 | 28 | from tqdm import tqdm 29 | import numpy as np 30 | 31 | 32 | def clean_fn(fn, output_folder='output'): 33 | with open(fn, 'rb') as body_file: 34 | body_data = pickle.load(body_file) 35 | 36 | output_dict = {} 37 | for key, data in body_data.iteritems(): 38 | if 'chumpy' in str(type(data)): 39 | output_dict[key] = np.array(data) 40 | else: 41 | output_dict[key] = data 42 | 43 | out_fn = osp.split(fn)[1] 44 | 45 | out_path = osp.join(output_folder, out_fn) 46 | with open(out_path, 'wb') as out_file: 47 | pickle.dump(output_dict, out_file) 48 | 49 | 50 | if __name__ == '__main__': 51 | parser = argparse.ArgumentParser() 52 | parser.add_argument('--input-models', dest='input_models', nargs='+', 53 | required=True, type=str, 54 | help='The path to the model that will be processed') 55 | parser.add_argument('--output-folder', dest='output_folder', 56 | required=True, type=str, 57 | help='The path to the output folder') 58 | 59 | args = parser.parse_args() 60 | 61 | input_models = args.input_models 62 | output_folder = args.output_folder 63 | if not osp.exists(output_folder): 64 | print('Creating directory: {}'.format(output_folder)) 65 | os.makedirs(output_folder) 66 | 67 | for input_model in input_models: 68 | clean_fn(input_model, output_folder=output_folder) 69 | -------------------------------------------------------------------------------- /lib/utils/smplx/smplx/vertex_ids.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from __future__ import print_function 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | 21 | # Joint name to vertex mapping. 
SMPL/SMPL-H/SMPL-X vertices that correspond to 22 | # MSCOCO and OpenPose joints 23 | vertex_ids = { 24 | 'smplh': { 25 | 'nose': 332, 26 | 'reye': 6260, 27 | 'leye': 2800, 28 | 'rear': 4071, 29 | 'lear': 583, 30 | 'rthumb': 6191, 31 | 'rindex': 5782, 32 | 'rmiddle': 5905, 33 | 'rring': 6016, 34 | 'rpinky': 6133, 35 | 'lthumb': 2746, 36 | 'lindex': 2319, 37 | 'lmiddle': 2445, 38 | 'lring': 2556, 39 | 'lpinky': 2673, 40 | 'LBigToe': 3216, 41 | 'LSmallToe': 3226, 42 | 'LHeel': 3387, 43 | 'RBigToe': 6617, 44 | 'RSmallToe': 6624, 45 | 'RHeel': 6787 46 | }, 47 | 'smplx': { 48 | 'nose': 9120, 49 | 'reye': 9929, 50 | 'leye': 9448, 51 | 'rear': 616, 52 | 'lear': 6, 53 | 'rthumb': 8079, 54 | 'rindex': 7669, 55 | 'rmiddle': 7794, 56 | 'rring': 7905, 57 | 'rpinky': 8022, 58 | 'lthumb': 5361, 59 | 'lindex': 4933, 60 | 'lmiddle': 5058, 61 | 'lring': 5169, 62 | 'lpinky': 5286, 63 | 'LBigToe': 5770, 64 | 'LSmallToe': 5780, 65 | 'LHeel': 8846, 66 | 'RBigToe': 8463, 67 | 'RSmallToe': 8474, 68 | 'RHeel': 8635 69 | }, 70 | 'mano': { 71 | 'thumb': 744, 72 | 'index': 320, 73 | 'middle': 443, 74 | 'ring': 554, 75 | 'pinky': 671, 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /lib/core/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | 5 | from lib.core.config import cfg 6 | from lib.utils.human_models import mano 7 | from lib.core.loss import VCBLoss, RegLoss, SmoothRegLoss 8 | 9 | V_regressor_336 = torch.tensor(np.load(cfg.MODEL.V_regressor_336_path), dtype=torch.float32) 10 | V_regressor_84 = torch.tensor(np.load(cfg.MODEL.V_regressor_84_path), dtype=torch.float32) 11 | J_regressor = torch.tensor(mano.joint_regressor, dtype=torch.float32) 12 | 13 | # Loss function 14 | vcb_mesh_loss = VCBLoss(v_type='mesh') 15 | vcb_mesh_336_loss = VCBLoss(v_type='mesh_336') 16 | vcb_mesh_84_loss = VCBLoss(v_type='mesh_84') 17 | vcb_joint_loss = VCBLoss(v_type='joint') 18 | reg_loss = RegLoss() 19 | smooth_reg_loss = SmoothRegLoss() 20 | 21 | 22 | def compute_loss(preds, targets, epoch): 23 | total_loss = 0 24 | 25 | batch_size = len(preds['contact_out']) 26 | contact_means = np.load(cfg.MODEL.contact_means_path) 27 | contact_means = torch.tensor(contact_means)[None].repeat(batch_size, 1) 28 | regularization_loss = reg_loss(preds['contact_out'], contact_means) 29 | smooth_regularization_loss = smooth_reg_loss(preds['contact_out'], torch.tensor(mano.layer['right'].faces.astype(np.int32))) 30 | 31 | # Calculate loss 32 | contact_h_mesh = targets['contact_data']['contact_h'] 33 | contact_h_336 = 1 * (torch.mm(contact_h_mesh, V_regressor_336.T) > 0) 34 | contact_h_84 = 1 * (torch.mm(contact_h_mesh, V_regressor_84.T) > 0) 35 | contact_h_joint = 1 * (torch.mm(contact_h_mesh, J_regressor.T) > 0) 36 | 37 | contact_mesh_loss = vcb_mesh_loss(preds['contact_out'], contact_h_mesh, epoch) 38 | contact_336_loss = vcb_mesh_336_loss(preds['contact_336_out'], contact_h_336, epoch) 39 | contact_84_loss = vcb_mesh_84_loss(preds['contact_84_out'], contact_h_84, epoch) 40 | contact_joint_loss = vcb_joint_loss(preds['contact_joint_out'], contact_h_joint, epoch) 41 | contact_loss = contact_mesh_loss + contact_336_loss + contact_84_loss + contact_joint_loss + 0.1 * regularization_loss + smooth_regularization_loss 42 | 43 | total_loss = contact_loss 44 | 45 | loss_dict = dict(total_loss=total_loss, 46 | contact_mesh_loss=contact_mesh_loss, 47 | contact_336_loss=contact_336_loss, 48 | 
contact_84_loss=contact_84_loss, 49 | contact_joint_loss=contact_joint_loss, 50 | regularization_loss=regularization_loss, 51 | smooth_regularization_loss=smooth_regularization_loss 52 | ) 53 | return total_loss, loss_dict -------------------------------------------------------------------------------- /lib/utils/smplx/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | import io 19 | import os 20 | 21 | from setuptools import setup 22 | 23 | # Package meta-data. 24 | NAME = 'smplx' 25 | DESCRIPTION = 'PyTorch module for loading the SMPLX body model' 26 | URL = 'http://smpl-x.is.tuebingen.mpg.de' 27 | EMAIL = 'vassilis.choutas@tuebingen.mpg.de' 28 | AUTHOR = 'Vassilis Choutas' 29 | REQUIRES_PYTHON = '>=3.6.0' 30 | VERSION = '0.1.21' 31 | 32 | here = os.path.abspath(os.path.dirname(__file__)) 33 | 34 | try: 35 | FileNotFoundError 36 | except NameError: 37 | FileNotFoundError = IOError 38 | 39 | # Import the README and use it as the long-description. 40 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 41 | try: 42 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 43 | long_description = '\n' + f.read() 44 | except FileNotFoundError: 45 | long_description = DESCRIPTION 46 | 47 | # Load the package's __version__.py module as a dictionary. 
48 | about = {} 49 | if not VERSION: 50 | with open(os.path.join(here, NAME, '__version__.py')) as f: 51 | exec(f.read(), about) 52 | else: 53 | about['__version__'] = VERSION 54 | 55 | pyrender_reqs = ['pyrender>=0.1.23', 'trimesh>=2.37.6', 'shapely'] 56 | matplotlib_reqs = ['matplotlib'] 57 | open3d_reqs = ['open3d-python'] 58 | 59 | setup(name=NAME, 60 | version=about['__version__'], 61 | description=DESCRIPTION, 62 | long_description=long_description, 63 | long_description_content_type='text/markdown', 64 | author=AUTHOR, 65 | author_email=EMAIL, 66 | python_requires=REQUIRES_PYTHON, 67 | url=URL, 68 | install_requires=[ 69 | 'numpy>=1.16.2', 70 | 'torch>=1.0.1.post2', 71 | 'torchgeometry>=0.1.2' 72 | ], 73 | extras_require={ 74 | 'pyrender': pyrender_reqs, 75 | 'open3d': open3d_reqs, 76 | 'matplotlib': matplotlib_reqs, 77 | 'all': pyrender_reqs + matplotlib_reqs + open3d_reqs 78 | }, 79 | packages=['smplx', 'tools']) 80 | -------------------------------------------------------------------------------- /data/DexYCB/toolkit/layers/mano_group_layer.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Wrapper layer to hold a group of MANOLayers.""" 6 | 7 | import torch 8 | 9 | from torch.nn import Module, ModuleList 10 | 11 | from .mano_layer import MANOLayer 12 | 13 | 14 | class MANOGroupLayer(Module): 15 | """Wrapper layer to hold a group of MANOLayers.""" 16 | 17 | def __init__(self, sides, betas): 18 | """Constructor. 19 | 20 | Args: 21 | sides: A list of MANO sides. 'right' or 'left'. 22 | betas: A list of numpy arrays of shape [10] containing the betas. 23 | """ 24 | super(MANOGroupLayer, self).__init__() 25 | 26 | self._sides = sides 27 | self._betas = betas 28 | self._layers = ModuleList( 29 | [MANOLayer(s, b) for s, b in zip(self._sides, self._betas)]) 30 | self._num_obj = len(self._sides) 31 | 32 | f = [] 33 | for i in range(self._num_obj): 34 | f.append(self._layers[i].f + 778 * i) 35 | f = torch.cat(f) 36 | self.register_buffer('f', f) 37 | 38 | r = torch.cat([l.root_trans for l in self._layers]) 39 | self.register_buffer('root_trans', r) 40 | 41 | @property 42 | def num_obj(self): 43 | return self._num_obj 44 | 45 | def forward(self, p, inds=None): 46 | """Forward function. 47 | 48 | Args: 49 | p: A tensor of shape [B, D] containing the pose vectors. 50 | inds: A list of sub-layer indices. 51 | 52 | Returns: 53 | v: A tensor of shape [B, N, 3] containing the vertices. 54 | j: A tensor of shape [B, J, 3] containing the joints. 55 | """ 56 | if inds is None: 57 | inds = range(self._num_obj) 58 | v = [ 59 | torch.zeros((p.size(0), 0, 3), 60 | dtype=torch.float32, 61 | device=self.f.device) 62 | ] 63 | j = [ 64 | torch.zeros((p.size(0), 0, 3), 65 | dtype=torch.float32, 66 | device=self.f.device) 67 | ] 68 | p, t = self._pose2pt(p) 69 | for i in inds: 70 | y = self._layers[i](p[:, i], t[:, i]) 71 | v.append(y[0]) 72 | j.append(y[1]) 73 | v = torch.cat(v, dim=1) 74 | j = torch.cat(j, dim=1) 75 | return v, j 76 | 77 | def _pose2pt(self, pose): 78 | """Extracts pose and trans from pose vectors. 79 | 80 | Args: 81 | pose: A tensor of shape [B, D] containing the pose vectors. 82 | 83 | Returns: 84 | p: A tensor of shape [B, O, 48] containing the pose. 85 | t: A tensor of shape [B, O, 3] containing the trans. 
86 | """ 87 | p = torch.stack( 88 | [pose[:, 51 * i + 0:51 * i + 48] for i in range(self._num_obj)], dim=1) 89 | t = torch.stack( 90 | [pose[:, 51 * i + 48:51 * i + 51] for i in range(self._num_obj)], dim=1) 91 | return p, t 92 | -------------------------------------------------------------------------------- /lib/utils/smplx/smplx/vertex_joint_selector.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from __future__ import absolute_import 18 | from __future__ import print_function 19 | from __future__ import division 20 | 21 | import numpy as np 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from .utils import to_tensor 27 | 28 | 29 | class VertexJointSelector(nn.Module): 30 | 31 | def __init__(self, vertex_ids=None, 32 | use_hands=True, 33 | use_feet_keypoints=True, **kwargs): 34 | super(VertexJointSelector, self).__init__() 35 | 36 | extra_joints_idxs = [] 37 | 38 | face_keyp_idxs = np.array([ 39 | vertex_ids['nose'], 40 | vertex_ids['reye'], 41 | vertex_ids['leye'], 42 | vertex_ids['rear'], 43 | vertex_ids['lear']], dtype=np.int64) 44 | 45 | extra_joints_idxs = np.concatenate([extra_joints_idxs, 46 | face_keyp_idxs]) 47 | 48 | if use_feet_keypoints: 49 | feet_keyp_idxs = np.array([vertex_ids['LBigToe'], 50 | vertex_ids['LSmallToe'], 51 | vertex_ids['LHeel'], 52 | vertex_ids['RBigToe'], 53 | vertex_ids['RSmallToe'], 54 | vertex_ids['RHeel']], dtype=np.int32) 55 | 56 | extra_joints_idxs = np.concatenate( 57 | [extra_joints_idxs, feet_keyp_idxs]) 58 | 59 | if use_hands: 60 | self.tip_names = ['thumb', 'index', 'middle', 'ring', 'pinky'] 61 | 62 | tips_idxs = [] 63 | for hand_id in ['l', 'r']: 64 | for tip_name in self.tip_names: 65 | tips_idxs.append(vertex_ids[hand_id + tip_name]) 66 | 67 | extra_joints_idxs = np.concatenate( 68 | [extra_joints_idxs, tips_idxs]) 69 | 70 | self.register_buffer('extra_joints_idxs', 71 | to_tensor(extra_joints_idxs, dtype=torch.long)) 72 | 73 | def forward(self, vertices, joints): 74 | extra_joints = torch.index_select(vertices, 1, self.extra_joints_idxs) 75 | joints = torch.cat([joints, extra_joints], dim=1) 76 | 77 | return joints 78 | -------------------------------------------------------------------------------- /scripts/download_official_dexycb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set target directories 4 | data_dir="data/DexYCB/data" 5 | splits_dir="data/DexYCB/splits" 6 | 7 | # Create directories 8 | mkdir -p "$data_dir" 9 | mkdir -p "$splits_dir" 10 | 11 | # Download, extract, and remove archive 12 | gdown https://drive.google.com/uc?id=1Ehh92wDE3CWAiKG7E9E73HjN2Xk2XfEk -O "$data_dir/20200709-subject-01.tar.gz" 13 | tar -xzf "$data_dir/20200709-subject-01.tar.gz" -C "$data_dir" && rm 
"$data_dir/20200709-subject-01.tar.gz" 14 | 15 | gdown https://drive.google.com/uc?id=1Uo7MLqTbXEa-8s7YQZ3duugJ1nXFEo62 -O "$data_dir/20200813-subject-02.tar.gz" 16 | tar -xzf "$data_dir/20200813-subject-02.tar.gz" -C "$data_dir" && rm "$data_dir/20200813-subject-02.tar.gz" 17 | 18 | gdown https://drive.google.com/uc?id=1FkUxas8sv8UcVGgAzmSZlJw1eI5W5CXq -O "$data_dir/20200820-subject-03.tar.gz" 19 | tar -xzf "$data_dir/20200820-subject-03.tar.gz" -C "$data_dir" && rm "$data_dir/20200820-subject-03.tar.gz" 20 | 21 | gdown https://drive.google.com/uc?id=14up6qsTpvgEyqOQ5hir-QbjMB_dHfdpA -O "$data_dir/20200903-subject-04.tar.gz" 22 | tar -xzf "$data_dir/20200903-subject-04.tar.gz" -C "$data_dir" && rm "$data_dir/20200903-subject-04.tar.gz" 23 | 24 | gdown https://drive.google.com/uc?id=1NBA_FPyGWOQF5-X9ueAat5g8lDMz-EmS -O "$data_dir/20200908-subject-05.tar.gz" 25 | tar -xzf "$data_dir/20200908-subject-05.tar.gz" -C "$data_dir" && rm "$data_dir/20200908-subject-05.tar.gz" 26 | 27 | gdown https://drive.google.com/uc?id=1UWIN2-wOBZX2T0dkAi4ctAAW8KffkXMQ -O "$data_dir/20200918-subject-06.tar.gz" 28 | tar -xzf "$data_dir/20200918-subject-06.tar.gz" -C "$data_dir" && rm "$data_dir/20200918-subject-06.tar.gz" 29 | 30 | gdown https://drive.google.com/uc?id=1oWEYD_o3PVh39pLzMlJcArkDtMj4nzI0 -O "$data_dir/20200928-subject-07.tar.gz" 31 | tar -xzf "$data_dir/20200928-subject-07.tar.gz" -C "$data_dir" && rm "$data_dir/20200928-subject-07.tar.gz" 32 | 33 | gdown https://drive.google.com/uc?id=1GTNZwhWbs7Mfez0krTgXwLPndvrw1Ztv -O "$data_dir/20201002-subject-08.tar.gz" 34 | tar -xzf "$data_dir/20201002-subject-08.tar.gz" -C "$data_dir" && rm "$data_dir/20201002-subject-08.tar.gz" 35 | 36 | gdown https://drive.google.com/uc?id=1j0BLkaCjIuwjakmywKdOO9vynHTWR0UH -O "$data_dir/20201015-subject-09.tar.gz" 37 | tar -xzf "$data_dir/20201015-subject-09.tar.gz" -C "$data_dir" && rm "$data_dir/20201015-subject-09.tar.gz" 38 | 39 | gdown https://drive.google.com/uc?id=1FvFlRfX-p5a5sAWoKEGc17zKJWwKaSB- -O "$data_dir/20201022-subject-10.tar.gz" 40 | tar -xzf "$data_dir/20201022-subject-10.tar.gz" -C "$data_dir" && rm "$data_dir/20201022-subject-10.tar.gz" 41 | 42 | # Download split JSON files from GitHub (gSDF) 43 | echo "Downloading train/test split files to $splits_dir ..." 44 | wget -c https://raw.githubusercontent.com/zerchen/gSDF/05101b5bde6765e9168026cff853b74a1412c125/datasets/dexycb/splits/train_s0_29k.json -O "$splits_dir/train_s0_29k.json" 45 | wget -c https://raw.githubusercontent.com/zerchen/gSDF/05101b5bde6765e9168026cff853b74a1412c125/datasets/dexycb/splits/test_s0_5k.json -O "$splits_dir/test_s0_5k.json" 46 | 47 | echo "All files downloaded, extracted, and archives removed." -------------------------------------------------------------------------------- /data/DexYCB/toolkit/layers/ycb_group_layer.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Wrapper layer to hold a group of YCBLayers.""" 6 | 7 | import torch 8 | 9 | from torch.nn import Module, ModuleList 10 | 11 | from .ycb_layer import YCBLayer 12 | 13 | 14 | class YCBGroupLayer(Module): 15 | """Wrapper layer to hold a group of YCBLayers.""" 16 | 17 | def __init__(self, ids): 18 | """Constructor. 19 | 20 | Args: 21 | ids: A list of YCB object ids. 
22 | """ 23 | super(YCBGroupLayer, self).__init__() 24 | 25 | self._ids = ids 26 | self._layers = ModuleList([YCBLayer(i) for i in self._ids]) 27 | self._num_obj = len(self._ids) 28 | 29 | f = [] 30 | offset = 0 31 | for i in range(self._num_obj): 32 | if i > 0: 33 | offset += self._layers[i - 1].v.size(1) 34 | f.append(self._layers[i].f + offset) 35 | f = torch.cat(f) 36 | self.register_buffer('f', f) 37 | 38 | @property 39 | def num_obj(self): 40 | return self._num_obj 41 | 42 | @property 43 | def obj_file(self): 44 | return [l.obj_file for l in self._layers] 45 | 46 | @property 47 | def count(self): 48 | return [l.f.numel() for l in self._layers] 49 | 50 | @property 51 | def material(self): 52 | return [l.material for l in self._layers] 53 | 54 | @property 55 | def tex_coords(self): 56 | return [l.tex_coords for l in self._layers] 57 | 58 | def forward(self, p, inds=None): 59 | """Forward function. 60 | 61 | Args: 62 | p: A tensor of shape [B, D] containing the pose vectors. 63 | inds: A list of sub-layer indices. 64 | 65 | Returns: 66 | v: A tensor of shape [B, N, 3] containing the transformed vertices. 67 | n: A tensor of shape [B, N, 3] containing the transformed normals. 68 | """ 69 | if inds is None: 70 | inds = range(self._num_obj) 71 | v = [ 72 | torch.zeros((p.size(0), 0, 3), 73 | dtype=torch.float32, 74 | device=self.f.device) 75 | ] 76 | n = [ 77 | torch.zeros((p.size(0), 0, 3), 78 | dtype=torch.float32, 79 | device=self.f.device) 80 | ] 81 | r, t = self._pose2rt(p) 82 | for i in inds: 83 | y = self._layers[i](r[:, i], t[:, i]) 84 | v.append(y[0]) 85 | n.append(y[1]) 86 | v = torch.cat(v, dim=1) 87 | n = torch.cat(n, dim=1) 88 | return v, n 89 | 90 | def _pose2rt(self, pose): 91 | """Extracts rotations and translations from pose vectors. 92 | 93 | Args: 94 | pose: A tensor of shape [B, D] containing the pose vectors. 95 | 96 | Returns: 97 | r: A tensor of shape [B, O, 3] containing the rotation vectors. 98 | t: A tensor of shape [B, O, 3] containing the translations. 99 | """ 100 | r = torch.stack( 101 | [pose[:, 6 * i + 0:6 * i + 3] for i in range(self._num_obj)], dim=1) 102 | t = torch.stack( 103 | [pose[:, 6 * i + 3:6 * i + 6] for i in range(self._num_obj)], dim=1) 104 | return r, t 105 | -------------------------------------------------------------------------------- /scripts/download_official_rich.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # URL encode function 4 | urle () { [[ "${1}" ]] || return 1; local LANG=C i x; for (( i = 0; i < ${#1}; i++ )); do x="${1:i:1}"; [[ "${x}" == [a-zA-Z0-9.~-] ]] && echo -n "${x}" || printf '%%%02X' "'${x}"; done; echo; } 5 | 6 | # Prompt for username and password 7 | echo -e "\nYou need to register at https://rich.is.tue.mpg.de/" 8 | read -p "Username: " username 9 | read -p "Password: " password 10 | 11 | # Encode credentials 12 | username=$(urle "$username") 13 | password=$(urle "$password") 14 | 15 | # Set save directory 16 | save_dir="data/RICH/data" 17 | mkdir -p "$save_dir" 18 | 19 | # ----------- Download Human-Scene Contact ----------- 20 | echo "Downloading train_hsc.zip..." 21 | wget --post-data "username=$username&password=$password" \ 22 | 'https://download.is.tue.mpg.de/download.php?domain=rich&resume=1&sfile=train_hsc.zip' \ 23 | -O "$save_dir/train_hsc.zip" \ 24 | --no-check-certificate --continue 25 | 26 | echo "Downloading test_hsc.zip..." 
27 | wget --post-data "username=$username&password=$password" \ 28 | 'https://download.is.tue.mpg.de/download.php?domain=rich&resume=1&sfile=test_hsc.zip' \ 29 | -O "$save_dir/test_hsc.zip" \ 30 | --no-check-certificate --continue 31 | 32 | # ----------- Download JPG Image Archives ----------- 33 | echo "Downloading JPG_images/train.tar.gz..." 34 | wget --post-data "username=$username&password=$password" \ 35 | 'https://download.is.tue.mpg.de/download.php?domain=rich&resume=1&sfile=JPG_images/train.tar.gz' \ 36 | -O "$save_dir/train.tar.gz" \ 37 | --no-check-certificate --continue 38 | 39 | echo "Downloading JPG_images/test.tar.gz..." 40 | wget --post-data "username=$username&password=$password" \ 41 | 'https://download.is.tue.mpg.de/download.php?domain=rich&resume=1&sfile=JPG_images/test.tar.gz' \ 42 | -O "$save_dir/test.tar.gz" \ 43 | --no-check-certificate --continue 44 | 45 | # ----------- Download Scan Calibration ----------- 46 | echo "Downloading scan_calibration.zip..." 47 | wget --post-data "username=$username&password=$password" \ 48 | 'https://download.is.tue.mpg.de/download.php?domain=rich&resume=1&sfile=scan_calibration.zip' \ 49 | -O "$save_dir/scan_calibration.zip" \ 50 | --no-check-certificate --continue 51 | 52 | # ----------- Download Multicam2World Info (no auth needed) ----------- 53 | echo "Downloading multicam2world.zip..." 54 | wget 'https://rich.is.tue.mpg.de/media/upload/multicam2world.zip' \ 55 | -O "$save_dir/multicam2world.zip" \ 56 | --continue 57 | 58 | # ----------- Unzip / Untar ----------- 59 | echo "Extracting scan_calibration.zip..." 60 | unzip "$save_dir/scan_calibration.zip" -d "$save_dir" 61 | 62 | echo "Extracting multicam2world.zip..." 63 | unzip "$save_dir/multicam2world.zip" -d "$save_dir" 64 | 65 | echo "Extracting train_hsc.zip..." 66 | mkdir -p "$save_dir/hsc" 67 | unzip "$save_dir/train_hsc.zip" -d "$save_dir" 68 | mv "$save_dir/train_hsc" "$save_dir/hsc/train" 69 | 70 | echo "Extracting test_hsc.zip..." 71 | unzip "$save_dir/test_hsc.zip" -d "$save_dir/hsc" 72 | 73 | echo "Extracting train.tar.gz..." 74 | tar -xzf "$save_dir/train.tar.gz" -C "$image_dir" 75 | 76 | echo "Extracting test.tar.gz..." 
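# NOTE (assumption): $image_dir is never set in this script. Per the layout in
# docs/data_eval.md, the JPG archives belong under data/RICH/data/images_jpg_subset,
# so one reasonable choice is to define it before the two tar commands, e.g.:
#   image_dir="$save_dir/images_jpg_subset" && mkdir -p "$image_dir"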
77 | tar -xzf "$save_dir/test.tar.gz" -C "$image_dir" 78 | 79 | echo "All RICH files downloaded and extracted to $save_dir" -------------------------------------------------------------------------------- /lib/utils/func_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import numpy as np 4 | 5 | 6 | def load_img(path, order='RGB'): 7 | img = cv2.imread(path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 8 | if not isinstance(img, np.ndarray): 9 | raise IOError("Fail to read %s" % path) 10 | 11 | if order=='RGB': img = img[:,:,::-1] 12 | img = img.astype(np.float32) 13 | return img 14 | 15 | 16 | def get_bbox(joint_img, joint_valid, expansion_factor=1.0): 17 | x_img, y_img = joint_img[:,0], joint_img[:,1] 18 | x_img = x_img[joint_valid==1]; y_img = y_img[joint_valid==1]; 19 | xmin = min(x_img); ymin = min(y_img); xmax = max(x_img); ymax = max(y_img); 20 | 21 | x_center = (xmin+xmax)/2.; width = (xmax-xmin)*expansion_factor; 22 | xmin = x_center - 0.5*width 23 | xmax = x_center + 0.5*width 24 | 25 | y_center = (ymin+ymax)/2.; height = (ymax-ymin)*expansion_factor; 26 | ymin = y_center - 0.5*height 27 | ymax = y_center + 0.5*height 28 | 29 | bbox = np.array([xmin, ymin, xmax - xmin, ymax - ymin]).astype(np.float32) 30 | return bbox 31 | 32 | 33 | def process_bbox(bbox, target_shape, original_img_shape): 34 | 35 | # aspect ratio preserving bbox 36 | w = bbox[2] 37 | h = bbox[3] 38 | c_x = bbox[0] + w/2. 39 | c_y = bbox[1] + h/2. 40 | aspect_ratio = target_shape[1]/target_shape[0] 41 | if w > aspect_ratio * h: 42 | h = w / aspect_ratio 43 | elif w < aspect_ratio * h: 44 | w = h * aspect_ratio 45 | bbox[2] = w*1.25 46 | bbox[3] = h*1.25 47 | bbox[0] = c_x - bbox[2]/2. 48 | bbox[1] = c_y - bbox[3]/2. 49 | 50 | return bbox 51 | 52 | 53 | def pca_to_axis_angle(pca_pose): 54 | """ 55 | Converts the PCA pose representation from ManoLayer (use_pca=True) 56 | to full axis-angle pose (use_pca=False). 57 | 58 | Args: 59 | - pca_pose: The PCA components (batch_size x num_pca_comps). 60 | 61 | Returns: 62 | - full_pose: The full 48D axis-angle pose (batch_size x 48). 63 | """ 64 | # Ensure pca_pose is a torch tensor 65 | if isinstance(pca_pose, np.ndarray): 66 | pca_pose = torch.tensor(pca_pose, dtype=torch.float32) 67 | 68 | global_rotation, hand_pose = pca_pose[:, :3], pca_pose[:, 3:] # This should be a placeholder, adjust as needed. 
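    # Layout assumption (matching ManoLayer output with use_pca=True): the first 3 entries of
    # pca_pose are the global wrist rotation in axis-angle form, and the remaining entries are
    # the PCA coefficients that are projected back to the full 45-D finger pose below.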
69 | 70 | # Multiply the PCA components by the PCA basis to get the hand pose (45D) 71 | mano_th_selected_comps = get_mano_pca_basis(ncomps=45, use_pca=True, side='right', mano_root='data/base_data/human_models/mano') 72 | hand_pose = torch.mm(hand_pose, mano_th_selected_comps) 73 | 74 | # Add the mean hand pose to the result (broadcasting over the batch dimension) 75 | full_hand_pose = hand_pose 76 | 77 | # Concatenate the global rotation with the full hand pose 78 | full_pose = torch.cat([global_rotation, full_hand_pose], dim=1) # Shape: (batch_size, 48) 79 | 80 | return full_pose 81 | 82 | 83 | import re 84 | def atoi(text): 85 | return int(text) if text.isdigit() else text 86 | def natural_keys(text): 87 | return [atoi(c) for c in re.split(r'(\d+)', text)] 88 | 89 | 90 | # Load config 91 | import yaml 92 | def load_config(cfg_path): 93 | with open(cfg_path, 'r') as f: 94 | cfg = yaml.safe_load(f) 95 | return cfg -------------------------------------------------------------------------------- /lib/utils/smplx/tools/merge_smplh_mano.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import print_function 19 | 20 | import os 21 | import os.path as osp 22 | import pickle 23 | 24 | import argparse 25 | 26 | import numpy as np 27 | 28 | 29 | def merge_models(smplh_fn, mano_left_fn, mano_right_fn, 30 | output_folder='output'): 31 | 32 | with open(smplh_fn, 'rb') as body_file: 33 | body_data = pickle.load(body_file) 34 | 35 | with open(mano_left_fn, 'rb') as lhand_file: 36 | lhand_data = pickle.load(lhand_file) 37 | 38 | with open(mano_right_fn, 'rb') as rhand_file: 39 | rhand_data = pickle.load(rhand_file) 40 | 41 | out_fn = osp.split(smplh_fn)[1] 42 | 43 | output_data = body_data.copy() 44 | output_data['hands_componentsl'] = lhand_data['hands_components'] 45 | output_data['hands_componentsr'] = rhand_data['hands_components'] 46 | 47 | output_data['hands_coeffsl'] = lhand_data['hands_coeffs'] 48 | output_data['hands_coeffsr'] = rhand_data['hands_coeffs'] 49 | 50 | output_data['hands_meanl'] = lhand_data['hands_mean'] 51 | output_data['hands_meanr'] = rhand_data['hands_mean'] 52 | 53 | for key, data in output_data.iteritems(): 54 | if 'chumpy' in str(type(data)): 55 | output_data[key] = np.array(data) 56 | else: 57 | output_data[key] = data 58 | 59 | out_path = osp.join(output_folder, out_fn) 60 | print(out_path) 61 | print('Saving to {}'.format(out_path)) 62 | with open(out_path, 'wb') as output_file: 63 | pickle.dump(output_data, output_file) 64 | 65 | 66 | if __name__ == '__main__': 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument('--smplh-fn', dest='smplh_fn', required=True, 69 | type=str, help='The path to the SMPLH model') 70 | parser.add_argument('--mano-left-fn', dest='mano_left_fn', required=True, 71 | type=str, help='The path to the left hand MANO model') 72 | parser.add_argument('--mano-right-fn', dest='mano_right_fn', required=True, 73 | type=str, help='The path to the right hand MANO model') 74 | parser.add_argument('--output-folder', dest='output_folder', 75 | required=True, type=str, 76 | help='The path to the output folder') 77 | 78 | args = parser.parse_args() 79 | 80 | smplh_fn = args.smplh_fn 81 | mano_left_fn = args.mano_left_fn 82 | mano_right_fn = args.mano_right_fn 83 | output_folder = args.output_folder 84 | 85 | if not osp.exists(output_folder): 86 | print('Creating directory: {}'.format(output_folder)) 87 | os.makedirs(output_folder) 88 | 89 | merge_models(smplh_fn, mano_left_fn, mano_right_fn, output_folder) 90 | -------------------------------------------------------------------------------- /lib/utils/smplx/examples/vis_flame_vertices.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import os.path as osp 18 | import argparse 19 | import pickle 20 | 21 | import numpy as np 22 | import torch 23 | import open3d as o3d 24 | 25 | import smplx 26 | 27 | 28 | def main(model_folder, corr_fname, ext='npz', 29 | head_color=(0.3, 0.3, 0.6), 30 | gender='neutral'): 31 | 32 | head_idxs = np.load(corr_fname) 33 | 34 | model = smplx.create(model_folder, model_type='smplx', 35 | gender=gender, 36 | ext=ext) 37 | betas = torch.zeros([1, 10], dtype=torch.float32) 38 | expression = torch.zeros([1, 10], dtype=torch.float32) 39 | 40 | output = model(betas=betas, expression=expression, 41 | return_verts=True) 42 | vertices = output.vertices.detach().cpu().numpy().squeeze() 43 | joints = output.joints.detach().cpu().numpy().squeeze() 44 | 45 | print('Vertices shape =', vertices.shape) 46 | print('Joints shape =', joints.shape) 47 | 48 | mesh = o3d.geometry.TriangleMesh() 49 | mesh.vertices = o3d.utility.Vector3dVector(vertices) 50 | mesh.triangles = o3d.utility.Vector3iVector(model.faces) 51 | mesh.compute_vertex_normals() 52 | 53 | colors = np.ones_like(vertices) * [0.3, 0.3, 0.3] 54 | colors[head_idxs] = head_color 55 | 56 | mesh.vertex_colors = o3d.utility.Vector3dVector(colors) 57 | 58 | o3d.visualization.draw_geometries([mesh]) 59 | 60 | 61 | if __name__ == '__main__': 62 | parser = argparse.ArgumentParser(description='SMPL-X Demo') 63 | 64 | parser.add_argument('--model-folder', required=True, type=str, 65 | help='The path to the model folder') 66 | parser.add_argument('--corr-fname', required=True, type=str, 67 | dest='corr_fname', 68 | help='Filename with the head correspondences') 69 | parser.add_argument('--gender', type=str, default='neutral', 70 | help='The gender of the model') 71 | parser.add_argument('--ext', type=str, default='npz', 72 | help='Which extension to use for loading') 73 | parser.add_argument('--head', default='right', 74 | choices=['right', 'left'], 75 | type=str, help='Which head to plot') 76 | parser.add_argument('--head-color', type=float, nargs=3, dest='head_color', 77 | default=(0.3, 0.3, 0.6), 78 | help='Color for the head vertices') 79 | 80 | args = parser.parse_args() 81 | 82 | model_folder = osp.expanduser(osp.expandvars(args.model_folder)) 83 | corr_fname = args.corr_fname 84 | gender = args.gender 85 | ext = args.ext 86 | head = args.head 87 | head_color = args.head_color 88 | 89 | main(model_folder, corr_fname, ext=ext, 90 | head_color=head_color, 91 | gender=gender 92 | ) 93 | -------------------------------------------------------------------------------- /lib/utils/human_models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import os.path as osp 4 | import pickle 5 | 6 | from lib.core.config import cfg 7 | from lib.utils.transforms import transform_joint_to_other_db 8 | from lib.utils.smplx import smplx 9 | 10 | 11 | 12 | class MANO(object): 13 | def __init__(self): 14 | self.layer_arg = {'create_global_orient': False, 'create_hand_pose': False, 'create_betas': False, 'create_transl': False} 15 | self.layer = {'right': smplx.create(cfg.MODEL.human_model_path, 'mano', is_rhand=True, use_pca=False, flat_hand_mean=False, **self.layer_arg), 'left': smplx.create(cfg.MODEL.human_model_path, 'mano', is_rhand=False, use_pca=False, flat_hand_mean=False, **self.layer_arg)} 16 | self.vertex_num = 778 17 | self.face = {'right': self.layer['right'].faces, 'left': self.layer['left'].faces} 18 | self.add_watertight_face = 
{'right': np.array([[92,38,122], [234,92,122], [239,234,122], [279,239,122], [215,279,122], [215,122,118], [215,118,117], [215,117,119], [215,119,120], [215,120,108], [215,108,79], [215,79,78], [215,78,121], [214,215,121]])} 19 | self.watertight_face = {'right': np.concatenate((self.layer['right'].faces, self.add_watertight_face['right']), axis=0)} 20 | self.shape_param_dim = 10 21 | 22 | if torch.sum(torch.abs(self.layer['left'].shapedirs[:,0,:] - self.layer['right'].shapedirs[:,0,:])) < 1: 23 | print('Fix shapedirs bug of MANO') 24 | self.layer['left'].shapedirs[:,0,:] *= -1 25 | 26 | # original MANO joint set 27 | self.orig_joint_num = 16 28 | self.orig_joints_name = ('Wrist', 'Index_1', 'Index_2', 'Index_3', 'Middle_1', 'Middle_2', 'Middle_3', 'Pinky_1', 'Pinky_2', 'Pinky_3', 'Ring_1', 'Ring_2', 'Ring_3', 'Thumb_1', 'Thumb_2', 'Thumb_3') 29 | self.orig_root_joint_idx = self.orig_joints_name.index('Wrist') 30 | self.orig_flip_pairs = () 31 | self.orig_joint_regressor = self.layer['right'].J_regressor.numpy() # same for the right and left hands 32 | 33 | # changed MANO joint set 34 | self.joint_num = 21 # manually added fingertips 35 | self.joints_name = ('Wrist', 'Thumb_1', 'Thumb_2', 'Thumb_3', 'Thumb_4', 'Index_1', 'Index_2', 'Index_3', 'Index_4', 'Middle_1', 'Middle_2', 'Middle_3', 'Middle_4', 'Ring_1', 'Ring_2', 'Ring_3', 'Ring_4', 'Pinky_1', 'Pinky_2', 'Pinky_3', 'Pinky_4') 36 | self.skeleton = ( (0,1), (0,5), (0,9), (0,13), (0,17), (1,2), (2,3), (3,4), (5,6), (6,7), (7,8), (9,10), (10,11), (11,12), (13,14), (14,15), (15,16), (17,18), (18,19), (19,20) ) 37 | self.root_joint_idx = self.joints_name.index('Wrist') 38 | self.flip_pairs = () 39 | # add fingertips to joint_regressor 40 | self.joint_regressor = transform_joint_to_other_db(self.orig_joint_regressor, self.orig_joints_name, self.joints_name) 41 | self.joint_regressor[self.joints_name.index('Thumb_4')] = np.array([1 if i == 745 else 0 for i in range(self.joint_regressor.shape[1])], dtype=np.float32).reshape(1,-1) 42 | self.joint_regressor[self.joints_name.index('Index_4')] = np.array([1 if i == 317 else 0 for i in range(self.joint_regressor.shape[1])], dtype=np.float32).reshape(1,-1) 43 | self.joint_regressor[self.joints_name.index('Middle_4')] = np.array([1 if i == 445 else 0 for i in range(self.joint_regressor.shape[1])], dtype=np.float32).reshape(1,-1) 44 | self.joint_regressor[self.joints_name.index('Ring_4')] = np.array([1 if i == 556 else 0 for i in range(self.joint_regressor.shape[1])], dtype=np.float32).reshape(1,-1) 45 | self.joint_regressor[self.joints_name.index('Pinky_4')] = np.array([1 if i == 673 else 0 for i in range(self.joint_regressor.shape[1])], dtype=np.float32).reshape(1,-1) 46 | 47 | 48 | 49 | mano = MANO() -------------------------------------------------------------------------------- /data/MOW/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import json 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.transforms import Normalize 9 | 10 | from lib.core.config import cfg 11 | from lib.utils.human_models import mano 12 | from lib.utils.func_utils import load_img 13 | from lib.utils.preprocessing import augmentation_contact, mask2bbox 14 | 15 | 16 | 17 | # Main dataloader code for MOW dataset 18 | class MOW(Dataset): 19 | def __init__(self, transform, data_split): 20 | super(MOW, self).__init__() 21 | self.__dict__.update(locals()) 22 | 23 | self.transfrom = transform 24 | 
dataset_name = 'mow' 25 | 26 | self.data_split = data_split 27 | self.root_path = root_path = 'data/MOW' 28 | 29 | self.data_dir = os.path.join(self.root_path, 'data') 30 | self.split_dir = os.path.join(self.root_path, 'splits') 31 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 32 | 33 | with open(os.path.join(self.data_dir, 'poses.json')) as f: 34 | self.db = {a['image_id']: a for a in json.load(f)} 35 | 36 | # Organize db id based on split 37 | db_split_path = os.path.join(self.split_dir, f'{self.data_split}.json') 38 | with open(db_split_path, 'r') as f: 39 | self.split = json.load(f) 40 | self.length = len(self.split) 41 | 42 | 43 | def __len__(self): 44 | return self.length 45 | 46 | 47 | def __getitem__(self, index): 48 | sample_id = self.split[index] 49 | ann = self.db[sample_id] 50 | image_id = ann['image_id'] 51 | 52 | img_path = os.path.join(self.data_dir, 'images', f'{image_id}.jpg') 53 | orig_img = load_img(img_path) 54 | 55 | mano_valid = np.ones((1), dtype=np.float32) 56 | 57 | 58 | ################################## LOAD ANNOTATION DATA ##################################### 59 | mask_ho_path = os.path.join(self.data_dir, 'masks/both', f'{image_id}.jpg') 60 | mask_ho = (cv2.imread(mask_ho_path) > 128)[:, :, 0] 61 | bbox_ho = mask2bbox(mask_ho, expansion_factor=cfg.DATASET.ho_bbox_expand_ratio) 62 | 63 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 64 | contact_data = dict(contact_h=contact_h) 65 | ################################## LOAD ANNOTATION DATA ##################################### 66 | 67 | 68 | ############################### PROCESS CROP AND AUGMENTATION ############################### 69 | # Crop image 70 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_ho, self.data_split, enforce_flip=False) 71 | crop_img = img.copy() 72 | 73 | # Transform for 3D HMR 74 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 75 | img = self.transform(img.astype(np.float32)/255.0) 76 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 77 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 78 | img = img.transpose(2, 0, 1) / 255.0 79 | img = normalize_img(torch.from_numpy(img)).float() 80 | else: 81 | raise NotImplementedError 82 | ############################### PROCESS CROP AND AUGMENTATION ############################### 83 | 84 | 85 | input_data = dict(image=img) 86 | targets_data = dict(contact_data=contact_data) 87 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 88 | 89 | 90 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /docs/data_eval.md: -------------------------------------------------------------------------------- 1 | ## Data 2 | You need to follow directory structure of the `data` as below. 
3 | ``` 4 | ${ROOT} 5 | |-- data 6 | | |-- base_data 7 | | |-- MOW 8 | | | |-- data 9 | | | |-- preprocessed_data 10 | | | |-- splits 11 | | | |-- dataset.py 12 | | |-- HIC 13 | | | |-- data 14 | | | |-- preprocessed_data 15 | | | |-- splits 16 | | | |-- dataset.py 17 | | |-- RICH 18 | | | |-- data 19 | | | |-- preprocessed_data 20 | | | |-- splits 21 | | | |-- dataset.py 22 | | |-- Hi4D 23 | | | |-- data 24 | | | |-- preprocessed_data 25 | | | |-- splits 26 | | | |-- dataset.py 27 | ``` 28 | * Download `base_data` from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data/resolve/main/train/data/base_data.tar.gz) by running: 29 | ``` 30 | bash scripts/download_train_base_data.sh 31 | ``` 32 | #### preprocessed_data 33 | * Download `preprocessed_data` from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data) by running: 34 | ``` 35 | bash scripts/download_train_preprocessed_data.sh 36 | ``` 37 | #### MOW dataset 38 | ``` 39 | ${ROOT} 40 | |-- data 41 | | |-- MOW 42 | | | |-- data 43 | | | | |-- images 44 | | | | |-- masks 45 | | | | |-- poses.json 46 | | | |-- preprocessed_data 47 | | | | |-- test 48 | | | | | |-- contact_data 49 | | | |-- splits 50 | | | | |-- test.json 51 | | | |-- dataset.py 52 | ``` 53 | * Download `images`, `poses.json` by running: 54 | ``` 55 | bash scripts/download_official_mow.sh 56 | ``` 57 | * `masks`, `preprocessed_data`, `splits` are already downloaded from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data). 58 | #### HIC dataset 59 | ``` 60 | ${ROOT} 61 | |-- data 62 | | |-- HIC 63 | | | |-- data 64 | | | | |-- 01 65 | | | | |-- ... 66 | | | | |-- 12 67 | | | | |-- IJCV16___Results_MANO___parms_for___joints21 68 | | | | |-- HIC.json 69 | | | |-- preprocessed_data 70 | | | | |-- test 71 | | | | | |-- annot_data 72 | | | | | |-- contact_data 73 | | | |-- splits 74 | | | | |-- test.json 75 | | | |-- dataset.py 76 | ``` 77 | * Download `data` by running: 78 | ``` 79 | bash scripts/download_official_hic.sh 80 | ``` 81 | * `preprocessed_data`, `splits` are already downloaded from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data). 82 | #### RICH dataset 83 | ``` 84 | ${ROOT} 85 | |-- data 86 | | |-- RICH 87 | | | |-- data 88 | | | | |-- hsc 89 | | | | |-- images_jpg_subset 90 | | | | |-- multicam2world 91 | | | | |-- scan_calibration 92 | | | |-- preprocessed_data 93 | | | | |-- test 94 | | | | | |-- annot_data 95 | | | | | |-- contact_data 96 | | | |-- splits 97 | | | | |-- test.json 98 | | | |-- dataset.py 99 | ``` 100 | * Download `data` by running: 101 | ``` 102 | bash scripts/download_official_rich.sh 103 | ``` 104 | * `preprocessed_data`, `splits` are already downloaded from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data). 105 | #### Hi4D dataset 106 | ``` 107 | ${ROOT} 108 | |-- data 109 | | |-- Hi4D 110 | | | |-- data 111 | | | | |-- pair00 112 | | | | |-- ... 113 | | | | |-- pair37 114 | | | |-- preprocessed_data 115 | | | | |-- test 116 | | | | | |-- annot_data 117 | | | | | |-- contact_data 118 | | | |-- splits 119 | | | | |-- test.json 120 | | | | |-- test_pid.json 121 | | | |-- dataset.py 122 | ``` 123 | * Download `data` by running after download request from [official website](https://yifeiyin04.github.io/Hi4D): 124 | ``` 125 | bash scripts/download_official_hi4d.sh 126 | bash scripts/extract_official_hi4d.sh 127 | ``` 128 | * `preprocessed_data`, `splits` are already downloaded from [HuggingFace](https://huggingface.co/datasets/dqj5182/haco-data). 
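#### Sanity check
After running the download scripts above, a minimal sketch like the one below (not part of the official scripts; paths taken from the directory trees on this page) can confirm that the expected folders are in place:
```
for d in data/base_data \
         data/MOW/data data/MOW/preprocessed_data data/MOW/splits \
         data/HIC/data data/HIC/preprocessed_data data/HIC/splits \
         data/RICH/data data/RICH/preprocessed_data data/RICH/splits \
         data/Hi4D/data data/Hi4D/preprocessed_data data/Hi4D/splits; do
    [ -d "$d" ] && echo "OK      $d" || echo "MISSING $d"
done
```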
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # custom 12 | # data/*/*.json 13 | # data/*/*/*.json 14 | # data/*/images 15 | # data/*/data 16 | # data/*/sequences 17 | # data/*/annotation 18 | # data/*/annotations 19 | # data/base_data 20 | # data/preprocessed_data 21 | # experiment 22 | 23 | # Distribution / packaging 24 | .Python 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *.cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # pyenv 90 | .python-version 91 | 92 | # celery beat schedule file 93 | celerybeat-schedule 94 | 95 | # SageMath parsed files 96 | *.sage.py 97 | 98 | # Environments 99 | .env 100 | .venv 101 | env/ 102 | venv/ 103 | ENV/ 104 | env.bak/ 105 | venv.bak/ 106 | 107 | # Spyder project settings 108 | .spyderproject 109 | .spyproject 110 | 111 | # Rope project settings 112 | .ropeproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | ### macOS template 120 | # General 121 | .DS_Store 122 | .AppleDouble 123 | .LSOverride 124 | 125 | # Icon must end with two \r 126 | Icon 127 | 128 | # Thumbnails 129 | ._* 130 | 131 | # Files that might appear in the root of a volume 132 | .DocumentRevisions-V100 133 | .fseventsd 134 | .Spotlight-V100 135 | .TemporaryItems 136 | .Trashes 137 | .VolumeIcon.icns 138 | .com.apple.timemachine.donotpresent 139 | 140 | # Directories potentially created on remote AFP share 141 | .AppleDB 142 | .AppleDesktop 143 | Network Trash Folder 144 | Temporary Items 145 | .apdisk 146 | ### JetBrains template 147 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 148 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 149 | 150 | # User-specific stuff 151 | .idea/**/workspace.xml 152 | .idea/**/tasks.xml 153 | .idea/**/dictionaries 154 | .idea/**/shelf 155 | 156 | # Sensitive or high-churn files 157 | .idea/**/dataSources/ 158 | .idea/**/dataSources.ids 159 | .idea/**/dataSources.local.xml 160 | .idea/**/sqlDataSources.xml 161 | .idea/**/dynamic.xml 162 | .idea/**/uiDesigner.xml 163 | .idea/**/dbnavigator.xml 164 | 165 | # Gradle 166 | .idea/**/gradle.xml 167 | .idea/**/libraries 168 | 169 | # CMake 170 | cmake-build-debug/ 171 | cmake-build-release/ 
172 | 173 | # Mongo Explorer plugin 174 | .idea/**/mongoSettings.xml 175 | 176 | # File-based project format 177 | *.iws 178 | 179 | # IntelliJ 180 | out/ 181 | 182 | # mpeltonen/sbt-idea plugin 183 | .idea_modules/ 184 | 185 | # JIRA plugin 186 | atlassian-ide-plugin.xml 187 | 188 | # Cursive Clojure plugin 189 | .idea/replstate.xml 190 | 191 | # Crashlytics plugin (for Android Studio and IntelliJ) 192 | com_crashlytics_export_strings.xml 193 | crashlytics.properties 194 | crashlytics-build.properties 195 | fabric.properties 196 | 197 | # Editor-based Rest Client 198 | .idea/httpRequests -------------------------------------------------------------------------------- /lib/utils/smplx/examples/vis_mano_vertices.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import os.path as osp 18 | import argparse 19 | import pickle 20 | 21 | import numpy as np 22 | import torch 23 | import open3d as o3d 24 | 25 | import smplx 26 | 27 | 28 | def main(model_folder, corr_fname, ext='npz', 29 | hand_color=(0.3, 0.3, 0.6), 30 | gender='neutral', hand='right'): 31 | 32 | with open(corr_fname, 'rb') as f: 33 | idxs_data = pickle.load(f) 34 | if hand == 'both': 35 | hand_idxs = np.concatenate( 36 | [idxs_data['left_hand'], idxs_data['right_hand']] 37 | ) 38 | else: 39 | hand_idxs = idxs_data[f'{hand}_hand'] 40 | 41 | model = smplx.create(model_folder, model_type='smplx', 42 | gender=gender, 43 | ext=ext) 44 | betas = torch.zeros([1, 10], dtype=torch.float32) 45 | expression = torch.zeros([1, 10], dtype=torch.float32) 46 | 47 | output = model(betas=betas, expression=expression, 48 | return_verts=True) 49 | vertices = output.vertices.detach().cpu().numpy().squeeze() 50 | joints = output.joints.detach().cpu().numpy().squeeze() 51 | 52 | print('Vertices shape =', vertices.shape) 53 | print('Joints shape =', joints.shape) 54 | 55 | mesh = o3d.geometry.TriangleMesh() 56 | mesh.vertices = o3d.utility.Vector3dVector(vertices) 57 | mesh.triangles = o3d.utility.Vector3iVector(model.faces) 58 | mesh.compute_vertex_normals() 59 | 60 | colors = np.ones_like(vertices) * [0.3, 0.3, 0.3] 61 | colors[hand_idxs] = hand_color 62 | 63 | mesh.vertex_colors = o3d.utility.Vector3dVector(colors) 64 | 65 | o3d.visualization.draw_geometries([mesh]) 66 | 67 | 68 | if __name__ == '__main__': 69 | parser = argparse.ArgumentParser(description='SMPL-X Demo') 70 | 71 | parser.add_argument('--model-folder', required=True, type=str, 72 | help='The path to the model folder') 73 | parser.add_argument('--corr-fname', required=True, type=str, 74 | dest='corr_fname', 75 | help='Filename with the hand correspondences') 76 | parser.add_argument('--gender', type=str, default='neutral', 77 | help='The gender of the model') 78 | parser.add_argument('--ext', type=str, 
default='npz', 79 | help='Which extension to use for loading') 80 | parser.add_argument('--hand', default='right', 81 | choices=['right', 'left', 'both'], 82 | type=str, help='Which hand to plot') 83 | parser.add_argument('--hand-color', type=float, nargs=3, dest='hand_color', 84 | default=(0.3, 0.3, 0.6), 85 | help='Color for the hand vertices') 86 | 87 | args = parser.parse_args() 88 | 89 | model_folder = osp.expanduser(osp.expandvars(args.model_folder)) 90 | corr_fname = args.corr_fname 91 | gender = args.gender 92 | ext = args.ext 93 | hand = args.hand 94 | hand_color = args.hand_color 95 | 96 | main(model_folder, corr_fname, ext=ext, 97 | hand_color=hand_color, 98 | gender=gender, hand=hand 99 | ) 100 | -------------------------------------------------------------------------------- /lib/utils/smplx/smplx/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import NewType, Union, Optional 18 | from dataclasses import dataclass, asdict, fields 19 | import numpy as np 20 | import torch 21 | 22 | Tensor = NewType('Tensor', torch.Tensor) 23 | Array = NewType('Array', np.ndarray) 24 | 25 | 26 | @dataclass 27 | class ModelOutput: 28 | vertices: Optional[Tensor] = None 29 | joints: Optional[Tensor] = None 30 | full_pose: Optional[Tensor] = None 31 | global_orient: Optional[Tensor] = None 32 | transl: Optional[Tensor] = None 33 | 34 | def __getitem__(self, key): 35 | return getattr(self, key) 36 | 37 | def get(self, key, default=None): 38 | return getattr(self, key, default) 39 | 40 | def __iter__(self): 41 | return self.keys() 42 | 43 | def keys(self): 44 | keys = [t.name for t in fields(self)] 45 | return iter(keys) 46 | 47 | def values(self): 48 | values = [getattr(self, t.name) for t in fields(self)] 49 | return iter(values) 50 | 51 | def items(self): 52 | data = [(t.name, getattr(self, t.name)) for t in fields(self)] 53 | return iter(data) 54 | 55 | 56 | @dataclass 57 | class SMPLOutput(ModelOutput): 58 | betas: Optional[Tensor] = None 59 | body_pose: Optional[Tensor] = None 60 | 61 | 62 | @dataclass 63 | class SMPLHOutput(SMPLOutput): 64 | left_hand_pose: Optional[Tensor] = None 65 | right_hand_pose: Optional[Tensor] = None 66 | transl: Optional[Tensor] = None 67 | 68 | 69 | @dataclass 70 | class SMPLXOutput(SMPLHOutput): 71 | expression: Optional[Tensor] = None 72 | jaw_pose: Optional[Tensor] = None 73 | 74 | 75 | @dataclass 76 | class MANOOutput(ModelOutput): 77 | betas: Optional[Tensor] = None 78 | hand_pose: Optional[Tensor] = None 79 | 80 | 81 | @dataclass 82 | class FLAMEOutput(ModelOutput): 83 | betas: Optional[Tensor] = None 84 | expression: Optional[Tensor] = None 85 | jaw_pose: Optional[Tensor] = None 86 | neck_pose: Optional[Tensor] = None 87 | 88 | 89 | def 
find_joint_kin_chain(joint_id, kinematic_tree): 90 | kin_chain = [] 91 | curr_idx = joint_id 92 | while curr_idx != -1: 93 | kin_chain.append(curr_idx) 94 | curr_idx = kinematic_tree[curr_idx] 95 | return kin_chain 96 | 97 | 98 | def to_tensor( 99 | array: Union[Array, Tensor], dtype=torch.float32 100 | ) -> Tensor: 101 | if torch.is_tensor(array): 102 | return array 103 | else: 104 | return torch.tensor(array, dtype=dtype) 105 | 106 | 107 | class Struct(object): 108 | def __init__(self, **kwargs): 109 | for key, val in kwargs.items(): 110 | setattr(self, key, val) 111 | 112 | 113 | def to_np(array, dtype=np.float32): 114 | if 'scipy.sparse' in str(type(array)): 115 | array = array.todense() 116 | return np.array(array, dtype=dtype) 117 | 118 | 119 | def rot_mat_to_euler(rot_mats): 120 | # Calculates rotation matrix to euler angles 121 | # Careful for extreme cases of eular angles like [0.0, pi, 0.0] 122 | 123 | sy = torch.sqrt(rot_mats[:, 0, 0] * rot_mats[:, 0, 0] + 124 | rot_mats[:, 1, 0] * rot_mats[:, 1, 0]) 125 | return torch.atan2(-rot_mats[:, 2, 0], sy) 126 | -------------------------------------------------------------------------------- /lib/core/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from easydict import EasyDict as edict 5 | 6 | from lib.core.logger import ColorLogger 7 | from lib.utils.log_utils import init_dirs 8 | 9 | 10 | cfg = edict() 11 | 12 | 13 | """ Dataset """ 14 | cfg.DATASET = edict() 15 | cfg.DATASET.train_name = ['ObMan', 'DexYCB', 'HO3D', 'MOW', 'H2O3D', 'HOI4D', 'H2O', 'ARCTIC', 'InterHand26M', 'HIC', 'PROX', 'RICH', 'Decaf', 'Hi4D'] 16 | cfg.DATASET.test_name = 'MOW' # ONLY TEST ONE DATASET AT A TIME. Currently support ['MOW', 'HIC', 'RICH', 'Hi4D'] 17 | cfg.DATASET.workers = 2 18 | cfg.DATASET.ho_bbox_expand_ratio = 1.3 19 | cfg.DATASET.hand_bbox_expand_ratio = 1.3 20 | cfg.DATASET.ho_big_bbox_expand_ratio = 2.0 21 | cfg.DATASET.hand_scene_bbox_expand_ratio = 2.5 22 | cfg.DATASET.obj_bbox_expand_ratio = 1.5 23 | 24 | 25 | """ Model - HMR """ 26 | cfg.MODEL = edict() 27 | cfg.MODEL.input_img_shape = (256, 256) 28 | cfg.MODEL.img_mean = (0.485, 0.456, 0.406) 29 | cfg.MODEL.img_std = (0.229, 0.224, 0.225) 30 | # MANO 31 | cfg.MODEL.human_model_path = 'data/base_data/human_models' 32 | # Contact 33 | cfg.MODEL.c_thres_ih = 0.005 # Reasonable for InterHand26M, HIC dataset 34 | cfg.MODEL.c_thres = 0.01 # Reasonable for ObMan, DexYCB, HO3D, H2O3D, ARCTIC, HOI4D, H2O, PROX dataset 35 | cfg.MODEL.c_thres_in_the_wild = 0.035 # Reasonable for MOW dataset 36 | cfg.MODEL.contact_data_path = 'data/base_data/contact_data/all/contact_data_all.npy' 37 | cfg.MODEL.contact_means_path = 'data/base_data/contact_data/all/contact_means_all.npy' 38 | # Backbone 39 | cfg.MODEL.backbone_type = 'hamer' 40 | cfg.MODEL.hamer_backbone_pretrained_path = 'data/base_data/pretrained_models/hamer/hamer.ckpt' 41 | cfg.MODEL.hrnet_w32_backbone_config_path = 'data/base_data/pretrained_models/hrnet/cls_hrnet_w32_sgd_lr5e-2_wd1e-4_bs32_x100.yaml' 42 | cfg.MODEL.hrnet_w32_backbone_pretrained_path = 'data/base_data/pretrained_models/hrnet/hrnet_w32-36af842e.pth' 43 | cfg.MODEL.hrnet_w48_backbone_config_path = 'data/base_data/pretrained_models/hrnet/cls_hrnet_w48_sgd_lr5e-2_wd1e-4_bs32_x100.yaml' 44 | cfg.MODEL.hrnet_w48_backbone_pretrained_path = 'data/base_data/pretrained_models/hrnet/hrnet_w48-8ef0771d.pth' 45 | cfg.MODEL.handoccnet_backbone_pretrained_path = 
'data/base_data/pretrained_models/handoccnet/snapshot_demo.pth.tar' 46 | # Multi-level joint regressor 47 | cfg.MODEL.V_regressor_336_path = 'data/base_data/human_models/mano/V_regressor_336.npy' 48 | cfg.MODEL.V_regressor_84_path = 'data/base_data/human_models/mano/V_regressor_84.npy' 49 | # Hand Detector 50 | cfg.MODEL.hand_landmarker_path = 'data/base_data/demo_data/hand_landmarker.task' 51 | # Balanced Sampling 52 | cfg.MODEL.balanced_sampling = True 53 | # Loss 54 | cfg.MODEL.loss_type = 'vcb+reg+smooth' 55 | 56 | 57 | """ Train Detail """ 58 | cfg.TRAIN = edict() 59 | cfg.TRAIN.batch = 24 60 | cfg.TRAIN.epoch = 10 61 | cfg.TRAIN.lr = 1e-5 62 | cfg.TRAIN.weight_decay = 0.0001 63 | cfg.TRAIN.milestones = (5, 10) 64 | cfg.TRAIN.step_size = 10 65 | cfg.TRAIN.gamma = 0.9 66 | cfg.TRAIN.betas = (0.9, 0.95) 67 | cfg.TRAIN.print_freq = 1 68 | cfg.TRAIN.seed = 314 # for reproducibility 69 | cfg.TRAIN.loss_weight = 1.0 70 | 71 | 72 | """ Test Detail """ 73 | cfg.TEST = edict() 74 | cfg.TEST.batch = 1 75 | 76 | 77 | """ CAMERA """ 78 | cfg.CAMERA = edict() 79 | 80 | 81 | """ OTHERS """ 82 | torch.backends.cudnn.benchmark = True 83 | logger = None 84 | 85 | 86 | def update_config(backbone_type='', test_name='', exp_dir='', ckpt_path=''): 87 | if backbone_type == '': 88 | backbone_type = 'hamer' 89 | cfg.MODEL.backbone_type = backbone_type 90 | if test_name == '': 91 | test_name = 'MOW' 92 | cfg.DATASET.test_name = test_name 93 | 94 | global logger 95 | log_dir = os.path.join(exp_dir, 'log') 96 | try: 97 | init_dirs([log_dir]) 98 | logger = ColorLogger(log_dir) 99 | logger.info("Logger initialized successfully!") 100 | except Exception as e: 101 | print(f"Failed to initialize logger: {e}") 102 | logger = None -------------------------------------------------------------------------------- /lib/models/backbone/resnet.py: -------------------------------------------------------------------------------- 1 | # This code is from HandOccNet (https://github.com/mks0601/Hand4Whole_RELEASE/blob/main/common/nets/resnet.py) 2 | import torch 3 | import torch.nn as nn 4 | from torchvision.models.resnet import BasicBlock, Bottleneck 5 | 6 | 7 | class ResNetBackbone(nn.Module): 8 | def __init__(self, resnet_type): 9 | 10 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2], [64, 64, 128, 256, 512], 'resnet18'), 11 | 34: (BasicBlock, [3, 4, 6, 3], [64, 64, 128, 256, 512], 'resnet34'), 12 | 50: (Bottleneck, [3, 4, 6, 3], [64, 256, 512, 1024, 2048], 'resnet50'), 13 | 101: (Bottleneck, [3, 4, 23, 3], [64, 256, 512, 1024, 2048], 'resnet101'), 14 | 152: (Bottleneck, [3, 8, 36, 3], [64, 256, 512, 1024, 2048], 'resnet152')} 15 | block, layers, channels, name = resnet_spec[resnet_type] 16 | 17 | self.name = name 18 | self.inplanes = 64 19 | super(ResNetBackbone, self).__init__() 20 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 21 | bias=False) 22 | self.bn1 = nn.BatchNorm2d(64) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 25 | 26 | self.layer1 = self._make_layer(block, 64, layers[0]) 27 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 28 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 29 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 30 | 31 | for m in self.modules(): 32 | if isinstance(m, nn.Conv2d): 33 | nn.init.normal_(m.weight, mean=0, std=0.001) 34 | elif isinstance(m, nn.BatchNorm2d): 35 | nn.init.constant_(m.weight, 1) 36 | nn.init.constant_(m.bias, 0) 37 | 38 | def _make_layer(self, 
block, planes, blocks, stride=1): 39 | downsample = None 40 | if stride != 1 or self.inplanes != planes * block.expansion: 41 | downsample = nn.Sequential( 42 | nn.Conv2d(self.inplanes, planes * block.expansion, 43 | kernel_size=1, stride=stride, bias=False), 44 | nn.BatchNorm2d(planes * block.expansion), 45 | ) 46 | 47 | layers = [] 48 | layers.append(block(self.inplanes, planes, stride, downsample)) 49 | self.inplanes = planes * block.expansion 50 | for i in range(1, blocks): 51 | layers.append(block(self.inplanes, planes)) 52 | 53 | return nn.Sequential(*layers) 54 | 55 | def forward(self, x): 56 | x = self.conv1(x) 57 | x = self.bn1(x) 58 | x = self.relu(x) 59 | x = self.maxpool(x) 60 | 61 | x = self.layer1(x) 62 | x = self.layer2(x) 63 | x = self.layer3(x) 64 | x = self.layer4(x) 65 | return x 66 | 67 | def init_weights(self): 68 | import torchvision.models as models 69 | 70 | if self.name == 'resnet18': 71 | org_resnet = models.resnet18(pretrained=True) 72 | elif self.name == 'resnet34': 73 | org_resnet = models.resnet34(pretrained=True) 74 | elif self.name == 'resnet50': 75 | org_resnet = models.resnet50(pretrained=True) 76 | elif self.name == 'resnet101': 77 | org_resnet = models.resnet101(pretrained=True) 78 | elif self.name == 'resnet152': 79 | org_resnet = models.resnet152(pretrained=True) 80 | else: 81 | raise ValueError(f"Unsupported model name: {self.name}") 82 | 83 | # Drop the original fully connected layer 84 | org_resnet.fc = None # Or you can set it to nn.Identity() 85 | 86 | # If you're loading weights manually, extract the state_dict 87 | org_resnet_state = org_resnet.state_dict() 88 | 89 | # Remove FC layer weights to avoid mismatch 90 | org_resnet_state.pop('fc.weight', None) 91 | org_resnet_state.pop('fc.bias', None) 92 | 93 | # Load into your model 94 | self.load_state_dict(org_resnet_state, strict=False) 95 | print("Initialized ResNet from torchvision with pretrained=True") -------------------------------------------------------------------------------- /lib/utils/smplx/smplx/joint_names.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | JOINT_NAMES = [ 18 | 'pelvis', 19 | 'left_hip', 20 | 'right_hip', 21 | 'spine1', 22 | 'left_knee', 23 | 'right_knee', 24 | 'spine2', 25 | 'left_ankle', 26 | 'right_ankle', 27 | 'spine3', 28 | 'left_foot', 29 | 'right_foot', 30 | 'neck', 31 | 'left_collar', 32 | 'right_collar', 33 | 'head', 34 | 'left_shoulder', 35 | 'right_shoulder', 36 | 'left_elbow', 37 | 'right_elbow', 38 | 'left_wrist', 39 | 'right_wrist', 40 | 'jaw', 41 | 'left_eye_smplhf', 42 | 'right_eye_smplhf', 43 | 'left_index1', 44 | 'left_index2', 45 | 'left_index3', 46 | 'left_middle1', 47 | 'left_middle2', 48 | 'left_middle3', 49 | 'left_pinky1', 50 | 'left_pinky2', 51 | 'left_pinky3', 52 | 'left_ring1', 53 | 'left_ring2', 54 | 'left_ring3', 55 | 'left_thumb1', 56 | 'left_thumb2', 57 | 'left_thumb3', 58 | 'right_index1', 59 | 'right_index2', 60 | 'right_index3', 61 | 'right_middle1', 62 | 'right_middle2', 63 | 'right_middle3', 64 | 'right_pinky1', 65 | 'right_pinky2', 66 | 'right_pinky3', 67 | 'right_ring1', 68 | 'right_ring2', 69 | 'right_ring3', 70 | 'right_thumb1', 71 | 'right_thumb2', 72 | 'right_thumb3', 73 | 'nose', 74 | 'right_eye', 75 | 'left_eye', 76 | 'right_ear', 77 | 'left_ear', 78 | 'left_big_toe', 79 | 'left_small_toe', 80 | 'left_heel', 81 | 'right_big_toe', 82 | 'right_small_toe', 83 | 'right_heel', 84 | 'left_thumb', 85 | 'left_index', 86 | 'left_middle', 87 | 'left_ring', 88 | 'left_pinky', 89 | 'right_thumb', 90 | 'right_index', 91 | 'right_middle', 92 | 'right_ring', 93 | 'right_pinky', 94 | 'right_eye_brow1', 95 | 'right_eye_brow2', 96 | 'right_eye_brow3', 97 | 'right_eye_brow4', 98 | 'right_eye_brow5', 99 | 'left_eye_brow5', 100 | 'left_eye_brow4', 101 | 'left_eye_brow3', 102 | 'left_eye_brow2', 103 | 'left_eye_brow1', 104 | 'nose1', 105 | 'nose2', 106 | 'nose3', 107 | 'nose4', 108 | 'right_nose_2', 109 | 'right_nose_1', 110 | 'nose_middle', 111 | 'left_nose_1', 112 | 'left_nose_2', 113 | 'right_eye1', 114 | 'right_eye2', 115 | 'right_eye3', 116 | 'right_eye4', 117 | 'right_eye5', 118 | 'right_eye6', 119 | 'left_eye4', 120 | 'left_eye3', 121 | 'left_eye2', 122 | 'left_eye1', 123 | 'left_eye6', 124 | 'left_eye5', 125 | 'right_mouth_1', 126 | 'right_mouth_2', 127 | 'right_mouth_3', 128 | 'mouth_top', 129 | 'left_mouth_3', 130 | 'left_mouth_2', 131 | 'left_mouth_1', 132 | 'left_mouth_5', # 59 in OpenPose output 133 | 'left_mouth_4', # 58 in OpenPose output 134 | 'mouth_bottom', 135 | 'right_mouth_4', 136 | 'right_mouth_5', 137 | 'right_lip_1', 138 | 'right_lip_2', 139 | 'lip_top', 140 | 'left_lip_2', 141 | 'left_lip_1', 142 | 'left_lip_3', 143 | 'lip_bottom', 144 | 'right_lip_3', 145 | # Face contour 146 | 'right_contour_1', 147 | 'right_contour_2', 148 | 'right_contour_3', 149 | 'right_contour_4', 150 | 'right_contour_5', 151 | 'right_contour_6', 152 | 'right_contour_7', 153 | 'right_contour_8', 154 | 'contour_middle', 155 | 'left_contour_8', 156 | 'left_contour_7', 157 | 'left_contour_6', 158 | 'left_contour_5', 159 | 'left_contour_4', 160 | 'left_contour_3', 161 | 'left_contour_2', 162 | 'left_contour_1', 163 | ] 164 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | from tqdm import tqdm 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | from torch.utils.data import DataLoader 9 | import torchvision.transforms as transforms 10 | 11 | from 
lib.core.config import cfg, update_config 12 | from lib.models.model import HACO 13 | from lib.utils.contact_utils import get_contact_thres 14 | from lib.utils.train_utils import get_transform, worker_init_fn 15 | from lib.utils.eval_utils import evaluation 16 | 17 | 18 | parser = argparse.ArgumentParser(description='Test HACO') 19 | parser.add_argument('--backbone', type=str, default='hamer', choices=['hamer', 'vit-l-16', 'vit-b-16', 'vit-s-16', 'handoccnet', 'hrnet-w48', 'hrnet-w32', 'resnet-152', 'resnet-101', 'resnet-50', 'resnet-34', 'resnet-18'], help='backbone model') 20 | parser.add_argument('--test_name', type=str, default='MOW', help='dataset name for evaluation') 21 | parser.add_argument('--checkpoint', type=str, default='', help='model path for evaluation') 22 | args = parser.parse_args() 23 | 24 | 25 | # Import dataset 26 | exec(f'from data.{args.test_name}.dataset import {args.test_name}') 27 | 28 | 29 | # Set device as CUDA 30 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 31 | torch.set_num_threads(cfg.DATASET.workers) # Limit Torch 32 | os.environ["OMP_NUM_THREADS"] = "4" # Limit OpenMP (NumPy, MKL) 33 | os.environ["MKL_NUM_THREADS"] = "4" # Limit MKL operations 34 | 35 | 36 | # Initialize directories 37 | experiment_dir = f'experiments_test_{args.test_name.lower()}' 38 | checkpoint_dir = os.path.join(experiment_dir, 'full', 'checkpoints') 39 | os.makedirs(checkpoint_dir, exist_ok=True) 40 | 41 | 42 | # Load config 43 | update_config(backbone_type=args.backbone, test_name=args.test_name, exp_dir=experiment_dir) 44 | 45 | 46 | ############## Dataset ############### 47 | transform = get_transform(args.backbone) 48 | 49 | test_dataset = eval(f'{cfg.DATASET.test_name}')(transform, 'test') 50 | ############## Dataset ############### 51 | 52 | 53 | ############# Dataloader ############# 54 | test_dataloader = DataLoader(test_dataset, batch_size=cfg.TEST.batch, shuffle=False, num_workers=cfg.DATASET.workers, pin_memory=True, drop_last=False, worker_init_fn=worker_init_fn) 55 | ############# Dataloader ############# 56 | 57 | 58 | from lib.core.config import logger 59 | logger.info(f"# of test samples: {len(test_dataset)}") 60 | 61 | 62 | ############# Model ############# 63 | model = HACO().to(device) 64 | model.eval() 65 | ############# Model ############# 66 | 67 | 68 | # Load model checkpoint if provided 69 | if args.checkpoint: 70 | checkpoint = torch.load(args.checkpoint, map_location=device) 71 | model.load_state_dict(checkpoint['state_dict']) 72 | 73 | 74 | ############################### Test Loop ############################### 75 | eval_result = { 76 | 'cont_pre': [None for _ in range(len(test_dataset))], 77 | 'cont_rec': [None for _ in range(len(test_dataset))], 78 | 'cont_f1': [None for _ in range(len(test_dataset))], 79 | } 80 | 81 | test_iterator = tqdm(enumerate(test_dataloader), total=len(test_dataloader), leave=False) 82 | model.eval() 83 | 84 | 85 | for idx, data in test_iterator: 86 | ############# Run model ############# 87 | with torch.no_grad(): 88 | outputs = model({'input': data['input_data'], 'target': data['targets_data'], 'meta_info': data['meta_info']}, mode="test") 89 | ############# Run model ############# 90 | 91 | 92 | ############## Evaluation ############### 93 | # Compute evaluation metrics 94 | eval_thres = get_contact_thres(args.backbone) 95 | eval_out = evaluation(outputs, data['targets_data'], data['meta_info'], mode='test', thres=eval_thres) 96 | for key in [*eval_out]: 97 | eval_result[key][idx] = eval_out[key] 98 | 99 | # Hand 
Contact Estimator (HCE) 100 | total_cont_pre = np.mean([x if x is not None else 0.0 for x in eval_result['cont_pre'][:idx+1]]) 101 | total_cont_rec = np.mean([x if x is not None else 0.0 for x in eval_result['cont_rec'][:idx+1]]) 102 | total_cont_f1 = np.mean([x if x is not None else 0.0 for x in eval_result['cont_f1'][:idx+1]]) 103 | ############## Evaluation ############### 104 | 105 | 106 | logger.info(f"C-Pre: {total_cont_pre:.3f} | C-Rec: {total_cont_rec:.3f} | C-F1: {total_cont_f1:.3f}") 107 | ############################### Test Loop ############################### 108 | 109 | 110 | logger.info('Test finished!!!!') 111 | logger.info(f"Final Results --- C-Pre: {total_cont_pre:.3f} | C-Rec: {total_cont_rec:.3f} | C-F1: {total_cont_f1:.3f}") -------------------------------------------------------------------------------- /lib/models/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from lib.core.config import cfg 6 | 7 | 8 | 9 | class HACO(nn.Module): 10 | def __init__(self): 11 | super(HACO, self).__init__() 12 | if torch.cuda.is_available(): 13 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 14 | self.to(self.device) 15 | 16 | # Load modules 17 | self.backbone = get_backbone_network(type=cfg.MODEL.backbone_type) 18 | self.decoder = get_decoder_network(type=cfg.MODEL.backbone_type) 19 | 20 | def forward(self, inputs, mode='test'): 21 | image = inputs['input']['image'].to(self.device) 22 | 23 | if 'vit' in cfg.MODEL.backbone_type: 24 | image = F.interpolate(image, size=(224, 224), mode='bilinear', align_corners=False) 25 | 26 | img_feat = self.backbone(image) 27 | contact_out, contact_336_out, contact_84_out, contact_joint_out = self.decoder(img_feat) 28 | 29 | return dict(contact_out=contact_out, contact_336_out=contact_336_out, contact_84_out=contact_84_out, contact_joint_out=contact_joint_out) 30 | 31 | 32 | 33 | def get_backbone_network(type='hamer'): 34 | if type in ['hamer']: 35 | from lib.models.backbone.backbone_hamer_style import ViT_HaMeR 36 | backbone = ViT_HaMeR() 37 | checkpoint = torch.load(cfg.MODEL.hamer_backbone_pretrained_path, map_location='cuda')['state_dict'] 38 | filtered_state_dict = {k[len("backbone."):]: v for k, v in checkpoint.items() if k.startswith("backbone.")} 39 | backbone.load_state_dict(filtered_state_dict) 40 | elif type in ['resnet-18']: 41 | from lib.models.backbone.resnet import ResNetBackbone 42 | backbone = ResNetBackbone(18) # ResNet 43 | backbone.init_weights() 44 | elif type in ['resnet-34']: 45 | from lib.models.backbone.resnet import ResNetBackbone 46 | backbone = ResNetBackbone(34) # ResNet 47 | backbone.init_weights() 48 | elif type in ['resnet-50']: 49 | from lib.models.backbone.resnet import ResNetBackbone 50 | backbone = ResNetBackbone(50) # ResNet 51 | backbone.init_weights() 52 | elif type in ['resnet-101']: 53 | from lib.models.backbone.resnet import ResNetBackbone 54 | backbone = ResNetBackbone(101) # ResNet 55 | backbone.init_weights() 56 | elif type in ['resnet-152']: 57 | from lib.models.backbone.resnet import ResNetBackbone 58 | backbone = ResNetBackbone(152) # ResNet 59 | backbone.init_weights() 60 | elif type in ['hrnet-w32']: 61 | from lib.models.backbone.hrnet import HighResolutionNet 62 | from lib.utils.func_utils import load_config 63 | config = load_config(cfg.MODEL.hrnet_w32_backbone_config_path) 64 | pretrained = cfg.MODEL.hrnet_w32_backbone_pretrained_path 65 | 
backbone = HighResolutionNet(config) 66 | backbone.init_weights(pretrained=pretrained) 67 | elif type in ['hrnet-w48']: 68 | from lib.models.backbone.hrnet import HighResolutionNet 69 | from lib.utils.func_utils import load_config 70 | config = load_config(cfg.MODEL.hrnet_w48_backbone_config_path) 71 | pretrained = cfg.MODEL.hrnet_w48_backbone_pretrained_path 72 | backbone = HighResolutionNet(config) 73 | backbone.init_weights(pretrained=pretrained) 74 | elif type in ['handoccnet']: 75 | from lib.models.backbone.fpn import FPN 76 | backbone = FPN(pretrained=False) 77 | pretrained = cfg.MODEL.handoccnet_backbone_pretrained_path 78 | state_dict = {k[len('module.backbone.'):]: v for k, v in torch.load(pretrained)['network'].items() if k.startswith('module.backbone.')} 79 | backbone.load_state_dict(state_dict, strict=True) 80 | elif type in ['vit-s-16']: 81 | from lib.models.backbone.vit import ViTBackbone 82 | backbone = ViTBackbone(model_name='vit_small_patch16_224', pretrained=True) 83 | elif type in ['vit-b-16']: 84 | from lib.models.backbone.vit import ViTBackbone 85 | backbone = ViTBackbone(model_name='vit_base_patch16_224', pretrained=True) 86 | elif type in ['vit-l-16']: 87 | from lib.models.backbone.vit import ViTBackbone 88 | backbone = ViTBackbone(model_name='vit_large_patch16_224', pretrained=True) 89 | else: 90 | raise NotImplementedError 91 | 92 | return backbone 93 | 94 | 95 | 96 | def get_decoder_network(type='hamer'): 97 | from lib.models.decoder.decoder_hamer_style import ContactTransformerDecoderHead 98 | decoder = ContactTransformerDecoderHead() 99 | 100 | return decoder -------------------------------------------------------------------------------- /data/PROX/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import json 4 | import pickle 5 | import numpy as np 6 | 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision.transforms import Normalize 10 | 11 | from lib.core.config import cfg 12 | from lib.utils.human_models import mano 13 | from lib.utils.func_utils import load_img 14 | from lib.utils.preprocessing import augmentation_contact 15 | 16 | 17 | 18 | # Main dataloader code for PROX dataset 19 | class PROX(Dataset): 20 | def __init__(self, transform, data_split): 21 | super(PROX, self).__init__() 22 | self.__dict__.update(locals()) 23 | 24 | self.transform = transform 25 | dataset_name = 'prox' 26 | 27 | self.data_split = data_split 28 | self.root_path = root_path = os.path.join('data', 'PROX') 29 | self.data_dir = os.path.join(self.root_path, 'data') 30 | 31 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 32 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 33 | os.makedirs(self.annot_data_path, exist_ok=True) 34 | os.makedirs(self.contact_data_path, exist_ok=True) 35 | 36 | # SMPL-X to MANO mapping 37 | smplx_mano_mapping_path = os.path.join('data', 'base_data', 'conversions', 'smplx_to_mano.pkl') 38 | 39 | with open(smplx_mano_mapping_path, 'rb') as f: 40 | self.smplx_to_mano_mapping = pickle.load(f) 41 | self.smplx_to_mano_mapping_r = self.smplx_to_mano_mapping["right_hand"] 42 | 43 | # Camera 44 | with open(os.path.join(self.data_dir, 'quantitative/calibration/Color.json'), 'r') as f: 45 | calibration = json.load(f) 46 | self.cam_param = {'focal': calibration['f'], 'princpt': calibration['c']} 47 | 48 | # Organize db id based on split 49 | db_split_path = 
os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 50 | with open(db_split_path, 'r') as f: 51 | self.db = json.load(f) 52 | 53 | 54 | def __len__(self): 55 | return len(self.db) 56 | 57 | 58 | def __getitem__(self, index): 59 | aid = self.db[index] 60 | seq_name = aid.split('/')[-3] 61 | img_name = aid.split('/')[-1] 62 | sample_id = f'{seq_name}-{img_name}' 63 | 64 | orig_img_path = os.path.join(os.path.join(self.data_dir, 'quantitative', 'recordings', seq_name, 'Color', f'{img_name}.jpg')) 65 | 66 | orig_img = load_img(orig_img_path) 67 | orig_img = cv2.flip(orig_img, 1) # only for PROX dataset 68 | img_shape = orig_img.shape[:2] 69 | img_h, img_w = img_shape 70 | 71 | mano_valid = np.ones((1), dtype=np.float32) 72 | 73 | 74 | ################################## LOAD ANNOTATION DATA ##################################### 75 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 76 | 77 | annot_data = np.load(annot_data_path, allow_pickle=True) 78 | bbox_hand_r = annot_data['bbox_ho'] 79 | 80 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 81 | contact_data = dict(contact_h=contact_h) 82 | ################################## LOAD ANNOTATION DATA ##################################### 83 | 84 | 85 | ############################### PROCESS CROP AND AUGMENTATION ################################ 86 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 87 | crop_img = img.copy() 88 | 89 | # Transform for 3D HMR 90 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 91 | img = self.transform(img.astype(np.float32)/255.0) 92 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 93 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 94 | img = img.transpose(2, 0, 1) / 255.0 95 | img = normalize_img(torch.from_numpy(img)).float() 96 | else: 97 | raise NotImplementedError 98 | ############################### PROCESS CROP AND AUGMENTATION ################################ 99 | 100 | 101 | input_data = dict(image=img) 102 | targets_data = dict(contact_data=contact_data) 103 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 104 | 105 | 106 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /lib/utils/mano_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2017 Javier Romero, Dimitrios Tzionas, Michael J Black and the Max Planck Gesellschaft. All rights reserved. 3 | This software is provided for research purposes only. 4 | By using this software you agree to the terms of the MANO/SMPL+H Model license here http://mano.is.tue.mpg.de/license 5 | 6 | More information about MANO/SMPL+H is available at http://mano.is.tue.mpg.de. 7 | For comments or questions, please email us at: mano@tue.mpg.de 8 | 9 | 10 | About this file: 11 | ================ 12 | This file defines a wrapper for the loading functions of the MANO model. 13 | 14 | Modules included: 15 | - load_model: 16 | loads the MANO model from a given file location (i.e. a .pkl file location), 17 | or a dictionary object. 
18 | 19 | ''' 20 | import os 21 | import cv2 22 | import torch 23 | import numpy as np 24 | import pickle 25 | import chumpy as ch 26 | from chumpy.ch import MatVecMult 27 | 28 | 29 | class Rodrigues(ch.Ch): 30 | dterms = 'rt' 31 | 32 | def compute_r(self): 33 | return cv2.Rodrigues(self.rt.r)[0] 34 | 35 | def compute_dr_wrt(self, wrt): 36 | if wrt is self.rt: 37 | return cv2.Rodrigues(self.rt.r)[1].T 38 | 39 | 40 | def lrotmin(p): 41 | if isinstance(p, np.ndarray): 42 | p = p.ravel()[3:] 43 | return np.concatenate( 44 | [(cv2.Rodrigues(np.array(pp))[0] - np.eye(3)).ravel() 45 | for pp in p.reshape((-1, 3))]).ravel() 46 | if p.ndim != 2 or p.shape[1] != 3: 47 | p = p.reshape((-1, 3)) 48 | p = p[1:] 49 | return ch.concatenate([(Rodrigues(pp) - ch.eye(3)).ravel() 50 | for pp in p]).ravel() 51 | 52 | 53 | def posemap(s): 54 | if s == 'lrotmin': 55 | return lrotmin 56 | else: 57 | raise Exception('Unknown posemapping: %s' % (str(s), )) 58 | 59 | 60 | def ready_arguments(fname_or_dict, posekey4vposed='pose'): 61 | if not isinstance(fname_or_dict, dict): 62 | dd = pickle.load(open(fname_or_dict, 'rb'), encoding='latin1') 63 | else: 64 | dd = fname_or_dict 65 | 66 | want_shapemodel = 'shapedirs' in dd 67 | nposeparms = dd['kintree_table'].shape[1] * 3 68 | 69 | if 'trans' not in dd: 70 | dd['trans'] = np.zeros(3) 71 | if 'pose' not in dd: 72 | dd['pose'] = np.zeros(nposeparms) 73 | if 'shapedirs' in dd and 'betas' not in dd: 74 | dd['betas'] = np.zeros(dd['shapedirs'].shape[-1]) 75 | 76 | for s in [ 77 | 'v_template', 'weights', 'posedirs', 'pose', 'trans', 'shapedirs', 78 | 'betas', 'J' 79 | ]: 80 | if (s in dd) and not hasattr(dd[s], 'dterms'): 81 | dd[s] = ch.array(dd[s]) 82 | 83 | assert (posekey4vposed in dd) 84 | if want_shapemodel: 85 | dd['v_shaped'] = dd['shapedirs'].dot(dd['betas']) + dd['v_template'] 86 | v_shaped = dd['v_shaped'] 87 | J_tmpx = MatVecMult(dd['J_regressor'], v_shaped[:, 0]) 88 | J_tmpy = MatVecMult(dd['J_regressor'], v_shaped[:, 1]) 89 | J_tmpz = MatVecMult(dd['J_regressor'], v_shaped[:, 2]) 90 | dd['J'] = ch.vstack((J_tmpx, J_tmpy, J_tmpz)).T 91 | pose_map_res = posemap(dd['bs_type'])(dd[posekey4vposed]) 92 | dd['v_posed'] = v_shaped + dd['posedirs'].dot(pose_map_res) 93 | else: 94 | pose_map_res = posemap(dd['bs_type'])(dd[posekey4vposed]) 95 | dd_add = dd['posedirs'].dot(pose_map_res) 96 | dd['v_posed'] = dd['v_template'] + dd_add 97 | 98 | return dd 99 | 100 | 101 | 102 | def get_mano_pca_basis(ncomps=45, use_pca=True, side='right', mano_root='data/base_data/human_models/mano'): 103 | if use_pca: 104 | ncomps = ncomps 105 | else: 106 | ncomps = 45 107 | 108 | if side == 'right': 109 | mano_path = os.path.join(mano_root, 'MANO_RIGHT.pkl') 110 | elif side == 'left': 111 | mano_path = os.path.join(mano_root, 'MANO_LEFT.pkl') 112 | smpl_data = ready_arguments(mano_path) 113 | hands_components = smpl_data['hands_components'] 114 | selected_components = hands_components[:ncomps] 115 | th_selected_comps = selected_components 116 | 117 | return torch.tensor(th_selected_comps, dtype=torch.float32) 118 | 119 | 120 | 121 | def change_flat_hand_mean(hand_pose, remove=True, side='right', mano_root='data/base_data/human_models/mano'): 122 | if side == 'right': 123 | mano_path = os.path.join(mano_root, 'MANO_RIGHT.pkl') 124 | elif side == 'left': 125 | mano_path = os.path.join(mano_root, 'MANO_LEFT.pkl') 126 | smpl_data = ready_arguments(mano_path) 127 | 128 | # Get hand mean 129 | hands_mean = smpl_data['hands_mean'] 130 | hands_mean = hands_mean.copy() # hands_mean: (45) 131 | 
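    # Note (added descriptive comment): remove=True subtracts the MANO mean hand pose
    # ('hands_mean', 45 values) from the articulation parameters hand_pose[3:], and
    # remove=False adds it back; the first three values (global orientation) are left
    # untouched. For array inputs this modifies hand_pose in place as well as returning it.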
132 | if remove: 133 | hand_pose[3:] = hand_pose[3:] - hands_mean 134 | else: 135 | hand_pose[3:] = hand_pose[3:] + hands_mean 136 | return hand_pose -------------------------------------------------------------------------------- /lib/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from collections import defaultdict, deque 4 | 5 | import mediapipe as mp 6 | 7 | 8 | from lib.utils.vis_utils import draw_landmarks_on_image, draw_landmarks_on_image_simple 9 | 10 | 11 | def smooth_bbox(prev_bbox, curr_bbox, alpha=0.8): 12 | if prev_bbox is None: 13 | return curr_bbox 14 | return [alpha * p + (1 - alpha) * c for p, c in zip(prev_bbox, curr_bbox)] 15 | 16 | 17 | def smooth_contact_mask(prev_mask, curr_mask, alpha=0.8): 18 | if prev_mask is None: 19 | return curr_mask.astype(np.float32) 20 | return alpha * prev_mask + (1 - alpha) * curr_mask.astype(np.float32) 21 | 22 | 23 | def remove_small_contact_components(contact_mask, faces, min_size=20): 24 | vertex_to_faces = defaultdict(list) 25 | for i, f in enumerate(faces): 26 | for v in f: 27 | vertex_to_faces[v].append(i) 28 | 29 | visited = np.zeros(len(contact_mask), dtype=bool) 30 | filtered_mask = np.zeros_like(contact_mask, dtype=bool) 31 | 32 | for v in range(len(contact_mask)): 33 | if visited[v] or not contact_mask[v]: 34 | continue 35 | 36 | queue = deque([v]) 37 | component = [] 38 | while queue: 39 | curr = queue.popleft() 40 | if visited[curr] or not contact_mask[curr]: 41 | continue 42 | visited[curr] = True 43 | component.append(curr) 44 | for f_idx in vertex_to_faces[curr]: 45 | for neighbor in faces[f_idx]: 46 | if not visited[neighbor] and contact_mask[neighbor]: 47 | queue.append(neighbor) 48 | 49 | if len(component) >= min_size: 50 | filtered_mask[component] = True 51 | 52 | return filtered_mask 53 | 54 | 55 | def initialize_video_writer(output_path, fps, frame_size): 56 | tried_codecs = ['avc1', 'H264', 'X264', 'MJPG', 'mp4v'] # we recommend using 'MJPG' 57 | for codec in tried_codecs: 58 | fourcc = cv2.VideoWriter_fourcc(*codec) 59 | writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size) 60 | if writer.isOpened(): 61 | print(f"Using codec '{codec}' for {output_path}") 62 | return writer 63 | writer.release() 64 | raise RuntimeError(f"Failed to initialize VideoWriter for {output_path}") 65 | 66 | 67 | def run_wilor_hand_detector(orig_img, detector): 68 | conf = 0.3 69 | IoU_threshold = 0.3 70 | 71 | detections = detector(orig_img, conf=conf, verbose=False, iou=IoU_threshold)[0] 72 | 73 | img_h, img_w, _ = orig_img.shape 74 | 75 | right_hand_bbox = [0, 0, img_w, img_h] # [x_min_expand, y_min_expand, bb_width_expand, bb_height_expand] 76 | best_conf = 0. 77 | 78 | # Find the most confident right hand 79 | for det in detections: 80 | Bbox = det.boxes.data.cpu().detach().squeeze().numpy() 81 | Conf = det.boxes.conf.data.cpu().detach()[0].numpy().reshape(-1).astype(np.float16) 82 | Side = det.boxes.cls.data.cpu().detach() 83 | 84 | if (Side.item() == 1.) 
and (Conf.item() > best_conf): 85 | right_hand_bbox = [Bbox[0], Bbox[1], Bbox[2]-Bbox[0], Bbox[3]-Bbox[1]] 86 | 87 | return right_hand_bbox 88 | 89 | 90 | def extract_frames_with_hand(cap, detector, detector_type='wilor'): 91 | frames_with_hand = [] 92 | frame_idx = 0 93 | 94 | while cap.isOpened(): 95 | ret, frame = cap.read() 96 | if not ret: 97 | break 98 | 99 | orig_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 100 | if detector_type == 'wilor': 101 | right_hand_bbox = run_wilor_hand_detector(orig_img, detector) 102 | _, right_hand_bbox = draw_landmarks_on_image_simple(orig_img.copy(), right_hand_bbox) 103 | elif detector_type == 'mediapipe': 104 | mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=orig_img) 105 | detection_result = detector.detect(mp_image) 106 | _, right_hand_bbox = draw_landmarks_on_image(orig_img.copy(), detection_result) 107 | 108 | if right_hand_bbox is not None: 109 | frames_with_hand.append((frame_idx, frame, right_hand_bbox)) 110 | 111 | frame_idx += 1 112 | 113 | cap.release() 114 | return frames_with_hand 115 | 116 | 117 | def find_longest_continuous_segment(frames_with_hand): 118 | longest_segment = [] 119 | current_segment = [] 120 | 121 | for i in range(len(frames_with_hand)): 122 | if i == 0 or frames_with_hand[i][0] == frames_with_hand[i - 1][0] + 1: 123 | current_segment.append(frames_with_hand[i]) 124 | else: 125 | if len(current_segment) > len(longest_segment): 126 | longest_segment = current_segment 127 | current_segment = [frames_with_hand[i]] 128 | 129 | if len(current_segment) > len(longest_segment): 130 | longest_segment = current_segment 131 | 132 | return longest_segment -------------------------------------------------------------------------------- /data/HIC/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torchvision.transforms import Normalize 8 | 9 | from lib.core.config import cfg 10 | from lib.utils.human_models import mano 11 | from lib.utils.func_utils import load_img 12 | from lib.utils.preprocessing import augmentation_contact 13 | 14 | 15 | 16 | # Main dataloader code for HIC dataset 17 | class HIC(Dataset): 18 | def __init__(self, transform, data_split): 19 | super(HIC, self).__init__() 20 | self.__dict__.update(locals()) 21 | 22 | self.transform = transform 23 | dataset_name = 'hic' 24 | 25 | self.data_split = data_split 26 | self.root_path = root_path = os.path.join('data', 'HIC') 27 | self.data_dir = os.path.join(self.root_path, 'data') 28 | 29 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 30 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 31 | 32 | # inter: 01, 02, 03, 04, 05, 06, 07 | single: 08, 09, 10, 11 | Single-HOI: 15, 19, 20, 21 | Inter-HOI: 16, 17, 18 33 | self.inter_seq_names = ['01', '02', '03', '04', '05', '06', '07'] 34 | self.single_seq_names = ['08', '09', '10', '11'] 35 | self.single_hoi_seq_names = ['15', '19', '20', '21'] 36 | self.inter_hoi_seq_names = ['16', '17', '18'] 37 | 38 | # Split train/test set (we only use inter seq) 39 | self.train_seq_names = ['01', '02', '03', '04', '05', '06'] 40 | self.test_seq_names = ['07'] 41 | 42 | self.cam_param = {'focal': [525.0, 525.0], 'princpt': [319.5, 239.5]} # this is fixed for HIC dataset 43 | 44 | # Organize db id based on split 45 | db_split_path = os.path.join(self.root_path, 'splits', 
f'{self.data_split}.json') 46 | with open(db_split_path, 'r') as f: 47 | self.db = json.load(f) 48 | 49 | 50 | def __len__(self): 51 | return len(self.db['images']) 52 | 53 | 54 | def __getitem__(self, index): 55 | images_info = self.db['images'][index] 56 | annotations = self.db['annotations'][index] 57 | 58 | aid = annotations['id'] 59 | image_id = annotations['image_id'] 60 | seq_name = images_info['seq_name'] 61 | file_name = images_info['file_name'] 62 | img_w, img_h = images_info['width'], images_info['height'] 63 | 64 | bbox = annotations['bbox'] 65 | hand_type = annotations['hand_type'] 66 | right_mano_path = annotations['right_mano_path'] 67 | left_mano_path = annotations['left_mano_path'] 68 | 69 | sample_id = image_id 70 | 71 | # Load image 72 | orig_img_path = os.path.join(self.data_dir, file_name) 73 | orig_img = load_img(orig_img_path) 74 | img_shape = orig_img.shape[:2] 75 | img_h, img_w = img_shape 76 | 77 | mano_valid = np.ones((1), dtype=np.float32) 78 | 79 | 80 | ################################## LOAD ANNOTATION DATA ##################################### 81 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 82 | 83 | annot_data = np.load(annot_data_path, allow_pickle=True) 84 | bbox_hand_r = annot_data['bbox_hand'] 85 | cam_param = annot_data['cam_param'] 86 | 87 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 88 | contact_data = dict(contact_h=contact_h) 89 | ################################## LOAD ANNOTATION DATA ##################################### 90 | 91 | 92 | ############################### PROCESS CROP AND AUGMENTATION ################################ 93 | img, img2bb_trans, bb2img_trans, rot, do_flip, _ = augmentation_contact(orig_img.copy(), bbox_hand_r, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 94 | crop_img = img.copy() 95 | 96 | # Transform for 3D HMR 97 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 98 | img = self.transform(img.astype(np.float32)/255.0) 99 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 100 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 101 | img = img.transpose(2, 0, 1) / 255.0 102 | img = normalize_img(torch.from_numpy(img)).float() 103 | else: 104 | raise NotImplementedError 105 | ############################### PROCESS CROP AND AUGMENTATION ################################ 106 | 107 | 108 | input_data = dict(image=img) 109 | targets_data = dict(contact_data=contact_data) 110 | meta_info = dict(sample_id=str(sample_id), mano_valid=mano_valid) 111 | 112 | 113 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /data/DexYCB/toolkit/layers/ycb_layer.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Layer to transform YCB mesh vertices with SE3 transformation.""" 6 | 7 | import os 8 | import torch 9 | 10 | from torch.nn import Module 11 | 12 | from ..obj import OBJ 13 | 14 | 15 | class YCBLayer(Module): 16 | """Layer to transform YCB mesh vertices with SE3 transformation.""" 17 | 18 | def __init__(self, i): 19 | """Constructor. 20 | 21 | Args: 22 | i: YCB object index. 
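           (For example, i = 1 selects '002_master_chef_can' from the class list defined below.)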
23 | """ 24 | super(YCBLayer, self).__init__() 25 | 26 | assert 'DEX_YCB_DIR' in os.environ, "environment variable 'DEX_YCB_DIR' is not set" 27 | self._path = os.environ['DEX_YCB_DIR'] + "/models" 28 | self._classes = ('__background__', '002_master_chef_can', '003_cracker_box', 29 | '004_sugar_box', '005_tomato_soup_can', 30 | '006_mustard_bottle', '007_tuna_fish_can', 31 | '008_pudding_box', '009_gelatin_box', 32 | '010_potted_meat_can', '011_banana', '019_pitcher_base', 33 | '021_bleach_cleanser', '024_bowl', '025_mug', 34 | '035_power_drill', '036_wood_block', '037_scissors', 35 | '040_large_marker', '051_large_clamp', 36 | '052_extra_large_clamp', '061_foam_brick') 37 | self._class_name = self._classes[i] 38 | self._obj_file = self._path + '/' + self._class_name + "/textured_simple.obj" 39 | self._obj = OBJ(self._obj_file) 40 | assert len(self._obj.mesh_list) == 1 41 | assert len(self._obj.mesh_list[0].groups) == 1 42 | g = self._obj.mesh_list[0].groups[0] 43 | 44 | self._material = g.material 45 | self._tex_coords = self._obj.t[g.f_t] 46 | 47 | v = torch.from_numpy(self._obj.v).t() 48 | n = torch.from_numpy(self._obj.n).t() 49 | assert (g.f_v == g.f_n).all() 50 | f = torch.from_numpy(g.f_v).view((-1, 3)) 51 | self.register_buffer('v', v) 52 | self.register_buffer('n', n) 53 | self.register_buffer('f', f) 54 | 55 | @property 56 | def obj_file(self): 57 | return self._obj_file 58 | 59 | @property 60 | def material(self): 61 | return self._material 62 | 63 | @property 64 | def tex_coords(self): 65 | return self._tex_coords 66 | 67 | def forward(self, r, t): 68 | """Forward function. 69 | 70 | Args: 71 | r: A tensor of shape [B, 3] containing the rotation in axis-angle. 72 | t: A tensor of shape [B, 3] containing the translation. 73 | 74 | Returns: 75 | v: A tensor of shape [B, N, 3] containing the transformed vertices. 76 | n: A tensor of shape [B, N, 3] containing the transformed normals. 77 | """ 78 | R = rv2dcm(r) 79 | v = torch.matmul(R, self.v).permute(0, 2, 1) + t.unsqueeze(1) 80 | n = torch.matmul(R, self.n).permute(0, 2, 1) 81 | return v, n 82 | 83 | 84 | # https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula#Matrix_notation 85 | def rv2dcm(rv): 86 | """Converts rotation vectors to direction cosine matrices. 87 | 88 | Args: 89 | rv: A tensor of shape [B, 3] containing the rotation vectors. 90 | 91 | Returns: 92 | A tensor of shape [B, 3, 3] containing the direction cosine matrices. 93 | """ 94 | angle = torch.norm(rv + 1e-8, p=2, dim=1) 95 | axis = rv / angle.unsqueeze(1) 96 | s = torch.sin(angle).unsqueeze(1).unsqueeze(2) 97 | c = torch.cos(angle).unsqueeze(1).unsqueeze(2) 98 | I = torch.eye(3, device=rv.device).expand(rv.size(0), -1, -1) 99 | z = torch.zeros_like(angle) 100 | K = torch.stack( 101 | (torch.stack((z, -axis[:, 2], axis[:, 1]), 102 | dim=1), torch.stack((axis[:, 2], z, -axis[:, 0]), dim=1), 103 | torch.stack((-axis[:, 1], axis[:, 0], z), dim=1)), 104 | dim=1) 105 | dcm = I + s * K + (1 - c) * torch.bmm(K, K) 106 | return dcm 107 | 108 | 109 | # https://en.wikipedia.org/wiki/Rotation_formalisms_in_three_dimensions#Rotation_matrix_%E2%86%94_Euler_axis/angle 110 | # https://github.com/kashif/ceres-solver/blob/087462a90dd1c23ac443501f3314d0fcedaea5f7/include/ceres/rotation.h#L178 111 | # S. Sarabandi and F. Thomas. A Survey on the Computation of Quaternions from Rotation Matrices. J MECH ROBOT, 2019. 112 | # https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula 113 | def dcm2rv(dcm): 114 | """Converts direction cosine matrices to rotation vectors. 
115 | 116 | Args: 117 | dcm: A tensor of shape [B, 3, 3] containing the direction cosine matrices. 118 | 119 | Returns: 120 | A tensor of shape [B, 3] containing the rotation vectors. 121 | """ 122 | X = torch.stack((dcm[:, 2, 1] - dcm[:, 1, 2], dcm[:, 0, 2] - dcm[:, 2, 0], 123 | dcm[:, 1, 0] - dcm[:, 0, 1]), 124 | dim=1) 125 | s = torch.norm(X, p=2, dim=1) / 2 126 | c = (dcm[:, 0, 0] + dcm[:, 1, 1] + dcm[:, 2, 2] - 1) / 2 127 | c = torch.clamp(c, -1, 1) 128 | angle = torch.atan2(s, c) 129 | Y = torch.stack((dcm[:, 0, 0], dcm[:, 1, 1], dcm[:, 2, 2]), dim=1) 130 | Y = torch.sqrt((Y - c.unsqueeze(1)) / (1 - c.unsqueeze(1))) 131 | rv = torch.zeros((dcm.size(0), 3), device=dcm.device) 132 | i1 = s > 1e-3 133 | i2 = (s <= 1e-3) & (c > 0) 134 | i3 = (s <= 1e-3) & (c < 0) 135 | rv[i1] = angle[i1].unsqueeze(1) * X[i1] / (2 * s[i1].unsqueeze(1)) 136 | rv[i2] = X[i2] / 2 137 | rv[i3] = angle[i3].unsqueeze(1) * torch.sign(X[i3]) * Y[i3] 138 | return rv 139 | -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from scipy.spatial.transform import Rotation as R 4 | 5 | 6 | def cam2pixel(cam_coord, f, c): 7 | x = cam_coord[:,0] / (cam_coord[:,2] + 1e-5) * f[0] + c[0] 8 | y = cam_coord[:,1] / (cam_coord[:,2] + 1e-5) * f[1] + c[1] 9 | z = cam_coord[:,2] + 1e-5 10 | return np.stack((x,y,z),1) 11 | 12 | 13 | def world2cam(world_coord, R, t): 14 | cam_coord = np.dot(R, world_coord.transpose(1,0)).transpose(1,0) + t.reshape(1,3) 15 | return cam_coord 16 | 17 | 18 | def transform_joint_to_other_db(src_joint, src_name, dst_name): 19 | src_joint_num = len(src_name) 20 | dst_joint_num = len(dst_name) 21 | 22 | new_joint = np.zeros(((dst_joint_num,) + src_joint.shape[1:]), dtype=np.float32) 23 | 24 | for src_idx in range(len(src_name)): 25 | name = src_name[src_idx] 26 | if name in dst_name: 27 | dst_idx = dst_name.index(name) 28 | new_joint[dst_idx] = src_joint[src_idx] 29 | 30 | return new_joint 31 | 32 | 33 | def apply_homogeneous_transformation(vertices, transform_matrix): 34 | # Convert vertices to homogeneous coordinates (add a column of ones) 35 | num_verts = vertices.shape[0] 36 | verts_homogeneous = torch.cat([vertices, torch.ones((num_verts, 1), dtype=vertices.dtype, device=vertices.device)], dim=1) # Shape (num_verts, 4) 37 | 38 | # Apply the homogeneous transformation 39 | transformed_homogeneous = torch.matmul(transform_matrix, verts_homogeneous.T).T # Shape (num_verts, 4) 40 | 41 | # Convert back to Cartesian coordinates (divide by the homogeneous component) 42 | transformed_vertices = transformed_homogeneous[:, :3] / transformed_homogeneous[:, 3][:, None] # Shape (num_verts, 3) 43 | 44 | return transformed_vertices 45 | 46 | 47 | def apply_homogeneous_transformation_np(vertices, transform_matrix): 48 | # Convert vertices to homogeneous coordinates (add a column of ones) 49 | num_verts = vertices.shape[0] 50 | verts_homogeneous = np.concatenate([vertices, np.ones((num_verts, 1), dtype=vertices.dtype)], axis=1) # Shape (num_verts, 4) 51 | 52 | # Apply the homogeneous transformation 53 | transformed_homogeneous = np.dot(transform_matrix, verts_homogeneous.T).T # Shape (num_verts, 4) 54 | 55 | # Convert back to Cartesian coordinates (divide by the homogeneous component) 56 | transformed_vertices = transformed_homogeneous[:, :3] / transformed_homogeneous[:, 3][:, None] # Shape (num_verts, 3) 57 | 58 | return transformed_vertices 
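# Usage sketch (illustrative annotation, not part of the original file): both helpers above
# expect an (N, 3) vertex tensor/array and a (4, 4) homogeneous matrix whose last row is
# [0, 0, 0, 1]. For instance, a pure translation of 10 cm along x:
#   T = np.eye(4); T[:3, 3] = [0.1, 0.0, 0.0]
#   moved = apply_homogeneous_transformation_np(verts, T)
# leaves moved[:, 1:] equal to verts[:, 1:] and shifts the x coordinates by 0.1.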
59 | 60 | 61 | # Revert MANO global rotation and translation 62 | def inv_mano_global_orient(mano_verts, mano_root, mano_global_orient, mano_trans): 63 | """ 64 | Reverts the global orientation and translation applied to MANO vertices 65 | (i.e., transforms them from the global coordinate space back to a local space). 66 | 67 | Args: 68 | mano_verts (Tensor): shape (num_verts, 3), the MANO vertices. 69 | mano_joints (Tensor): shape (num_joints, 3), the MANO joint positions. 70 | mano_global_orient (Tensor): shape (3,), global orientation in axis-angle format. 71 | mano_trans (Tensor): shape (3,), global translation. 72 | 73 | Returns: 74 | vertices_transformed (Tensor): shape (num_verts, 3), the locally transformed vertices. 75 | transform_matrix (Tensor): shape (4, 4), the homogeneous transformation matrix 76 | that undoes the global transform. 77 | transform_matrix_inv (Tensor): shape (4, 4), the inverse of transform_matrix 78 | (i.e., the forward transform). 79 | """ 80 | device = mano_verts.device 81 | 82 | # 1) Convert global orientation (axis-angle) -> rotation matrix 83 | R = axis_angle_to_rotation_matrix(mano_global_orient) # shape (3, 3) 84 | 85 | # 2) Invert rotation matrix 86 | # (for an orthonormal rotation, inverse is transpose) 87 | R_inv = invert_rotation_matrix(R) 88 | 89 | # 3) Identify the 'root' for the transform 90 | # Typically 'Wrist' in MANO 91 | wrist_position = mano_root 92 | adjust_root = wrist_position 93 | 94 | # 4) Build the matrix that undoes global transform (global -> local) 95 | transform_matrix = torch.eye(4, device=device) 96 | transform_matrix[:3, :3] = R_inv 97 | transform_matrix[:3, 3] = ( 98 | -torch.matmul(R_inv, adjust_root) 99 | - mano_trans 100 | + wrist_position 101 | ) 102 | 103 | # 5) Apply transform_matrix to vertices 104 | verts_hom = torch.cat( 105 | [mano_verts, torch.ones((mano_verts.shape[0], 1), device=device)], 106 | dim=1 107 | ) 108 | vertices_transformed = (transform_matrix @ verts_hom.T).T[:, :3] 109 | 110 | # 6) Manually invert transform_matrix without torch.linalg.inv 111 | # 112 | # If T = [[A, b], 113 | # [0, 1]], 114 | # then T^-1 = [[A^-1, -A^-1 b], 115 | # [0, 1 ]]. 116 | # 117 | # Here, A = R_inv, so A^-1 = R (the original rotation), 118 | # b = transform_matrix[:3, 3]. 
119 | # 120 | # So T^-1[:3, :3] = R 121 | # T^-1[:3, 3] = -R @ b 122 | # 123 | transform_matrix_inv = torch.eye(4, device=device) 124 | transform_matrix_inv[:3, :3] = R # because R is (R_inv)^-1 125 | transform_matrix_inv[:3, 3] = -R @ transform_matrix[:3, 3] 126 | 127 | return vertices_transformed, transform_matrix, transform_matrix_inv -------------------------------------------------------------------------------- /data/H2O/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torchvision.transforms import Normalize 8 | 9 | from lib.core.config import cfg 10 | from lib.utils.human_models import mano 11 | from lib.utils.func_utils import load_img 12 | from lib.utils.preprocessing import augmentation_contact 13 | from lib.utils.train_utils import get_contact_difficulty_sample_id 14 | 15 | 16 | def get_sample_id(split, index): 17 | aid = split[index] 18 | subject_name = aid.split('/')[0] + '_ego' # we only use ego split for H2O dataset 19 | seq_name = aid.split('/')[1] 20 | obj_id = aid.split('/')[2] 21 | cam_name = aid.split('/')[3] 22 | img_name = aid.split('/')[5] 23 | img_id = img_name.split('.png')[0] 24 | 25 | sample_id = f'{subject_name}-{seq_name}-{obj_id}-{cam_name}-{img_id}' 26 | return sample_id 27 | 28 | 29 | 30 | # Main dataloader code for H2O dataset 31 | class H2O(Dataset): 32 | def __init__(self, transform, data_split): 33 | super(H2O, self).__init__() 34 | self.__dict__.update(locals()) 35 | 36 | self.transform = transform 37 | dataset_name = 'h2o' 38 | 39 | self.data_split = data_split 40 | self.root_path = root_path = os.path.join('data', 'H2O') 41 | self.data_dir = os.path.join(self.root_path, 'data') 42 | 43 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 44 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 45 | 46 | # Organize db id based on split 47 | db_split_path = os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 48 | with open(db_split_path, 'r') as f: 49 | self.split = json.load(f) 50 | 51 | # Sort contact by difficulty (Balanced contact sampling) 52 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 53 | sample_id_to_split_id = {} 54 | for split_idx in range(len(self.split)): 55 | each_sample_id = get_sample_id(self.split, split_idx) 56 | if each_sample_id in sample_id_to_split_id: 57 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 58 | else: 59 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 60 | 61 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 62 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 63 | 64 | new_split = [sample_id_to_split_id[key] for key in sample_id_difficulty_list if key in [*sample_id_to_split_id]] 65 | self.split = new_split 66 | 67 | 68 | def __len__(self): 69 | return len(self.split) 70 | 71 | 72 | def __getitem__(self, index): 73 | aid = self.split[index] 74 | subject_name = aid.split('/')[0] + '_ego' # we only use ego split for H2O dataset 75 | seq_name = aid.split('/')[1] 76 | obj_id = aid.split('/')[2] 77 | cam_name = aid.split('/')[3] 78 | img_name = aid.split('/')[5] 79 | img_id = img_name.split('.png')[0] 80 | 81 | sample_id = 
f'{subject_name}-{seq_name}-{obj_id}-{cam_name}-{img_id}' 82 | 83 | orig_img_path = os.path.join(self.data_dir, subject_name, seq_name, obj_id, cam_name, 'rgb', f'{img_id}.png') 84 | 85 | orig_img = load_img(orig_img_path) 86 | img_shape = orig_img.shape[:2] 87 | img_h, img_w = img_shape 88 | 89 | mano_valid = np.ones((1), dtype=np.float32) 90 | 91 | 92 | ################################## LOAD ANNOTATION DATA ##################################### 93 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 94 | 95 | annot_data = np.load(annot_data_path, allow_pickle=True) 96 | bbox_hand_r = annot_data['bbox_hand_r'] 97 | 98 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 99 | contact_data = dict(contact_h=contact_h) 100 | ################################## LOAD ANNOTATION DATA ##################################### 101 | 102 | 103 | ############################### PROCESS CROP AND AUGMENTATION ################################ 104 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 105 | crop_img = img.copy() 106 | 107 | # Transform for 3D HMR 108 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 109 | img = self.transform(img.astype(np.float32)/255.0) 110 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 111 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 112 | img = img.transpose(2, 0, 1) / 255.0 113 | img = normalize_img(torch.from_numpy(img)).float() 114 | else: 115 | raise NotImplementedError 116 | ############################### PROCESS CROP AND AUGMENTATION ################################ 117 | 118 | 119 | input_data = dict(image=img) 120 | targets_data = dict(contact_data=contact_data) 121 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 122 | 123 | 124 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /data/HOI4D/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.transforms import Normalize 9 | 10 | from lib.core.config import cfg 11 | from lib.utils.human_models import mano 12 | from lib.utils.func_utils import load_img 13 | from lib.utils.preprocessing import augmentation_contact 14 | from lib.utils.train_utils import get_contact_difficulty_sample_id 15 | 16 | 17 | obj_cls_mapping = [ 18 | '', 'ToyCar', 'Mug', 'Laptop', 'StorageFurniture', 'Bottle', 19 | 'Safe', 'Bowl', 'Bucket', 'Scissors', '', 'Pliers', 'Kettle', 20 | 'Knife', 'TrashCan', '', '', 'Lamp', 'Stapler', '', 'Chair' 21 | ] 22 | 23 | 24 | 25 | # Main dataloader code for HOI4D dataset 26 | class HOI4D(Dataset): 27 | def __init__(self, transform, data_split): 28 | super(HOI4D, self).__init__() 29 | self.__dict__.update(locals()) 30 | 31 | self.transform = transform 32 | dataset_name = 'hoi4d' 33 | 34 | self.data_split = data_split 35 | self.root_path = root_path = os.path.join('data', 'HOI4D') 36 | self.data_dir = os.path.join(self.root_path, 'data') 37 | 38 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 39 | 
self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 40 | 41 | # Organize db id based on split 42 | db_split_path = os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 43 | with open(db_split_path, 'r') as f: 44 | db_split = json.load(f) 45 | 46 | self.split = db_split 47 | 48 | # Sort contact by difficulty (Balanced contact sampling) 49 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 50 | sample_id_to_split_id = {} 51 | for split_idx in range(len(self.split)): 52 | each_sample_id = self.split[split_idx] 53 | if each_sample_id in sample_id_to_split_id: 54 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 55 | else: 56 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 57 | 58 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 59 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 60 | 61 | new_split = [key for key in sample_id_difficulty_list] 62 | self.split = new_split 63 | 64 | 65 | def __len__(self): 66 | return len(self.split) 67 | 68 | 69 | def __getitem__(self, index): 70 | image_id = self.split[index] 71 | seq_name, image_name = re.match(r'(.+?)-(\d+)$', image_id).groups() 72 | seq_name = seq_name.replace('-', '/') 73 | 74 | camera_name = seq_name.split('/')[0] 75 | person_name = seq_name.split('/')[1] 76 | obj_class_name = seq_name.split('/')[2] 77 | obj_instance_name = seq_name.split('/')[3] 78 | room_name = seq_name.split('/')[4] 79 | room_layout_name = seq_name.split('/')[5] 80 | task_name = seq_name.split('/')[6] 81 | 82 | obj_cat = obj_cls_mapping[int(obj_class_name[1:])] 83 | obj_id = int(obj_instance_name[1:]) 84 | 85 | orig_img_path = os.path.join(self.data_dir, 'HOI4D_color', 'HOI4D_release', seq_name, 'align_rgb', f'{image_name}.jpg') 86 | 87 | sample_id = str(image_id) 88 | 89 | orig_img = load_img(orig_img_path) 90 | img_shape = orig_img.shape[:2] 91 | img_h, img_w = img_shape 92 | 93 | mano_valid = np.ones((1), dtype=np.float32) 94 | 95 | 96 | ################################## LOAD ANNOTATION DATA ##################################### 97 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 98 | contact_data_path = os.path.join(self.contact_data_path, f'{sample_id}.npy') 99 | 100 | annot_data = np.load(annot_data_path, allow_pickle=True) 101 | bbox_hand = annot_data['bbox_ho'] 102 | 103 | contact_h = np.load(contact_data_path).astype(np.float32) 104 | contact_data = dict(contact_h=contact_h) 105 | ################################## LOAD ANNOTATION DATA ##################################### 106 | 107 | 108 | ############################### PROCESS CROP AND AUGMENTATION ################################ 109 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 110 | crop_img = img.copy() 111 | 112 | # Transform for 3D HMR 113 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 114 | img = self.transform(img.astype(np.float32)/255.0) 115 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 116 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 117 | img = img.transpose(2, 0, 1) / 255.0 118 | img = normalize_img(torch.from_numpy(img)).float() 119 | else: 
120 | raise NotImplementedError 121 | ############################### PROCESS CROP AND AUGMENTATION ################################ 122 | 123 | 124 | input_data = dict(image=img) 125 | targets_data = dict(contact_data=contact_data) 126 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 127 | 128 | 129 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /data/ARCTIC/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from tqdm import tqdm 5 | from easydict import EasyDict 6 | 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision.transforms import Normalize 10 | 11 | from lib.core.config import cfg 12 | from lib.utils.human_models import mano 13 | from lib.utils.func_utils import load_img 14 | from lib.utils.preprocessing import augmentation_contact 15 | from lib.utils.train_utils import get_contact_difficulty_sample_id 16 | 17 | 18 | def get_sample_id(db, split, index): 19 | index = split[index] 20 | aid = db['imgnames'][index].split('./arctic_data/data/images/')[-1] 21 | subject_name = aid.split('/')[0] 22 | seq_name = aid.split('/')[1] 23 | obj_name, action_name = seq_name.split('_')[0], seq_name.split('_')[1] 24 | cam_name = aid.split('/')[2] 25 | img_name = aid.split('/')[3] 26 | img_id = img_name.split('.jpg')[0] 27 | 28 | sample_id = f'{subject_name}-{seq_name}-{cam_name}-{img_id}' 29 | return sample_id 30 | 31 | 32 | 33 | # Main dataloader code for ARCTIC dataset 34 | class ARCTIC(Dataset): 35 | def __init__(self, transform, data_split): 36 | super(ARCTIC, self).__init__() 37 | self.__dict__.update(locals()) 38 | 39 | self.transform = transform 40 | dataset_name = 'arctic' 41 | 42 | self.data_split = data_split 43 | self.root_path = root_path = os.path.join('data', 'ARCTIC') 44 | self.data_dir = os.path.join(self.root_path, 'data') 45 | 46 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 47 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 48 | 49 | # DB (we only use protocol 2) 50 | db_p2_path = os.path.join(self.data_dir, f'splits/p2_{self.data_split}.npy') 51 | db_p2 = np.load(db_p2_path, allow_pickle=True).item() # keys: ['data_dict', 'imgnames'] | egocentric 52 | 53 | self.db = db_p2 54 | 55 | # Organize db id based on split 56 | split_db_id_file_path = os.path.join(self.root_path, 'splits', f'{data_split}.json') 57 | with open(split_db_id_file_path, 'r') as f: 58 | self.split = json.load(f) 59 | self.split = [int(item) for item in self.split] 60 | 61 | # Sort contact by difficulty (Balanced contact sampling) 62 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 63 | sample_id_to_split_id = {} 64 | for split_idx in range(len(self.split)): 65 | each_sample_id = get_sample_id(self.db, self.split, split_idx) 66 | if each_sample_id in sample_id_to_split_id: 67 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 68 | else: 69 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 70 | 71 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 72 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 73 | 74 | new_split = [sample_id_to_split_id[key] for key in 
sample_id_difficulty_list] 75 | self.split = new_split 76 | 77 | 78 | def __len__(self): 79 | return len(self.split) 80 | 81 | 82 | def __getitem__(self, index): 83 | db_id = self.split[index] 84 | aid = self.db['imgnames'][db_id].split('./arctic_data/data/images/')[-1] 85 | subject_name = aid.split('/')[0] 86 | seq_name = aid.split('/')[1] 87 | obj_name, action_name = seq_name.split('_')[0], seq_name.split('_')[1] 88 | cam_name = aid.split('/')[2] 89 | img_name = aid.split('/')[3] 90 | img_id = img_name.split('.jpg')[0] 91 | sample_id = f'{subject_name}-{seq_name}-{cam_name}-{img_id}' 92 | 93 | orig_img_path = os.path.join(self.data_dir, 'cropped_images', subject_name, seq_name, cam_name, f'{img_id}.jpg') 94 | 95 | orig_img = load_img(orig_img_path) 96 | img_shape = orig_img.shape[:2] 97 | img_h, img_w = img_shape 98 | 99 | mano_valid = np.ones((1), dtype=np.float32) 100 | 101 | 102 | ################################## LOAD ANNOTATION DATA ##################################### 103 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 104 | 105 | annot_data = np.load(annot_data_path, allow_pickle=True) 106 | bbox_hand_r = annot_data['bbox_hand_r'] 107 | 108 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 109 | contact_data = dict(contact_h=contact_h) 110 | ################################## LOAD ANNOTATION DATA ##################################### 111 | 112 | 113 | ############################### PROCESS CROP AND AUGMENTATION ############################### 114 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 115 | crop_img = img.copy() 116 | 117 | # Transform for 3D HMR 118 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 119 | img = self.transform(img.astype(np.float32)/255.0) 120 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 121 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 122 | img = img.transpose(2, 0, 1) / 255.0 123 | img = normalize_img(torch.from_numpy(img)).float() 124 | else: 125 | raise NotImplementedError 126 | ############################### PROCESS CROP AND AUGMENTATION ############################### 127 | 128 | 129 | input_data = dict(image=img) 130 | targets_data = dict(contact_data=contact_data) 131 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 132 | 133 | 134 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /lib/utils/smplx/LICENSE: -------------------------------------------------------------------------------- 1 | License 2 | 3 | Software Copyright License for non-commercial scientific research purposes 4 | Please read carefully the following terms and conditions and any accompanying documentation before you download and/or use the SMPL-X/SMPLify-X model, data and software, (the "Model & Software"), including 3D meshes, blend weights, blend shapes, textures, software, scripts, and animations. By downloading and/or using the Model & Software (including downloading, cloning, installing, and any other use of this github repository), you acknowledge that you have read these terms and conditions, understand them, and agree to be bound by them. 
If you do not agree with these terms and conditions, you must not download and/or use the Model & Software. Any infringement of the terms of this agreement will automatically terminate your rights under this License 5 | 6 | Ownership / Licensees 7 | The Software and the associated materials has been developed at the 8 | 9 | Max Planck Institute for Intelligent Systems (hereinafter "MPI"). 10 | 11 | Any copyright or patent right is owned by and proprietary material of the 12 | 13 | Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (hereinafter “MPG”; MPI and MPG hereinafter collectively “Max-Planck”) 14 | 15 | hereinafter the “Licensor”. 16 | 17 | License Grant 18 | Licensor grants you (Licensee) personally a single-user, non-exclusive, non-transferable, free of charge right: 19 | 20 | To install the Model & Software on computers owned, leased or otherwise controlled by you and/or your organization; 21 | To use the Model & Software for the sole purpose of performing non-commercial scientific research, non-commercial education, or non-commercial artistic projects; 22 | Any other use, in particular any use for commercial purposes, is prohibited. This includes, without limitation, incorporation in a commercial product, use in a commercial service, or production of other artifacts for commercial purposes. The Model & Software may not be reproduced, modified and/or made available in any form to any third party without Max-Planck’s prior written permission. 23 | 24 | The Model & Software may not be used for pornographic purposes or to generate pornographic material whether commercial or not. This license also prohibits the use of the Model & Software to train methods/algorithms/neural networks/etc. for commercial use of any kind. By downloading the Model & Software, you agree not to reverse engineer it. 25 | 26 | No Distribution 27 | The Model & Software and the license herein granted shall not be copied, shared, distributed, re-sold, offered for re-sale, transferred or sub-licensed in whole or in part except that you may make one copy for archive purposes only. 28 | 29 | Disclaimer of Representations and Warranties 30 | You expressly acknowledge and agree that the Model & Software results from basic research, is provided “AS IS”, may contain errors, and that any use of the Model & Software is at your sole risk. LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE MODEL & SOFTWARE, NEITHER EXPRESS NOR IMPLIED, AND THE ABSENCE OF ANY LEGAL OR ACTUAL DEFECTS, WHETHER DISCOVERABLE OR NOT. Specifically, and not to limit the foregoing, licensor makes no representations or warranties (i) regarding the merchantability or fitness for a particular purpose of the Model & Software, (ii) that the use of the Model & Software will not infringe any patents, copyrights or other intellectual property rights of a third party, and (iii) that the use of the Model & Software will not cause any damage of any kind to you or a third party. 31 | 32 | Limitation of Liability 33 | Because this Model & Software License Agreement qualifies as a donation, according to Section 521 of the German Civil Code (Bürgerliches Gesetzbuch – BGB) Licensor as a donor is liable for intent and gross negligence only. If the Licensor fraudulently conceals a legal or material defect, they are obliged to compensate the Licensee for the resulting damage. 
34 | Licensor shall be liable for loss of data only up to the amount of typical recovery costs which would have arisen had proper and regular data backup measures been taken. For the avoidance of doubt Licensor shall be liable in accordance with the German Product Liability Act in the event of product liability. The foregoing applies also to Licensor’s legal representatives or assistants in performance. Any further liability shall be excluded. 35 | Patent claims generated through the usage of the Model & Software cannot be directed towards the copyright holders. 36 | The Model & Software is provided in the state of development the licensor defines. If modified or extended by Licensee, the Licensor makes no claims about the fitness of the Model & Software and is not responsible for any problems such modifications cause. 37 | 38 | No Maintenance Services 39 | You understand and agree that Licensor is under no obligation to provide either maintenance services, update services, notices of latent defects, or corrections of defects with regard to the Model & Software. Licensor nevertheless reserves the right to update, modify, or discontinue the Model & Software at any time. 40 | 41 | Defects of the Model & Software must be notified in writing to the Licensor with a comprehensible description of the error symptoms. The notification of the defect should enable the reproduction of the error. The Licensee is encouraged to communicate any use, results, modification or publication. 42 | 43 | Publications using the Model & Software 44 | You acknowledge that the Model & Software is a valuable scientific resource and agree to appropriately reference the following paper in any publication making use of the Model & Software. 45 | 46 | Citation: 47 | 48 | 49 | @inproceedings{SMPL-X:2019, 50 | title = {Expressive Body Capture: 3D Hands, Face, and Body from a Single Image}, 51 | author = {Pavlakos, Georgios and Choutas, Vasileios and Ghorbani, Nima and Bolkart, Timo and Osman, Ahmed A. A. and Tzionas, Dimitrios and Black, Michael J.}, 52 | booktitle = {Proceedings IEEE Conf. on Computer Vision and Pattern Recognition (CVPR)}, 53 | year = {2019} 54 | } 55 | Commercial licensing opportunities 56 | For commercial uses of the Software, please send email to ps-license@tue.mpg.de 57 | 58 | This Agreement shall be governed by the laws of the Federal Republic of Germany except for the UN Sales Convention. 
59 | -------------------------------------------------------------------------------- /data/HO3D/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from pycocotools.coco import COCO 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torchvision.transforms import Normalize 8 | 9 | from lib.core.config import cfg 10 | from lib.utils.human_models import mano 11 | from lib.utils.func_utils import load_img 12 | from lib.utils.preprocessing import augmentation_contact 13 | from lib.utils.train_utils import get_contact_difficulty_sample_id 14 | 15 | 16 | def get_sample_id(db, split, index): 17 | image_id = split[index] 18 | ann_ids = db.getAnnIds(imgIds=[image_id]) 19 | ann = db.loadAnns(ann_ids)[0] 20 | img = db.loadImgs(image_id)[0] 21 | sample_id = str(image_id) 22 | return sample_id 23 | 24 | 25 | 26 | # Main dataloader code for HO3D dataset 27 | class HO3D(Dataset): 28 | def __init__(self, transform, data_split): 29 | super(HO3D, self).__init__() 30 | self.__dict__.update(locals()) 31 | 32 | self.transform = transform 33 | dataset_name = 'ho3d' 34 | 35 | if data_split == 'train': 36 | self.data_split_name = 'train' 37 | elif data_split == 'test': 38 | self.data_split_name = 'evaluation' 39 | else: 40 | raise NotImplementedError 41 | 42 | self.data_split = data_split 43 | self.root_path = root_path = os.path.join('data', 'HO3D') 44 | self.data_dir = os.path.join(self.root_path, 'data') 45 | self.annot_dir = os.path.join(self.root_path, 'annotations') 46 | 47 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 48 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 49 | 50 | self.joints_name = ('wrist', 'index1', 'index2', 'index3', 'middle1', 'middle2', 'middle3', 'pinky1', 'pinky2', 'pinky3', 'ring1', 'ring2', 'ring3', 'thumb1', 'thumb2', 'thumb3', 'thumb4', 'index4', 'middle4', 'ring4', 'pinky4') 51 | self.mano_joints_name = ('wrist', 'thumb1', 'thumb2', 'thumb3', 'thumb4', 'index1', 'index2', 'index3', 'index4', 'middle1', 'middle2', 'middle3', 'middle4', 'ring1', 'ring2', 'ring3', 'ring4', 'pinky1', 'pinky2', 'pinky3', 'pinky4') 52 | 53 | # Organize db id based on split 54 | self.db = COCO(os.path.join(self.annot_dir, f"HO3D_{self.data_split_name}_data.json")) 55 | self.split = self.db.getImgIds() 56 | 57 | # Sort contact by difficulty (Balanced contact sampling) 58 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 59 | sample_id_to_split_id = {} 60 | for split_idx in range(len(self.split)): 61 | each_sample_id = get_sample_id(self.db, self.split, split_idx) 62 | if each_sample_id in sample_id_to_split_id: 63 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 64 | else: 65 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 66 | 67 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 68 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 69 | 70 | new_split = [sample_id_to_split_id[key] for key in sample_id_difficulty_list] 71 | self.split = new_split 72 | 73 | 74 | def __len__(self): 75 | return len(self.split) 76 | 77 | 78 | def __getitem__(self, index): 79 | image_id = self.split[index] 80 | ann_ids = self.db.getAnnIds(imgIds=[image_id]) 81 | ann = self.db.loadAnns(ann_ids)[0] 82 | img = 
self.db.loadImgs(image_id)[0] 83 | orig_img_path = os.path.join(self.data_dir, self.data_split_name, img['file_name']) 84 | meta_path = os.path.join(self.data_dir, self.data_split_name, img['file_name'].replace('/rgb/', '/meta/').replace('.png', '.pkl')) 85 | img_shape = (img['height'], img['width']) 86 | sample_id = str(image_id) 87 | 88 | orig_img = load_img(orig_img_path) 89 | img_shape = orig_img.shape[:2] 90 | img_h, img_w = img_shape 91 | 92 | mano_valid = np.ones((1), dtype=np.float32) 93 | 94 | 95 | ################################## LOAD ANNOTATION DATA ##################################### 96 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 97 | 98 | annot_data = np.load(annot_data_path, allow_pickle=True) 99 | bbox_ho = annot_data['bbox_ho'] 100 | cam_param = annot_data['cam_param'] 101 | 102 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 103 | contact_data = dict(contact_h=contact_h) 104 | ################################## LOAD ANNOTATION DATA ##################################### 105 | 106 | 107 | ############################### PROCESS CROP AND AUGMENTATION ################################ 108 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_ho, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 109 | crop_img = img.copy() 110 | 111 | # Transform for 3D HMR 112 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 113 | img = self.transform(img.astype(np.float32)/255.0) 114 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 115 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 116 | img = img.transpose(2, 0, 1) / 255.0 117 | img = normalize_img(torch.from_numpy(img)).float() 118 | else: 119 | raise NotImplementedError 120 | ############################### PROCESS CROP AND AUGMENTATION ################################ 121 | 122 | 123 | input_data = dict(image=img) 124 | targets_data = dict(contact_data=contact_data) 125 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 126 | 127 | 128 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import argparse 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | import mediapipe as mp 9 | from mediapipe.tasks.python import vision 10 | from mediapipe.tasks.python import BaseOptions 11 | 12 | from lib.core.config import cfg, update_config 13 | from lib.models.model import HACO 14 | from lib.utils.human_models import mano 15 | from lib.utils.contact_utils import get_contact_thres 16 | from lib.utils.vis_utils import ContactRenderer, draw_landmarks_on_image, draw_landmarks_on_image_simple 17 | from lib.utils.preprocessing import augmentation_contact 18 | from lib.utils.demo_utils import remove_small_contact_components, run_wilor_hand_detector 19 | 20 | 21 | parser = argparse.ArgumentParser(description='Demo HACO') 22 | parser.add_argument('--backbone', type=str, default='hamer', choices=['hamer', 'vit-l-16', 'vit-b-16', 'vit-s-16', 'handoccnet', 'hrnet-w48', 'hrnet-w32', 'resnet-152', 'resnet-101', 'resnet-50', 'resnet-34', 'resnet-18'], help='backbone model') 23 | 
parser.add_argument('--detector', type=str, default='wilor', choices=['wilor', 'mediapipe'], help='detector model') 24 | parser.add_argument('--checkpoint', type=str, default='', help='model path for demo') 25 | parser.add_argument('--input_path', type=str, default='asset/example_images', help='image path for demo') 26 | args = parser.parse_args() 27 | 28 | 29 | # Set device as CUDA 30 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 31 | 32 | 33 | # Initialize directories 34 | experiment_dir = 'experiments_demo_image' 35 | 36 | 37 | # Load config 38 | update_config(backbone_type=args.backbone, exp_dir=experiment_dir) 39 | 40 | 41 | # Initialize renderer 42 | contact_renderer = ContactRenderer() 43 | 44 | 45 | # Load demo images 46 | input_dir = args.input_path 47 | images = [f for f in os.listdir(input_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))] 48 | 49 | 50 | # Initialize MediaPipe HandLandmarker 51 | if args.detector == 'wilor': 52 | from ultralytics import YOLO 53 | detector_path = f'data/base_data/demo_data/wilor_detector.pt' 54 | detector = YOLO(detector_path) 55 | elif args.detector == 'mediapipe': 56 | base_options = BaseOptions(model_asset_path=cfg.MODEL.hand_landmarker_path) 57 | hand_options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=2) 58 | detector = vision.HandLandmarker.create_from_options(hand_options) 59 | else: 60 | raise NotImplementedError(f"Unsupported detector: {args.detector}") 61 | 62 | 63 | ############# Model ############# 64 | model = HACO().to(device) 65 | model.eval() 66 | ############# Model ############# 67 | 68 | 69 | # Load model checkpoint if provided 70 | if args.checkpoint: 71 | checkpoint = torch.load(args.checkpoint, map_location=device) 72 | model.load_state_dict(checkpoint['state_dict']) 73 | 74 | 75 | ############################### Demo Loop ############################### 76 | for i, frame_name in tqdm(enumerate(images), total=len(images)): 77 | print(f"Processing: {frame_name}") 78 | 79 | # Load and convert image 80 | frame_path = os.path.join(input_dir, frame_name) 81 | frame = cv2.imread(frame_path) 82 | orig_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 83 | frame_name_base = os.path.splitext(frame_name)[0] 84 | 85 | # Hand landmark detection 86 | if args.detector == 'wilor': 87 | right_hand_bbox = run_wilor_hand_detector(orig_img, detector) 88 | annotated_image, right_hand_bbox = draw_landmarks_on_image_simple(orig_img.copy(), right_hand_bbox) 89 | elif args.detector == 'mediapipe': 90 | mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=orig_img.copy()) 91 | detection_result = detector.detect(mp_image) 92 | annotated_image, right_hand_bbox = draw_landmarks_on_image(orig_img.copy(), detection_result) 93 | else: 94 | raise NotImplementedError(f"Unsupported detector: {args.detector}") 95 | 96 | 97 | if right_hand_bbox is None: 98 | print(f"Skipping {frame_name} - no hand detected.") 99 | continue 100 | 101 | print(f"Frame {i}: Right hand bbox: {right_hand_bbox}") 102 | 103 | # Image preprocessing 104 | crop_img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), right_hand_bbox, 'test', enforce_flip=False) 105 | 106 | # Convert to model input format 107 | if args.backbone in ['handoccnet'] or 'resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type: 108 | from torchvision import transforms 109 | img_tensor = transforms.ToTensor()(crop_img.astype(np.float32) / 255.0) 110 | elif args.backbone in ['hamer'] or 'vit' in 
cfg.MODEL.backbone_type: 111 | from torchvision.transforms import Normalize 112 | normalize = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 113 | img_tensor = crop_img.transpose(2, 0, 1) / 255.0 114 | img_tensor = normalize(torch.from_numpy(img_tensor)).float() 115 | else: 116 | raise NotImplementedError(f"Unsupported backbone: {args.backbone}") 117 | 118 | ############# Run model ############# 119 | with torch.no_grad(): 120 | outputs = model({'input': {'image': img_tensor[None].to(device)}}, mode="test") 121 | ############# Run model ############# 122 | 123 | # Save result 124 | os.makedirs('outputs', exist_ok=True) 125 | os.makedirs('outputs/detection', exist_ok=True) 126 | os.makedirs('outputs/crop_img', exist_ok=True) 127 | os.makedirs('outputs/contact', exist_ok=True) 128 | 129 | cv2.imwrite(f'outputs/detection/{frame_name_base}.png', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)) 130 | cv2.imwrite(f'outputs/crop_img/{frame_name_base}.png', crop_img[..., ::-1]) 131 | 132 | eval_thres = get_contact_thres(args.backbone) 133 | contact_mask = (outputs['contact_out'].sigmoid()[0] > eval_thres).detach().cpu().numpy() 134 | contact_mask = remove_small_contact_components(contact_mask, faces=mano.watertight_face['right'], min_size=20) 135 | contact_rendered = contact_renderer.render_contact(crop_img[..., ::-1], contact_mask) 136 | cv2.imwrite(f'outputs/contact/{frame_name_base}.png', contact_rendered) 137 | ############################### Demo Loop ############################### -------------------------------------------------------------------------------- /data/Decaf/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torchvision.transforms import Normalize 8 | 9 | from lib.core.config import cfg 10 | from lib.utils.human_models import mano 11 | from lib.utils.func_utils import load_img 12 | from lib.utils.preprocessing import augmentation_contact 13 | from lib.utils.train_utils import get_contact_difficulty_sample_id 14 | 15 | 16 | def get_sample_id(split, index): 17 | aid = split[index] 18 | seq_name = aid.split('_')[0] 19 | cam_name = aid.split('_')[1] 20 | img_name = aid.split('_')[2] 21 | sample_id = aid 22 | return sample_id 23 | 24 | 25 | 26 | # Main dataloader code for Decaf dataset 27 | class Decaf(Dataset): 28 | def __init__(self, transform, data_split): 29 | super(Decaf, self).__init__() 30 | self.__dict__.update(locals()) 31 | 32 | self.transfrom = transform 33 | dataset_name = 'decaf' 34 | 35 | self.data_split = data_split 36 | self.root_path = root_path = 'data/Decaf' 37 | self.data_dir = os.path.join(self.root_path, 'data') 38 | 39 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 40 | 41 | self.mano_joints_name = ('wrist', 'thumb1', 'thumb2', 'thumb3', 'thumb4', 'index1', 'index2', 'index3', 'index4', 'middle1', 'middle2', 'middle3', 'middle4', 'ring1', 'ring2', 'ring3', 'ring4', 'pinky1', 'pinky2', 'pinky3', 'pinky4') 42 | 43 | seq_list = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8'] 44 | cam_list = ['084', '100', '102', '108', '110', '111', '121', '122'] 45 | 46 | # Make db 47 | contact_rh_data = {seq: np.load(os.path.join(self.data_dir, self.data_split, 'contacts', seq, 'contacts_rh.npy')) for seq in seq_list} 48 | 49 | bb_rh_data = { 50 | f"{seq}_{cam}": np.load(os.path.join(self.data_dir, self.data_split, 'right_hand_bbs', seq, 
f'{cam}.npy')) 51 | for seq in seq_list 52 | for cam in cam_list 53 | } 54 | 55 | # Organize db id based on split 56 | db_split_path = os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 57 | with open(db_split_path, 'r') as f: 58 | db_keys = json.load(f) 59 | 60 | self.split = list(db_keys) 61 | 62 | # Sort contact by difficulty (Balanced contact sampling) 63 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 64 | sample_id_to_split_id = {} 65 | for split_idx in range(len(self.split)): 66 | each_sample_id = get_sample_id(self.split, split_idx) 67 | if each_sample_id in sample_id_to_split_id: 68 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 69 | else: 70 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 71 | 72 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 73 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 74 | 75 | new_split = [sample_id_to_split_id[key] for key in sample_id_difficulty_list if key in [*sample_id_to_split_id]] 76 | self.split = new_split 77 | 78 | 79 | def __len__(self): 80 | return len(self.split) 81 | 82 | 83 | def __getitem__(self, index): 84 | aid = self.split[index] 85 | seq_name = aid.split('_')[0] 86 | cam_name = aid.split('_')[1] 87 | img_name = aid.split('_')[2] 88 | sample_id = aid 89 | 90 | orig_img_path = os.path.join(self.data_dir, self.data_split, 'images', seq_name, cam_name, f'{img_name}.jpg') 91 | 92 | orig_img = load_img(orig_img_path) 93 | img_shape = orig_img.shape[:2] 94 | img_h, img_w = img_shape 95 | 96 | mano_valid = np.ones((1), dtype=np.float32) 97 | 98 | 99 | ################################## LOAD ANNOTATION DATA ##################################### 100 | db_contact_path = os.path.join(self.root_path, 'preprocessed_data', self.data_split, 'db_contact', f'{sample_id}.npy') 101 | db_bb_path = os.path.join(self.root_path, 'preprocessed_data', self.data_split, 'db_bb', f'{sample_id}.npy') 102 | db_contact = np.load(db_contact_path) 103 | db_bb = np.load(db_bb_path) 104 | 105 | contact_rh = db_contact.astype(np.float32) 106 | bbox_rh = db_bb.tolist() # GT bbox is in [x_min, y_min, x_max, y_max] 107 | bbox_rh = np.array([bbox_rh[0], bbox_rh[1], bbox_rh[2]-bbox_rh[0], bbox_rh[3]-bbox_rh[1]]) # Change from [x_min, y_min, x_max, y_max] to [x_min, y_min, width, height] 108 | contact_data = dict(contact_h=contact_rh) 109 | ################################## LOAD ANNOTATION DATA ##################################### 110 | 111 | 112 | ############################### PROCESS CROP AND AUGMENTATION ################################ 113 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_rh, self.data_split, enforce_flip=False) 114 | crop_img = img.copy() 115 | 116 | # Transform for 3D HMR 117 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 118 | img = self.transform(img.astype(np.float32)/255.0) 119 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 120 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 121 | img = img.transpose(2, 0, 1) / 255.0 122 | img = normalize_img(torch.from_numpy(img)).float() 123 | else: 124 | raise NotImplementedError 125 | ############################### PROCESS CROP AND AUGMENTATION ################################ 126 | 127 | 128 | 
input_data = dict(image=img) 129 | targets_data = dict(contact_data=contact_data) 130 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 131 | 132 | 133 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /data/RICH/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import pickle 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.transforms import Normalize 9 | 10 | from lib.core.config import cfg 11 | from lib.utils.human_models import mano 12 | from lib.utils.func_utils import load_img 13 | from lib.utils.preprocessing import augmentation_contact 14 | from lib.utils.train_utils import get_contact_difficulty_sample_id 15 | 16 | 17 | def get_sample_id(db, index): 18 | aid = db[index] 19 | seq_name = aid.split('/')[-3] 20 | seq_loc_name = seq_name.split('_')[0] 21 | annot_name = seq_name.split('_')[1] 22 | cam_name = aid.split('/')[-2] 23 | cam_id = int(cam_name.split('cam_')[-1]) 24 | img_name = aid.split('/')[-1].split('.jpeg')[0] # we used jpg version 25 | img_annot_name = img_name.split('_')[0] 26 | sample_id = f'{seq_name}-{cam_name}-{img_name}' 27 | return sample_id 28 | 29 | 30 | 31 | # Main dataloader code for RICH dataset 32 | class RICH(Dataset): 33 | def __init__(self, transform, data_split): 34 | super(RICH, self).__init__() 35 | self.__dict__.update(locals()) 36 | 37 | self.transform = transform 38 | dataset_name = 'rich' 39 | 40 | self.data_split = data_split 41 | self.root_path = root_path = os.path.join('data', 'RICH') 42 | self.data_dir = os.path.join(self.root_path, 'data') 43 | 44 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 45 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 46 | 47 | # SMPL, SMPLX, MANO conversion mappings 48 | smpl_to_smplx_mapping_path = os.path.join('data', 'base_data', 'conversions', 'smpl_to_smplx.pkl') 49 | smplx_mano_mapping_path = os.path.join('data', 'base_data', 'conversions', 'smplx_to_mano.pkl') 50 | 51 | with open(smpl_to_smplx_mapping_path, 'rb') as f: 52 | self.smpl_to_smplx_mapping = pickle.load(f) 53 | 54 | with open(smplx_mano_mapping_path, 'rb') as f: 55 | self.smplx_to_mano_mapping = pickle.load(f) 56 | self.smplx_to_mano_mapping_r = self.smplx_to_mano_mapping["right_hand"] 57 | 58 | # Organize db id based on split 59 | db_split_path = os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 60 | with open(db_split_path, 'r') as f: 61 | self.db = json.load(f) 62 | 63 | # Sort contact by difficulty (Balanced contact sampling) 64 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 65 | sample_id_to_db_id = {} 66 | for db_idx in range(len(self.db)): 67 | each_sample_id = get_sample_id(self.db, db_idx) 68 | if each_sample_id in sample_id_to_db_id: 69 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 70 | else: 71 | sample_id_to_db_id[each_sample_id] = self.db[db_idx] 72 | 73 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 74 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_db_id, self.contact_data_path, contact_means_path) 75 | 76 | new_db = [sample_id_to_db_id[key] for key in sample_id_difficulty_list] 77 | self.db = new_db 78 | 79 | 80 | def 
__len__(self): 81 | return len(self.db) 82 | 83 | 84 | def __getitem__(self, index): 85 | aid = self.db[index] 86 | seq_name = aid.split('/')[-3] 87 | seq_loc_name = seq_name.split('_')[0] 88 | annot_name = seq_name.split('_')[1] 89 | cam_name = aid.split('/')[-2] 90 | cam_id = int(cam_name.split('cam_')[-1]) 91 | img_name = aid.split('/')[-1].split('.jpeg')[0] # we used jpg version 92 | img_annot_name = img_name.split('_')[0] 93 | sample_id = f'{seq_name}-{cam_name}-{img_name}' 94 | 95 | orig_img_path = os.path.join(self.data_dir, 'images_jpg_subset', self.data_split, seq_name, cam_name, f'{img_name}.jpeg') 96 | 97 | orig_img = load_img(orig_img_path) 98 | img_shape = orig_img.shape[:2] 99 | img_h, img_w = img_shape 100 | 101 | mano_valid = np.ones((1), dtype=np.float32) 102 | 103 | 104 | ################################## LOAD ANNOTATION DATA ##################################### 105 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 106 | 107 | annot_data = np.load(annot_data_path, allow_pickle=True) 108 | bbox_hand_r = annot_data['bbox_ho'] 109 | 110 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 111 | contact_data = dict(contact_h=contact_h) 112 | ################################## LOAD ANNOTATION DATA ##################################### 113 | 114 | 115 | ############################### PROCESS CROP AND AUGMENTATION ############################### 116 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r, self.data_split, enforce_flip=False) 117 | crop_img = img.copy() 118 | 119 | # Transform for 3D HMR 120 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 121 | img = self.transform(img.astype(np.float32)/255.0) 122 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 123 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 124 | img = img.transpose(2, 0, 1) / 255.0 125 | img = normalize_img(torch.from_numpy(img)).float() 126 | else: 127 | raise NotImplementedError 128 | ############################### PROCESS CROP AND AUGMENTATION ############################### 129 | 130 | 131 | input_data = dict(image=img) 132 | targets_data = dict(contact_data=contact_data) 133 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 134 | 135 | 136 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /demo_video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import argparse 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | import mediapipe as mp 9 | from mediapipe.tasks.python import vision 10 | from mediapipe.tasks.python import BaseOptions 11 | 12 | from lib.core.config import cfg, update_config 13 | from lib.models.model import HACO 14 | from lib.utils.human_models import mano 15 | from lib.utils.contact_utils import get_contact_thres 16 | from lib.utils.vis_utils import ContactRenderer, draw_landmarks_on_image 17 | from lib.utils.preprocessing import augmentation_contact 18 | from lib.utils.demo_utils import smooth_bbox, smooth_contact_mask, remove_small_contact_components, initialize_video_writer, extract_frames_with_hand, find_longest_continuous_segment, run_wilor_hand_detector 19 | 20 | 21 | parser = 
argparse.ArgumentParser(description='Demo HACO') 22 | parser.add_argument('--backbone', type=str, default='hamer', choices=['hamer', 'vit-l-16', 'vit-b-16', 'vit-s-16', 'handoccnet', 'hrnet-w48', 'hrnet-w32', 'resnet-152', 'resnet-101', 'resnet-50', 'resnet-34', 'resnet-18'], help='backbone model') 23 | parser.add_argument('--detector', type=str, default='wilor', choices=['wilor', 'mediapipe'], help='detector model') 24 | parser.add_argument('--checkpoint', type=str, default='', help='model path for demo') 25 | parser.add_argument('--input_path', type=str, default='asset/example_videos', help='video path for demo') 26 | args = parser.parse_args() 27 | 28 | 29 | # Set device as CUDA 30 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 31 | 32 | 33 | # Initialize directories 34 | experiment_dir = 'experiments_demo_video' 35 | 36 | 37 | # Load config 38 | update_config(backbone_type=args.backbone, exp_dir=experiment_dir) 39 | 40 | 41 | # Initialize renderer 42 | contact_renderer = ContactRenderer() 43 | 44 | 45 | # Load demo videos 46 | input_dir = args.input_path 47 | video_files = [f for f in os.listdir(input_dir) if f.lower().endswith(('.mp4', '.avi', '.mov'))] 48 | 49 | 50 | # Initialize MediaPipe HandLandmarker 51 | if args.detector == 'wilor': 52 | from ultralytics import YOLO 53 | detector_path = f'data/base_data/demo_data/wilor_detector.pt' 54 | detector = YOLO(detector_path) 55 | elif args.detector == 'mediapipe': 56 | base_options = BaseOptions(model_asset_path=cfg.MODEL.hand_landmarker_path) 57 | hand_options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=2) 58 | detector = vision.HandLandmarker.create_from_options(hand_options) 59 | else: 60 | raise NotImplementedError 61 | 62 | 63 | ############# Model ############# 64 | model = HACO().to(device) 65 | model.eval() 66 | ############# Model ############# 67 | 68 | 69 | # Load model checkpoint if provided 70 | if args.checkpoint: 71 | checkpoint = torch.load(args.checkpoint, map_location=device) 72 | model.load_state_dict(checkpoint['state_dict']) 73 | 74 | 75 | ############################### Demo Loop ############################### 76 | for i, video_name in tqdm(enumerate(video_files), total=len(video_files)): 77 | print(f"Processing: {video_name}") 78 | 79 | # Organize input and output path 80 | video_path = os.path.join(input_dir, video_name) 81 | os.makedirs("outputs_video", exist_ok=True) 82 | output_path = os.path.join("outputs_video", f"{os.path.splitext(video_name)[0]}_out.mp4") 83 | 84 | # Load and convert video 85 | cap = cv2.VideoCapture(video_path) 86 | fps = cap.get(cv2.CAP_PROP_FPS) 87 | fps = 30 if fps == 0 or np.isnan(fps) else fps 88 | 89 | # Extract meaningful video segment 90 | frames_with_hand = extract_frames_with_hand(cap, detector, args.detector) 91 | longest_segment = find_longest_continuous_segment(frames_with_hand) 92 | 93 | if not longest_segment: 94 | print(f"No hand detected in any continuous segment for {video_name}") 95 | continue 96 | 97 | writer = None 98 | smoothed_bbox = None 99 | smoothed_contact = None 100 | 101 | for _, frame, bbox in longest_segment: 102 | # Image preprocessing 103 | smoothed_bbox = smooth_bbox(smoothed_bbox, bbox, alpha=0.8) 104 | orig_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 105 | crop_img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), smoothed_bbox, 'test', enforce_flip=False, bkg_color='white') 106 | 107 | # Convert to model input format 108 | if args.backbone in ['handoccnet'] or 'resnet' in 
cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type: 109 | from torchvision import transforms 110 | img_tensor = transforms.ToTensor()(crop_img.astype(np.float32) / 255.0) 111 | elif args.backbone in ['hamer'] or 'vit' in cfg.MODEL.backbone_type: 112 | from torchvision.transforms import Normalize 113 | normalize = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 114 | img_tensor = crop_img.transpose(2, 0, 1) / 255.0 115 | img_tensor = normalize(torch.from_numpy(img_tensor)).float() 116 | else: 117 | raise NotImplementedError(f"Unsupported backbone: {args.backbone}") 118 | 119 | ############# Run model ############# 120 | with torch.no_grad(): 121 | outputs = model({'input': {'image': img_tensor[None].to(device)}}, mode="test") 122 | ############# Run model ############# 123 | 124 | # Save result 125 | eval_thres = get_contact_thres(args.backbone) 126 | raw_contact = (outputs['contact_out'].sigmoid()[0] > eval_thres).detach().cpu().numpy() 127 | smoothed_contact = smooth_contact_mask(smoothed_contact, raw_contact, alpha=0.8) 128 | contact_mask = smoothed_contact > 0.5 129 | contact_mask = remove_small_contact_components(contact_mask, faces=mano.watertight_face['right'], min_size=20) 130 | contact_rendered = contact_renderer.render_contact(crop_img, contact_mask, mode='demo') 131 | 132 | if writer is None: 133 | ch, cw = contact_rendered.shape[:2] 134 | writer = initialize_video_writer(output_path, fps, (cw, ch)) 135 | 136 | writer.write(cv2.cvtColor(contact_rendered, cv2.COLOR_RGB2BGR)) 137 | 138 | if writer: 139 | writer.release() 140 | ############################### Demo Loop ############################### -------------------------------------------------------------------------------- /data/DexYCB/toolkit/obj.py: -------------------------------------------------------------------------------- 1 | # DexYCB Toolkit 2 | # Copyright (C) 2021 NVIDIA Corporation 3 | # Licensed under the GNU General Public License v3.0 [see LICENSE for details] 4 | 5 | """Wavefront OBJ file loader. 6 | 7 | Functions and classes are largely derived from 8 | https://github.com/pyglet/pyglet/blob/f762169c9dd88c22c8d6d2399a129cc23654d99c/contrib/model/model/obj_batch.py 9 | """ 10 | 11 | import os 12 | import logging 13 | import numpy as np 14 | 15 | 16 | class Material: 17 | """Material.""" 18 | diffuse = [.8, .8, .8] 19 | ambient = [.2, .2, .2] 20 | specular = [0., 0., 0.] 21 | emission = [0., 0., 0.] 22 | shininess = 0. 23 | opacity = 1. 24 | texture_path = None 25 | 26 | def __init__(self, name): 27 | """Constructor. 28 | 29 | Args: 30 | name: Material name. 31 | """ 32 | self.name = name 33 | 34 | 35 | class MaterialGroup: 36 | """Material group.""" 37 | 38 | def __init__(self, material): 39 | """Constructor. 40 | 41 | Args: 42 | material: A Material object. 43 | """ 44 | self.material = material 45 | 46 | self.f_v = [] 47 | self.f_n = [] 48 | self.f_t = [] 49 | 50 | 51 | class Mesh: 52 | """Mesh.""" 53 | 54 | def __init__(self, name): 55 | """Constructor. 56 | 57 | Args: 58 | name: Mesh name. 59 | """ 60 | self.name = name 61 | self.groups = [] 62 | 63 | 64 | class OBJ: 65 | """3D data loaded from an OBJ file.""" 66 | 67 | def __init__(self, filename, file=None, path=None): 68 | """Constructor. 69 | 70 | Args: 71 | filename: Path to the OBJ file. 72 | file: An file object. 73 | path: Path to the directory storing the material files. 
74 | """ 75 | self.materials = {} 76 | self.meshes = {} 77 | self.mesh_list = [] 78 | 79 | if file is None: 80 | file = open(filename, 'r') 81 | 82 | if path is None: 83 | path = os.path.dirname(filename) 84 | self.path = path 85 | 86 | mesh = None 87 | group = None 88 | material = None 89 | 90 | self.v = [] 91 | self.n = [] 92 | self.t = [] 93 | 94 | for line in file: 95 | if line.startswith('#'): 96 | continue 97 | values = line.split() 98 | if not values: 99 | continue 100 | 101 | if values[0] == 'v': 102 | self.v.append(list(map(float, values[1:4]))) 103 | elif values[0] == 'vn': 104 | self.n.append(list(map(float, values[1:4]))) 105 | elif values[0] == 'vt': 106 | self.t.append(list(map(float, values[1:3]))) 107 | elif values[0] == 'mtllib': 108 | self._load_material_library(values[1]) 109 | elif values[0] in ('usemtl', 'usemat'): 110 | material = self.materials.get(values[1], None) 111 | if material is None: 112 | logging.warn('Unknown material: %s' % values[1]) 113 | if mesh is not None: 114 | group = MaterialGroup(material) 115 | mesh.groups.append(group) 116 | elif values[0] == 'o': 117 | mesh = Mesh(values[1]) 118 | self.meshes[mesh.name] = mesh 119 | self.mesh_list.append(mesh) 120 | group = None 121 | elif values[0] == 'f': 122 | if mesh is None: 123 | mesh = Mesh('') 124 | self.mesh_list.append(mesh) 125 | if material is None: 126 | material = Material("") 127 | if group is None: 128 | group = MaterialGroup(material) 129 | mesh.groups.append(group) 130 | 131 | for i, v in enumerate(values[1:]): 132 | v_index, t_index, n_index = \ 133 | (list(map(int, [j or 0 for j in v.split('/')])) + [0, 0])[:3] 134 | if v_index < 0: 135 | v_index += len(vertices) 136 | if t_index < 0: 137 | t_index += len(tex_coords) 138 | if n_index < 0: 139 | n_index += len(normals) 140 | if i < 3: 141 | group.f_v.append(v_index - 1) 142 | group.f_n.append(n_index - 1) 143 | group.f_t.append(t_index - 1) 144 | else: 145 | # Triangulate. 146 | group.f_v += [group.f_v[-3 * (i - 2)], group.f_v[-1], v_index - 1] 147 | group.f_n += [group.f_n[-3 * (i - 2)], group.f_n[-1], n_index - 1] 148 | group.f_t += [group.f_t[-3 * (i - 2)], group.f_t[-1], t_index - 1] 149 | 150 | self.v = np.array(self.v, dtype=np.float32) 151 | self.n = np.array(self.n, dtype=np.float32) 152 | self.t = np.array(self.t, dtype=np.float32) 153 | 154 | for mesh in self.mesh_list: 155 | for group in mesh.groups: 156 | group.f_v = np.array(group.f_v, dtype=np.int64).reshape(-1, 3) 157 | group.f_n = np.array(group.f_n, dtype=np.int64).reshape(-1, 3) 158 | group.f_t = np.array(group.f_t, dtype=np.int64).reshape(-1, 3) 159 | 160 | def _open_material_file(self, filename): 161 | """Opens a material file. 162 | 163 | Args: 164 | filename: Path to the material file. 165 | 166 | Returns: 167 | A file object. 168 | """ 169 | return open(os.path.join(self.path, filename), 'r') 170 | 171 | def _load_material_library(self, filename): 172 | """Loads the material from a material file. 173 | 174 | Args: 175 | filename: Path to the material file. 
176 | """ 177 | material = None 178 | file = self._open_material_file(filename) 179 | 180 | for line in file: 181 | if line.startswith('#'): 182 | continue 183 | values = line.split() 184 | if not values: 185 | continue 186 | 187 | if values[0] == 'newmtl': 188 | material = Material(values[1]) 189 | self.materials[material.name] = material 190 | elif material is None: 191 | logging.warn('Expected "newmtl" in %s' % filename) 192 | continue 193 | 194 | try: 195 | if values[0] == 'Kd': 196 | material.diffuse = list(map(float, values[1:])) 197 | elif values[0] == 'Ka': 198 | material.ambient = list(map(float, values[1:])) 199 | elif values[0] == 'Ks': 200 | material.specular = list(map(float, values[1:])) 201 | elif values[0] == 'Ke': 202 | material.emissive = list(map(float, values[1:])) 203 | elif values[0] == 'Ns': 204 | material.shininess = float(values[1]) 205 | elif values[0] == 'd': 206 | material.opacity = float(values[1]) 207 | elif values[0] == 'map_Kd': 208 | material.texture_path = os.path.abspath(self.path + '/' + values[1]) 209 | except BaseException as ex: 210 | logging.warning('Parse error in %s.' % (filename, ex)) 211 | -------------------------------------------------------------------------------- /data/ObMan/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from pycocotools.coco import COCO 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.transforms import Normalize 9 | 10 | from lib.core.config import cfg 11 | from lib.utils.human_models import mano 12 | from lib.utils.func_utils import load_img 13 | from lib.utils.preprocessing import augmentation_contact 14 | from lib.utils.train_utils import get_contact_difficulty_sample_id 15 | 16 | 17 | def get_sample_id(db, split, index): 18 | aid = split[index] 19 | ann = db.anns[aid] 20 | img_data = db.loadImgs(ann['image_id'])[0] 21 | sample_id = img_data['file_name'] 22 | return sample_id, img_data 23 | 24 | 25 | 26 | # Main dataloader code for ObMan dataset 27 | class ObMan(Dataset): 28 | def __init__(self, transform, data_split): 29 | super(ObMan, self).__init__() 30 | self.__dict__.update(locals()) 31 | 32 | self.transfrom = transform 33 | dataset_name = 'obman' 34 | 35 | if data_split == 'train': 36 | data_split_name = 'train_87k' 37 | elif data_split == 'test': 38 | data_split_name = 'test_6k' 39 | else: 40 | raise NotImplementedError 41 | 42 | self.data_split = data_split 43 | self.root_path = root_path = 'data/ObMan' 44 | 45 | self.data_dir = os.path.join(self.root_path, 'data') 46 | self.split_dir = os.path.join(self.root_path, 'splits') 47 | self.annot_dir = os.path.join(self.root_path, 'annotations') 48 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 49 | 50 | with open(os.path.join(self.split_dir , f'{data_split_name}.json'), 'r') as f: 51 | self.split = json.load(f) 52 | 53 | self.split = [int(idx) for idx in self.split] 54 | 55 | self.anno_file = os.path.join(self.annot_dir, f'{dataset_name}_{data_split}.json') 56 | self.img_source = os.path.join(self.data_dir, data_split, 'rgb') 57 | self.seg_source = os.path.join(self.data_dir, data_split, 'segm') 58 | self.input_img_shape = cfg.MODEL.input_img_shape # should be (256, 256) 59 | 60 | self.cam_intr = np.array([[480., 0., 128.], [0., 480., 128.], 61 | [0., 0., 1.]]).astype(np.float32) 62 | self.cam_extr = np.array([[1., 0., 0., 0.], [0., -1., 0., 0.], 63 | [0., 0., -1., 
0.]]).astype(np.float32) 64 | 65 | self.joint_set = {'hand': \ 66 | {'joint_num': 21, # single hand 67 | 'joints_name': ('Wrist', 'Thumb_1', 'Thumb_2', 'Thumb_3', 'Thumb_4', 'Index_1', 'Index_2', 'Index_3', 'Index_4', 'Middle_1', 'Middle_2', 'Middle_3', 'Middle_4', 'Ring_1', 'Ring_2', 'Ring_3', 'Ring_4', 'Pinky_1', 'Pinky_2', 'Pinky_3', 'Pinky_4'), 68 | 'flip_pairs': () 69 | } 70 | } 71 | self.joint_set['hand']['root_joint_idx'] = self.joint_set['hand']['joints_name'].index('Wrist') 72 | 73 | # Organize db id based on split 74 | self.db = COCO(self.anno_file) 75 | self.start_point = 0 76 | self.end_point = len(self.split) 77 | self.length = self.end_point - self.start_point 78 | 79 | # Sort contact by difficulty (Balanced contact sampling) 80 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 81 | sample_id_to_split_id = {} 82 | for split_idx in range(len(self.split)): 83 | each_sample_id, _ = get_sample_id(self.db, self.split, split_idx) 84 | if each_sample_id in sample_id_to_split_id: 85 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 86 | else: 87 | sample_id_to_split_id[each_sample_id] = self.split[split_idx] 88 | 89 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 90 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_split_id, self.contact_data_path, contact_means_path) 91 | 92 | new_split = [sample_id_to_split_id[key] for key in sample_id_difficulty_list] 93 | self.split = new_split 94 | 95 | 96 | def __len__(self): 97 | return len(self.split) 98 | 99 | 100 | def __getitem__(self, index): 101 | sample_id, img_data = get_sample_id(self.db, self.split, index) 102 | 103 | # Base path 104 | img_path = os.path.join(self.img_source, img_data['file_name'] + '.jpg') 105 | seg_path = os.path.join(self.seg_source, img_data['file_name'] + '.png') 106 | 107 | # Full image 108 | orig_img = load_img(img_path) 109 | orig_img_shape = orig_img.shape[:2] 110 | 111 | mano_valid = np.ones((1), dtype=np.float32) 112 | 113 | 114 | ################################## LOAD ANNOTATION DATA ##################################### 115 | bbox_ho = np.array([0, 0, 256, 256]) 116 | 117 | contact_h = np.load(os.path.join(self.contact_data_path, f'{sample_id}.npy')).astype(np.float32) 118 | contact_data = dict(contact_h=contact_h) 119 | contact_h = contact_data['contact_h'] 120 | ################################## LOAD ANNOTATION DATA ##################################### 121 | 122 | 123 | ############################### PROCESS CROP AND AUGMENTATION ################################ 124 | # Crop image 125 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_ho, self.data_split, enforce_flip=False) 126 | crop_img = img.copy() 127 | 128 | # Transform for 3D HMR 129 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 130 | img = self.transform(img.astype(np.float32)/255.0) 131 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 132 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 133 | img = img.transpose(2, 0, 1) / 255.0 134 | img = normalize_img(torch.from_numpy(img)).float() 135 | else: 136 | raise NotImplementedError 137 | ############################### PROCESS CROP AND AUGMENTATION ################################ 138 | 139 | 140 | input_data = dict(image=img) 141 | targets_data = 
dict(contact_data=contact_data) 142 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 143 | 144 | 145 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) -------------------------------------------------------------------------------- /data/Hi4D/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import pickle 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.transforms import Normalize 9 | 10 | from lib.core.config import cfg 11 | from lib.utils.human_models import mano 12 | from lib.utils.func_utils import load_img 13 | from lib.utils.preprocessing import augmentation_contact 14 | from lib.utils.train_utils import get_contact_difficulty_sample_id 15 | 16 | 17 | def get_sample_id(db, db_pid, index): 18 | aid = db[index] 19 | pid = db_pid[index] 20 | pair_name = aid.split('/')[-5] 21 | action_name = aid.split('/')[-4] 22 | cam_name = aid.split('/')[-2] 23 | img_name = aid.split('/')[-1].split('.jpg')[0] 24 | sample_id = f'{pair_name}-{action_name}-{cam_name}-{img_name}-{pid}' 25 | return sample_id 26 | 27 | 28 | 29 | # Main dataloader code for Hi4D dataset 30 | class Hi4D(Dataset): 31 | def __init__(self, transform, data_split): 32 | super(Hi4D, self).__init__() 33 | self.__dict__.update(locals()) 34 | 35 | self.transform = transform 36 | dataset_name = 'hi4d' 37 | 38 | self.data_split = data_split 39 | self.root_path = root_path = os.path.join('data', 'Hi4D') 40 | self.data_dir = os.path.join(self.root_path, 'data') 41 | 42 | self.annot_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'annot_data') 43 | self.contact_data_path = os.path.join(root_path, 'preprocessed_data', data_split, 'contact_data') 44 | 45 | # SMPL-X to MANO mapping 46 | smpl_smplx_mapping_path = os.path.join('data', 'base_data', 'conversions', 'smpl_to_smplx.pkl') 47 | smplx_mano_mapping_path = os.path.join('data', 'base_data', 'conversions', 'smplx_to_mano.pkl') 48 | 49 | with open(smpl_smplx_mapping_path, 'rb') as f: 50 | self.smpl_to_smplx_mapping = pickle.load(f) 51 | 52 | with open(smplx_mano_mapping_path, 'rb') as f: 53 | self.smplx_to_mano_mapping = pickle.load(f) 54 | self.smplx_to_mano_mapping_r = self.smplx_to_mano_mapping["right_hand"] 55 | 56 | # Organize db id based on split 57 | split_db_id_file_path = os.path.join(self.root_path, 'splits', f'{self.data_split}.json') 58 | with open(split_db_id_file_path, 'r') as f: 59 | self.db = json.load(f) 60 | split_db_pid_file_path = os.path.join(self.root_path, 'splits', f'{self.data_split}_pid.json') 61 | with open(split_db_pid_file_path, 'r') as f: 62 | self.db_pid = json.load(f) 63 | 64 | # Sort contact by difficulty (Balanced contact sampling) 65 | if self.data_split == 'train' and cfg.MODEL.balanced_sampling: 66 | sample_id_to_db_id = {} 67 | for db_idx in range(len(self.db)): 68 | each_sample_id = get_sample_id(self.db, self.db_pid, db_idx) 69 | if each_sample_id in sample_id_to_db_id: 70 | raise KeyError(f"Key '{key}' already exists in the dictionary.") 71 | else: 72 | sample_id_to_db_id[each_sample_id] = self.db[db_idx] 73 | 74 | contact_means_path = os.path.join(f'data/base_data/contact_data/{dataset_name}/contact_means_{dataset_name}.npy') 75 | sample_id_difficulty_list = get_contact_difficulty_sample_id(sample_id_to_db_id, self.contact_data_path, contact_means_path) 76 | 77 | new_db = [sample_id_to_db_id[key] for key in sample_id_difficulty_list] 78 | self.db = 
new_db 79 | 80 | 81 | def __len__(self): 82 | return len(self.db) 83 | 84 | 85 | def __getitem__(self, index): 86 | aid = self.db[index] 87 | pid = self.db_pid[index] 88 | pair_name = aid.split('/')[-5] 89 | action_name = aid.split('/')[-4] 90 | cam_name = aid.split('/')[-2] 91 | img_name = aid.split('/')[-1].split('.jpg')[0] 92 | sample_id = f'{pair_name}-{action_name}-{cam_name}-{img_name}-{pid}' 93 | 94 | orig_img_path = os.path.join(self.data_dir, pair_name, action_name, 'images', cam_name, f'{img_name}.jpg') 95 | 96 | orig_img = load_img(orig_img_path) 97 | img_shape = orig_img.shape[:2] 98 | img_h, img_w = img_shape 99 | 100 | mano_valid = np.ones((1), dtype=np.float32) 101 | 102 | 103 | ################################## LOAD ANNOTATION DATA ##################################### 104 | annot_data_path = os.path.join(self.annot_data_path, f'{sample_id}.npz') 105 | 106 | annot_data = np.load(annot_data_path, allow_pickle=True) 107 | mano_r_contact_0 = annot_data['mano_r_contact_0'] 108 | mano_r_contact_1 = annot_data['mano_r_contact_1'] 109 | bbox_hand_r_0 = annot_data['bbox_hand_r_0'] 110 | bbox_hand_r_1 = annot_data['bbox_hand_r_1'] 111 | 112 | if pid == 0: 113 | contact_h = mano_r_contact_0.astype(np.float32) 114 | else: 115 | contact_h = mano_r_contact_1.astype(np.float32) 116 | 117 | contact_data = dict(contact_h=contact_h) 118 | ################################## LOAD ANNOTATION DATA ##################################### 119 | 120 | 121 | ############################### PROCESS CROP AND AUGMENTATION ################################ 122 | if pid == 0: 123 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r_0, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 124 | else: 125 | img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), bbox_hand_r_1, self.data_split, enforce_flip=False) # TODO: CHNAGE THIS FOR TRAINING 126 | crop_img = img.copy() 127 | 128 | # Transform for 3D HMR 129 | if ('resnet' in cfg.MODEL.backbone_type or 'hrnet' in cfg.MODEL.backbone_type or 'handoccnet' in cfg.MODEL.backbone_type): 130 | img = self.transform(img.astype(np.float32)/255.0) 131 | elif (cfg.MODEL.backbone_type in ['hamer']) or ('vit' in cfg.MODEL.backbone_type): 132 | normalize_img = Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std) 133 | img = img.transpose(2, 0, 1) / 255.0 134 | img = normalize_img(torch.from_numpy(img)).float() 135 | else: 136 | raise NotImplementedError 137 | ############################### PROCESS CROP AND AUGMENTATION ################################ 138 | 139 | 140 | input_data = dict(image=img) 141 | targets_data = dict(contact_data=contact_data) 142 | meta_info = dict(sample_id=sample_id, mano_valid=mano_valid) 143 | 144 | 145 | return dict(input_data=input_data, targets_data=targets_data, meta_info=meta_info) --------------------------------------------------------------------------------
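
Usage note (not part of the repository sources above): every dataset class under data/*/dataset.py follows the same interface. Each is constructed with (transform, data_split), indexes into a per-dataset split, and its __getitem__ returns a dict with input_data (the normalized hand crop), targets_data (per-vertex hand contact labels under contact_data/contact_h), and meta_info (sample_id and mano_valid). The sketch below shows how such a dataset would typically be wrapped in a standard PyTorch DataLoader. It is a minimal illustration under stated assumptions, not code from the repository: the import path data.Hi4D.dataset, the backbone and experiment-directory arguments, and the batch size are placeholders, and the config must be initialized through update_config (as demo.py does) before any dataset is built, since the constructors read cfg.MODEL.* fields.

from torch.utils.data import DataLoader
from torchvision import transforms

from lib.core.config import update_config
from data.Hi4D.dataset import Hi4D  # assumed import path; any data/*/dataset.py class behaves the same way

# Load the config first: the dataset constructors read cfg.MODEL.* (backbone type, balanced sampling, etc.).
update_config(backbone_type='hamer', exp_dir='experiments_example')  # placeholder experiment directory

# ToTensor() is only consumed by the ResNet/HRNet/HandOccNet branch of __getitem__;
# the HaMeR/ViT branch normalizes the crop internally and ignores this transform.
dataset = Hi4D(transform=transforms.ToTensor(), data_split='train')
loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)

for batch in loader:
    images = batch['input_data']['image']                             # (B, 3, H, W) float tensor
    contact_gt = batch['targets_data']['contact_data']['contact_h']   # (B, V) per-vertex hand contact labels
    sample_ids = batch['meta_info']['sample_id']                      # list of B sample-id strings
    break  # one batch is enough to illustrate the returned structure

The same pattern applies to HO3D, Decaf, RICH, ObMan, and the other datasets shown above; only the on-disk layout they read from differs.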