├── .gitignore ├── H36M-Toolbox ├── common │ ├── README.MD │ ├── quaternion.py │ ├── mocap_dataset.py │ ├── custom_dataset.py │ ├── utils.py │ ├── camera.py │ ├── skeleton.py │ ├── humaneva_dataset.py │ ├── model_stmo.py │ ├── model_stmo_pretrain.py │ ├── loss.py │ └── arguments.py ├── images │ └── README.MD ├── requirements.txt ├── videonames.mat ├── camera_data.pkl ├── data │ └── README.MD ├── config.ini.example ├── docker-compose.yml ├── video_to_images.py ├── metadata.py ├── README.md ├── extract_all.py ├── checksums.txt ├── download_all.py └── transform.py ├── ContextPose ├── mvn │ ├── models │ │ ├── networks │ │ │ ├── __init__.py │ │ │ ├── network.py │ │ │ ├── refineNet.py │ │ │ └── globalNet.py │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── models.py │ │ │ └── default.py │ │ ├── cpn │ │ │ ├── config.py │ │ │ ├── test_config.py │ │ │ ├── train.py │ │ │ └── test.py │ │ ├── conpose.py │ │ └── loss.py │ ├── utils │ │ ├── __init__.py │ │ ├── misc.py │ │ ├── logger.py │ │ └── cfg.py │ └── datasets │ │ ├── __init__.py │ │ ├── _init_path.py │ │ └── utils.py ├── data │ └── pretrained │ │ └── coco │ │ └── README.MD ├── requirements.txt ├── conda-requirements.txt └── experiments │ └── human36m │ └── human36m.yaml ├── ContextPose_mpi ├── dataset │ ├── README.txt │ ├── mpi_inf_3dhp │ │ ├── util │ │ │ ├── mpii_config_paths.m │ │ │ ├── mpii_get_camera_set.m │ │ │ ├── mpii_get_joint_set.m │ │ │ └── mpii_get_sequence_info.m │ │ ├── license.txt │ │ ├── .README.txt.swp │ │ ├── get_testset.sh │ │ ├── conf.ig │ │ ├── get_dataset.sh │ │ └── README.txt │ ├── pretrained │ │ └── README.txt │ ├── process_data.sh │ └── data_util │ │ └── video_to_images.py ├── checkpoint │ └── README.txt ├── 3dhp_test │ ├── annot-test.h5 │ ├── mpii_3dhp_evaluation_sequencewise.xlsx │ ├── test_util │ │ ├── mpii_3D_error.m │ │ ├── mpii_get_activity_name.m │ │ ├── camera_calibration │ │ │ ├── ts1-4cameras.calib │ │ │ └── ts5-6cameras.calib │ │ ├── mpii_get_pck_auc_joint_groups.m │ │ ├── mpii_perspective_correction_code.m │ │ ├── mpii_test_predictions_py.m │ │ ├── mpii_get_joints.m │ │ ├── mpii_evaluate_errors.m │ │ └── mpii_compute_3d_pck.m │ └── README.txt ├── common │ ├── utils_3dhp.py │ ├── camera.py │ ├── skeleton.py │ ├── opt.py │ ├── cfg.py │ ├── load_data_3dhp_mae.py │ └── generator_tds.py ├── model │ └── conpose.py └── README.md └── images ├── teaser.png └── framework.png /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .DS_Store -------------------------------------------------------------------------------- /H36M-Toolbox/common/README.MD: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/README.txt: -------------------------------------------------------------------------------- 1 | Put the datasets in this folder. 
-------------------------------------------------------------------------------- /ContextPose/mvn/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | -------------------------------------------------------------------------------- /ContextPose/mvn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | -------------------------------------------------------------------------------- /ContextPose_mpi/checkpoint/README.txt: -------------------------------------------------------------------------------- 1 | Place the model checkpoint in this folder. -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/util/mpii_config_paths.m: -------------------------------------------------------------------------------- 1 | mpii_data_path = '../'; 2 | -------------------------------------------------------------------------------- /H36M-Toolbox/images/README.MD: -------------------------------------------------------------------------------- 1 | RGB images from Human3.6M should be under this directory. 2 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/pretrained/README.txt: -------------------------------------------------------------------------------- 1 | Put the pre-trained weights of HRNet in this folder. -------------------------------------------------------------------------------- /images/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/images/teaser.png -------------------------------------------------------------------------------- /ContextPose/data/pretrained/coco/README.MD: -------------------------------------------------------------------------------- 1 | Download the pretrained weights for 2D pose detectors here. 
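For reference, a sketch of the files this folder is expected to contain — the filenames below are the ones referenced by `experiments/human36m/human36m.yaml` in this repository, and which one you need depends on the `model.backbone.type` you select there:

```
data/pretrained/coco/
├── pose_hrnet_w32_256x192.pth   # backbone type "hrnet_32"
├── pose_hrnet_w48_256x192.pth   # backbone type "hrnet_48"
└── CPN50_256x192.pth.tar        # backbone type "cpn"
```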
2 | -------------------------------------------------------------------------------- /images/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/images/framework.png -------------------------------------------------------------------------------- /H36M-Toolbox/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.13.3 2 | tqdm==4.19.8 3 | h5py==2.7.1 4 | spacepy==0.1.6 5 | requests==2.20.0 6 | -------------------------------------------------------------------------------- /H36M-Toolbox/videonames.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/H36M-Toolbox/videonames.mat -------------------------------------------------------------------------------- /H36M-Toolbox/camera_data.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/H36M-Toolbox/camera_data.pkl -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/annot-test.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/ContextPose_mpi/3dhp_test/annot-test.h5 -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/license.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/ContextPose_mpi/dataset/mpi_inf_3dhp/license.txt -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/.README.txt.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/ContextPose_mpi/dataset/mpi_inf_3dhp/.README.txt.swp -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/mpii_3dhp_evaluation_sequencewise.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/ContextPose_mpi/3dhp_test/mpii_3dhp_evaluation_sequencewise.xlsx -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_3D_error.m: -------------------------------------------------------------------------------- 1 | function out_struct = mpii_3D_error(method_name, error_vector) 2 | out_struct = struct('method', method_name, 'error', error_vector); 3 | end -------------------------------------------------------------------------------- /H36M-Toolbox/data/README.MD: -------------------------------------------------------------------------------- 1 | Please refer to [VideoPose3D](https://github.com/facebookresearch/VideoPose3D/blob/main/DATASETS.md) to download `data_2d_h36m_cpn_ft_h36m_dbb.npz` to this directory. 
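As a quick sanity check after downloading, you can load the archive and list its keys. A minimal sketch, assuming the standard VideoPose3D `.npz` layout (the key names are an assumption here, not something verified in this repository):

```python
# Hypothetical check that the downloaded CPN 2D detections load correctly.
import numpy as np

data = np.load('data_2d_h36m_cpn_ft_h36m_dbb.npz', allow_pickle=True)
print(data.files)  # typically ['positions_2d', 'metadata'] in the VideoPose3D format
```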
2 | -------------------------------------------------------------------------------- /H36M-Toolbox/config.ini.example: -------------------------------------------------------------------------------- 1 | [General] 2 | 3 | # Get your PHPSESSID by logging into http://vision.imar.ro/human3.6m/ and inspecting the cookies 4 | # with your web browser. 5 | PHPSESSID=xxxxxxxxxxxxxxxxxxxxxxxxxx 6 | -------------------------------------------------------------------------------- /H36M-Toolbox/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2.3' 2 | services: 3 | main: 4 | build: . 5 | volumes: 6 | - .:/app 7 | - /etc/localtime:/etc/localtime:ro 8 | environment: 9 | - PYTHONIOENCODING=utf_8 10 | init: true 11 | network_mode: host 12 | -------------------------------------------------------------------------------- /ContextPose/mvn/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from mvn.datasets.human36m import Human36MMultiViewDataset as multiview_human36m 4 | from mvn.datasets.human36m import Human36MSingleViewDataset as human36m 5 | from mvn.datasets.human36m import Human36MKeypointDataset as keypoint_human36m 6 | -------------------------------------------------------------------------------- /ContextPose/mvn/datasets/_init_path.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path as osp 6 | import sys 7 | 8 | 9 | def add_path(path): 10 | if path not in sys.path: 11 | sys.path.insert(0, path) 12 | 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | lib_path = osp.join(this_dir, '..', '..') 17 | add_path(lib_path) -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_get_activity_name.m: -------------------------------------------------------------------------------- 1 | function [activity_names] = mpii_get_activity_name(activity_id) 2 | 3 | activities{1} = 'Standing/Walking'; 4 | activities{2} = 'Exercising'; 5 | activities{3} = 'Sitting'; 6 | activities{4} = 'Reaching/Crouching'; 7 | activities{5} = 'On The Floor'; 8 | activities{6} = 'Sports'; 9 | activities{7} = 'Miscellaneous'; 10 | 11 | activity_names = activities(activity_id); 12 | end -------------------------------------------------------------------------------- /ContextPose/mvn/models/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .default import _C as cfg 8 | from .default import update_config 9 | from .models import MODEL_EXTRAS 10 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/camera_calibration/ts1-4cameras.calib: -------------------------------------------------------------------------------- 1 | tc camera calibration v0.3 2 | camera 0 cam_8 3 | frame 0 4 | sensorSize 10 10 # in mm 5 | focalLength 7.32506 # in mm 6 | pixelAspect 1.00044 7 | centerOffset -0.0322884 0.0929296 # in mm (positive values move right and down) 8 | distortionModel OpenCV 9 | distortion 0.0 0.0 0.0 0.0 0.0 10 | origin 3427.28 1387.86 309.42 11 | up -0.208215 0.976233 0.06014 12 | right 0.000575281 0.0616098 -0.9981 13 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_get_pck_auc_joint_groups.m: -------------------------------------------------------------------------------- 1 | function [joint_groups] = mpii_get_pck_auc_joint_groups() 2 | 3 | joint_groups = { %'Head', [1,17]; 4 | 'Head', [1]; 5 | 'Neck', [2]; 6 | 'Shou', [3,6]; 7 | 'Elbow', [4,7]; 8 | 'Wrist', [5,8]; 9 | %'spine', [16]; 10 | 'Hip', [9,12]; 11 | 'Knee', [10,13]; 12 | 'Ankle', [11,14]; 13 | }; 14 | end -------------------------------------------------------------------------------- /ContextPose_mpi/common/utils_3dhp.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def mpii_get_sequence_info(subject_id, sequence): 4 | 5 | switcher = { 6 | "1 1": [6416,25], 7 | "1 2": [12430,50], 8 | "2 1": [6502,25], 9 | "2 2": [6081,25], 10 | "3 1": [12488,50], 11 | "3 2": [12283,50], 12 | "4 1": [6171,25], 13 | "4 2": [6675,25], 14 | "5 1": [12820,50], 15 | "5 2": [12312,50], 16 | "6 1": [6188,25], 17 | "6 2": [6145,25], 18 | "7 1": [6239,25], 19 | "7 2": [6320,25], 20 | "8 1": [6468,25], 21 | "8 2": [6054,25], 22 | 23 | } 24 | return switcher.get(subject_id+" "+sequence) 25 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/get_testset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to fetch and unzip the test set 3 | echo "Reading configuration from ./config....." >&2 4 | source ./conf.ig 5 | if [[ $ready_to_download -eq 0 ]]; then 6 | echo "Please read the documentation and edit the config file accordingly." >&2 7 | exit 1 8 | fi 9 | source_path="http://gvv.mpi-inf.mpg.de/3dhp-dataset" 10 | if [ ! -f "./mpi_inf_3dhp_test_set.zip" ]; then 11 | wget "$source_path/mpi_inf_3dhp_test_set.zip" 12 | fi 13 | if [ -f "./mpi_inf_3dhp_test_set.zip" ]; then 14 | if [ ! -d "$destination" ]; then 15 | mkdir "$destination" 16 | fi 17 | unzip "./mpi_inf_3dhp_test_set.zip" -d "$destination/mpi_inf_3dhp_test_set" 18 | rm "./mpi_inf_3dhp_test_set.zip" 19 | fi 20 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/process_data.sh: -------------------------------------------------------------------------------- 1 | cd dataset/mpi_inf_3dhp 2 | 3 | # Download raw videos and annotations 4 | bash get_dataset.sh 5 | bash get_testset.sh 6 | 7 | mv mpi_inf_3dhp_test_set/mpi_inf_3dhp_test_set ../ 8 | rm -r mpi_inf_3dhp_test_set 9 | 10 | # Prepare labels 11 | cd ../../ 12 | if [ ! 
-f "dataset/data_train_3dhp.npz" ]; then 13 | python dataset/data_util/data_to_npz_3dhp.py 14 | fi 15 | 16 | if [ ! -f "dataset/data_test_3dhp.npz" ]; then 17 | python dataset/data_util/data_to_npz_3dhp_test.py 18 | fi 19 | 20 | # Convert raw videos to images for the training set 21 | python dataset/data_util/video_to_images.py 22 | 23 | # Crop images to a smaller size (256x192) 24 | python dataset/data_util/convert_to_small.py 25 | python dataset/data_util/convert_to_small_test.py -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/util/mpii_get_camera_set.m: -------------------------------------------------------------------------------- 1 | function [camera_set] = mpii_get_camera_set(camera_set_name) 2 | 3 | switch camera_set_name 4 | 5 | case 'regular' 6 | camera_set = 0:13; %Cameras with regular lenses, not fisheye 7 | case 'relevant' 8 | camera_set = 0:10; %All cameras except the ceiling mounted ones 9 | case 'ceiling' 10 | camera_set = 11:13; %Top down views 11 | case 'vnect' 12 | camera_set = [0, 1, 2, 4, 5, 6, 7, 8]; %Chest high, knee high and 2 cameras angled down. Use for VNect @ SIGGRAPH 17 13 | case 'mm3d_chest' 14 | camera_set = [0, 2, 4, 7, 8]; %Subset of chest high, used in "Monocular 3D Human Pose Estimation in-the-wild Using Improved CNN supervision" 15 | otherwise 16 | camera_set = []; 17 | end 18 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/conf.ig: -------------------------------------------------------------------------------- 1 | # The data would be downloaded to this path 2 | # Make sure you have approx 25GB space in this 3 | # path to download the complete training set. 4 | # The test set needs another 7GB and can be 5 | # downloaded with get_testset.sh 6 | destination='./' 7 | # The subjects you want to download the train data for. 8 | # Start with a few if all you want to do is examine the data 9 | subjects=(1 2 3 4 5 6 7 8) 10 | # Set if you want to download the camera views 11 | # that were not used for VNect 12 | download_extra_wall_cameras=0 13 | download_extra_ceiling_cameras=0 14 | # Unset if you don't want to download the segmentation 15 | # masks for the sequences 16 | download_masks=0 17 | # Set if you agree with the license conditions and want 18 | # to proceed with downloading the dataset 19 | ready_to_download=1 20 | -------------------------------------------------------------------------------- /ContextPose/mvn/utils/misc.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import json 3 | 4 | 5 | def config_to_str(config): 6 | return yaml.dump(yaml.safe_load(json.dumps(config))) # fuck yeah 7 | 8 | 9 | class AverageMeter(object): 10 | """Computes and stores the average and current value""" 11 | def __init__(self): 12 | self.reset() 13 | 14 | def reset(self): 15 | self.val = 0 16 | self.avg = 0 17 | self.sum = 0 18 | self.count = 0 19 | 20 | def update(self, val, n=1): 21 | self.val = val 22 | self.sum += val * n 23 | self.count += n 24 | self.avg = self.sum / self.count 25 | 26 | 27 | def calc_gradient_norm(named_parameters): 28 | total_norm = 0.0 29 | for name, p in named_parameters: 30 | param_norm = p.grad.data.norm(2) 31 | total_norm += param_norm.item() ** 2 32 | 33 | total_norm = total_norm ** (1. 
/ 2) 34 | 35 | return total_norm 36 | -------------------------------------------------------------------------------- /ContextPose/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2022.9.24 2 | charset-normalizer==3.1.0 3 | cycler==0.11.0 4 | easydict==1.10 5 | einops==0.6.1 6 | fonttools==4.39.4 7 | idna==3.4 8 | imageio==2.31.1 9 | kiwisolver==1.4.4 10 | lazy_loader==0.2 11 | matplotlib==3.5.2 12 | networkx==2.8.8 13 | numpy==1.24.4 14 | opencv-python==4.6.0.66 15 | packaging==23.1 16 | Pillow==10.0.0 17 | protobuf==3.20.3 18 | pyparsing==3.0.9 19 | python-dateutil==2.8.2 20 | PyWavelets==1.3.0 21 | PyYAML==6.0 22 | requests==2.31.0 23 | scikit-image==0.20.0 24 | scipy==1.9.1 25 | six==1.16.0 26 | tensorboardX==2.6 27 | tifffile==2023.7.10 28 | timm==0.6.7 29 | torch @ http://download.pytorch.org/whl/cu113/torch-1.11.0%2Bcu113-cp38-cp38-linux_x86_64.whl#sha256=b6a799bdb6ee3d914e5e62bddb4276d4a10248c1af4f2d217738e5f9ee27485b 30 | torchvision @ http://download.pytorch.org/whl/cu113/torchvision-0.12.0%2Bcu113-cp38-cp38-linux_x86_64.whl#sha256=37133e8c5b0ec2f01999e59116f6d0e36d9afb1c7f8f58bd0c3dc8996f835419 31 | tqdm==4.66.1 32 | typing_extensions==4.6.2 33 | urllib3==2.0.2 34 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_perspective_correction_code.m: -------------------------------------------------------------------------------- 1 | test_subject_id = [1,2,3,4,5,6]; 2 | focalL{1} = (2048/10)*7.320339203; % res/sensorsize*focalLengthMM 3 | focalL{2} = focalL{1}; 4 | focalL{3} = focalL{1}; 5 | focalL{4} = focalL{1}; 6 | focalL{5} = (1920/10)*8.770747185; % res/sensorsize*focalLengthMM 7 | focalL{6} = focalL{5}; 8 | 9 | for ts = 1:6 10 | 11 | %Fancy predictions here: predict_2d and predict_3d. predict_2d is in the uncropped(?) image space 12 | 13 | focalLengthInPX = focalL{ts}; 14 | 15 | resolutionXInPX = image_size{ts}(2); %I can't seem to remember which one is x or why. 
Try both until something works :) 16 | resolutionYInPX = image_size{ts}(1); 17 | principalPointX = resolutionXInPX/2; 18 | principalPointY = resolutionYInPX/2; 19 | center = predict_2d(15,:) - [principalPointX, principalPointY]; % (pelvis location) 20 | R = mpii_perspective_correction(center(1), 0, focalLengthInPX); 21 | predict_3d = R * predict_3d; 22 | end -------------------------------------------------------------------------------- /ContextPose/mvn/utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | 5 | class Logger(): 6 | def __init__(self, log_path, level="DEBUG"): 7 | self.logger = logging.getLogger() 8 | self.logger.setLevel(level) 9 | self.log_path = log_path 10 | self.add_handler() 11 | 12 | def console_handler(self,level="DEBUG"): 13 | console_handler = logging.StreamHandler() 14 | console_handler.setLevel(level) 15 | 16 | console_handler.setFormatter(self.get_formatter()[0]) 17 | 18 | return console_handler 19 | 20 | def file_handler(self, level="DEBUG"): 21 | file_handler = logging.FileHandler(os.path.join(self.log_path, "log.txt"),mode="a",encoding="utf-8") 22 | file_handler.setLevel(level) 23 | 24 | file_handler.setFormatter(self.get_formatter()[1]) 25 | 26 | return file_handler 27 | 28 | def get_formatter(self): 29 | console_fmt = logging.Formatter(fmt="%(asctime)s: %(message)s") 30 | file_fmt = logging.Formatter(fmt="%(asctime)s: %(message)s") 31 | 32 | return console_fmt,file_fmt 33 | 34 | def add_handler(self): 35 | self.logger.addHandler(self.console_handler()) 36 | self.logger.addHandler(self.file_handler()) 37 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/quaternion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | import torch 9 | 10 | def qrot(q, v): 11 | """ 12 | Rotate vector(s) v about the rotation described by quaternion(s) q. 13 | Expects a tensor of shape (*, 4) for q and a tensor of shape (*, 3) for v, 14 | where * denotes any number of dimensions. 15 | Returns a tensor of shape (*, 3). 16 | """ 17 | assert q.shape[-1] == 4 18 | assert v.shape[-1] == 3 19 | assert q.shape[:-1] == v.shape[:-1] 20 | 21 | qvec = q[..., 1:] 22 | uv = torch.cross(qvec, v, dim=len(q.shape)-1) 23 | uuv = torch.cross(qvec, uv, dim=len(q.shape)-1) 24 | return (v + 2 * (q[..., :1] * uv + uuv)) 25 | 26 | 27 | def qinverse(q, inplace=False): 28 | # We assume the quaternion to be normalized 29 | if inplace: 30 | q[..., 1:] *= -1 31 | return q 32 | else: 33 | w = q[..., :1] 34 | xyz = q[..., 1:] 35 | return torch.cat((w, -xyz), dim=len(q.shape)-1) -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/README.txt: -------------------------------------------------------------------------------- 1 | Details: 2 | The test set has 6 sequences and a dedicated folder for each sequence. 3 | Each folder contains a .mat file with the following information 4 | 5 | valid_frame: Indicates whether the frame is valid or not. Invalid frames 6 | won't be used for evaluation. Refer to mpii_test_predictions.m for more. 
7 | 8 | activity_annnotation: Activity annotations per frame, used for 9 | generating activitywise error reports 10 | 11 | univ_annot3: A 3x17x1xn matrix containing the 3D annotations in mm for 12 | 17 joints. The annotations are scaled to the height of the universal 13 | skeleton used by Human3.6m. The order and the names of the joints can be 14 | observed using [~,~,~,joint_names] = mpii_get_joints('relevant'); 15 | 16 | The file mpii_test_predictions.m should be a helpful starting point. 17 | Additionally, information about the crops (the original size in the frame) 18 | is available in the mat/zip file attached with the email. 19 | 20 | If you want to evaluate by scene setting, you can use the sequencewise evaluation 21 | to convert to these numbers by doing 22 | #1:Studio with Green Screen (TS1*603 + TS2 *540)/ (603+540) 23 | #2:Studio without Green Screen (TS3*505+TS4*553)/(505+553) 24 | #3:Outdoor (TS5*276+TS6*452)/(276+452) 25 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/data_util/video_to_images.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | from scipy.io import loadmat 4 | from subprocess import call 5 | from os import makedirs 6 | 7 | 8 | subject_list = [1, 2, 3, 4, 5, 6, 7, 8] 9 | sequence_list = [1, 2] 10 | camera_list = [0, 1, 2, 4, 5, 6, 7, 8] 11 | 12 | makedirs('dataset/mpi_inf_3dhp/images', exist_ok=True) 13 | 14 | cnt = 0 15 | for s in subject_list: 16 | for se in sequence_list: 17 | for c in camera_list: 18 | subdir_format = 's_{:02d}_seq_{:02d}_ca_{:02d}' 19 | 20 | subdir = subdir_format.format(s, se, c) 21 | makedirs(osp.join('dataset/mpi_inf_3dhp/images', subdir), exist_ok=True) 22 | 23 | fileformat = 'dataset/mpi_inf_3dhp/images' + '/' + subdir + '/' + subdir + '_%06d.jpg' 24 | 25 | videopath = 'dataset/mpi_inf_3dhp/S{:01d}/Seq{:01d}/imageSequence/video_{:01d}.avi'.format(s, se, c) 26 | # print(videoname.split('.')[0]) 27 | subject = 'S' + str(s) 28 | 29 | print(videopath) 30 | cnt += 1 31 | call([ 32 | 'ffmpeg', 33 | '-nostats', 34 | '-i', videopath, 35 | '-qscale:v', '3', 36 | fileformat 37 | ]) 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/cpn/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import numpy as np 5 | 6 | def add_pypath(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | class Config: 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | this_dir_name = cur_dir.split('/')[-1] 13 | root_dir = os.path.join(cur_dir, '..') 14 | 15 | model = 'CPN50' 16 | 17 | lr = 5e-4 18 | lr_gamma = 0.5 19 | lr_dec_epoch = list(range(6,40,6)) 20 | 21 | batch_size = 32 22 | weight_decay = 1e-5 23 | 24 | num_class = 17 25 | img_path = os.path.join(root_dir, 'data', 'COCO2017', 'train2017') 26 | symmetry = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12), (13, 14), (15, 16)] 27 | bbox_extend_factor = (0.1, 0.15) # x, y 28 | 29 | # data augmentation setting 30 | scale_factor=(0.7, 1.35) 31 | rot_factor=45 32 | 33 | pixel_means = np.array([122.7717, 115.9465, 102.9801]) # RGB 34 | data_shape = (256, 192) 35 | output_shape = (64, 48) 36 | gaussain_kernel = (7, 7) 37 | 38 | gk15 = (15, 15) 39 | gk11 = (11, 11) 40 | gk9 = (9, 9) 41 | gk7 = (7, 7) 42 | 43 | gt_path = os.path.join(root_dir, 'data', 'COCO2017', 
'annotations', 'COCO_2017_train.json') 44 | 45 | cfg = Config() 46 | add_pypath(cfg.root_dir) 47 | 48 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/networks/network.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | import torch.nn as nn 3 | from .globalNet import globalNet 4 | from .refineNet import refineNet 5 | 6 | __all__ = ['CPN50', 'CPN101'] 7 | 8 | class CPN(nn.Module): 9 | def __init__(self, resnet, output_shape, num_class, pretrained=True): 10 | super(CPN, self).__init__() 11 | channel_settings = [2048, 1024, 512, 256] 12 | self.resnet = resnet 13 | self.global_net = globalNet(channel_settings, output_shape, num_class) 14 | self.refine_net = refineNet(channel_settings[-1], output_shape, num_class) 15 | 16 | def forward(self, x): 17 | res_out = self.resnet(x) 18 | # global_fms, global_outs = self.global_net(res_out) 19 | global_fms = self.global_net(res_out) 20 | refine_out = self.refine_net(global_fms) 21 | 22 | return refine_out 23 | # return global_outs, refine_out 24 | 25 | def CPN50(out_size,num_class,pretrained=True): 26 | res50 = resnet50(pretrained=pretrained) 27 | model = CPN(res50, output_shape=out_size,num_class=num_class, pretrained=pretrained) 28 | return model 29 | 30 | def CPN101(out_size,num_class,pretrained=True): 31 | res101 = resnet101(pretrained=pretrained) 32 | model = CPN(res101, output_shape=out_size,num_class=num_class, pretrained=pretrained) 33 | return model 34 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/cpn/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import numpy as np 5 | 6 | def add_pypath(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | class Config: 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | this_dir_name = cur_dir.split('/')[-1] 13 | root_dir = os.path.join(cur_dir, '..') 14 | 15 | model = 'CPN50' # option 'CPN50', 'CPN101' 16 | 17 | num_class = 17 18 | img_path = os.path.join(root_dir, 'data', 'COCO2017', 'val2017') 19 | symmetry = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12), (13, 14), (15, 16)] 20 | bbox_extend_factor = (0.1, 0.15) # x, y 21 | 22 | pixel_means = np.array([122.7717, 115.9465, 102.9801]) # RGB 23 | data_shape = (256, 192) 24 | output_shape = (64, 48) 25 | 26 | use_GT_bbox = True 27 | if use_GT_bbox: 28 | gt_path = os.path.join(root_dir, 'data', 'COCO2017', 'annotations', 'COCO_2017_val.json') 29 | else: 30 | # if False, make sure you have downloaded the val_dets.json and place it into annotation folder 31 | gt_path = os.path.join(root_dir, 'data', 'COCO2017', 'annotations', 'val_dets.json') 32 | ori_gt_path = os.path.join(root_dir, 'data', 'COCO2017', 'annotations', 'person_keypoints_val2017.json') 33 | 34 | cfg = Config() 35 | add_pypath(cfg.root_dir) 36 | add_pypath(os.path.join(cfg.root_dir, 'cocoapi/PythonAPI')) -------------------------------------------------------------------------------- /ContextPose_mpi/model/conpose.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import numpy as np 3 | import pickle 4 | import random 5 | from time import time 6 | 7 | import torch 8 | from torch import nn 9 | import torch.nn.functional as F 10 | 11 | from model import pose_hrnet 12 | from model.pose_dformer import PoseTransformer 13 
| 14 | 15 | class VolumetricTriangulationNet(nn.Module): 16 | def __init__(self, config, device='cuda:0'): 17 | super().__init__() 18 | 19 | self.num_joints = config.model.backbone.num_joints 20 | 21 | self.backbone = pose_hrnet.get_pose_net(config.model.backbone) 22 | 23 | if config.model.backbone.fix_weights: 24 | print("model backbone weights are fixed") 25 | for p in self.backbone.parameters(): 26 | p.requires_grad = False 27 | 28 | self.volume_net = PoseTransformer(config.model.poseformer) 29 | 30 | 31 | def forward(self, images, keypoints_2d_cpn, keypoints_2d_cpn_crop): 32 | device = images.device 33 | images = images.permute(0, 3, 1, 2).contiguous() 34 | 35 | keypoints_2d_cpn_crop[..., :2] /= torch.tensor([192//2, 256//2], device=device) 36 | keypoints_2d_cpn_crop[..., :2] -= torch.tensor([1, 1], device=device) 37 | 38 | # forward backbone 39 | features_list = self.backbone(images) 40 | keypoints_3d = self.volume_net(keypoints_2d_cpn, keypoints_2d_cpn_crop, features_list) 41 | 42 | return keypoints_3d 43 | 44 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/mocap_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | import numpy as np 9 | from common.skeleton import Skeleton 10 | 11 | class MocapDataset: 12 | def __init__(self, fps, skeleton): 13 | self._skeleton = skeleton 14 | self._fps = fps 15 | self._data = None # Must be filled by subclass 16 | self._cameras = None # Must be filled by subclass 17 | 18 | def remove_joints(self, joints_to_remove): 19 | kept_joints = self._skeleton.remove_joints(joints_to_remove) 20 | for subject in self._data.keys(): 21 | for action in self._data[subject].keys(): 22 | s = self._data[subject][action] 23 | if 'positions' in s: 24 | s['positions'] = s['positions'][:, kept_joints] 25 | 26 | 27 | def __getitem__(self, key): 28 | return self._data[key] 29 | 30 | def subjects(self): 31 | return self._data.keys() 32 | 33 | def fps(self): 34 | return self._fps 35 | 36 | def skeleton(self): 37 | return self._skeleton 38 | 39 | def cameras(self): 40 | return self._cameras 41 | 42 | def supports_semi_supervised(self): 43 | # This method can be overridden 44 | return False -------------------------------------------------------------------------------- /H36M-Toolbox/video_to_images.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | from scipy.io import loadmat 4 | from subprocess import call 5 | from os import makedirs 6 | 7 | 8 | subject_list = [1, 5, 6, 7, 8, 9, 11] 9 | action_list = [x for x in range(2, 17)] 10 | subaction_list = [x for x in range(1, 3)] 11 | camera_list = [x for x in range(1, 5)] 12 | 13 | 14 | from metadata import load_h36m_metadata 15 | metadata = load_h36m_metadata() 16 | 17 | makedirs('images', exist_ok=True) 18 | 19 | 20 | cnt = 0 21 | for s in subject_list: 22 | for a in action_list: 23 | for sa in subaction_list: 24 | for c in camera_list: 25 | subdir_format = 's_{:02d}_act_{:02d}_subact_{:02d}_ca_{:02d}' 26 | 27 | subdir = subdir_format.format(s, a, sa, c) 28 | makedirs(osp.join('images', subdir), exist_ok=True) 29 | 30 | fileformat = 'images' + '/' + subdir + '/' + subdir + '_%06d.jpg' 31 | 32 | basename = 
metadata.get_base_filename('S{:d}'.format(s), '{:d}'.format(a), '{:d}'.format(sa), metadata.camera_ids[c-1]) 33 | videoname = basename + '.mp4' 34 | subject = 'S' + str(s) 35 | videopath = osp.join('extracted', subject, 'Videos', videoname) 36 | 37 | print(videopath) 38 | cnt += 1 39 | call([ 40 | 'ffmpeg', 41 | '-nostats', 42 | '-i', videopath, 43 | '-qscale:v', '3', 44 | fileformat 45 | ]) 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /ContextPose/conda-requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | _libgcc_mutex=0.1=main 5 | _openmp_mutex=5.1=1_gnu 6 | ca-certificates=2023.08.22=h06a4308_0 7 | certifi=2022.9.24=pypi_0 8 | charset-normalizer=3.1.0=pypi_0 9 | cycler=0.11.0=pypi_0 10 | easydict=1.10=pypi_0 11 | einops=0.6.1=pypi_0 12 | fonttools=4.39.4=pypi_0 13 | idna=3.4=pypi_0 14 | imageio=2.31.1=pypi_0 15 | kiwisolver=1.4.4=pypi_0 16 | lazy-loader=0.2=pypi_0 17 | ld_impl_linux-64=2.38=h1181459_1 18 | libffi=3.3=he6710b0_2 19 | libgcc-ng=11.2.0=h1234567_1 20 | libgomp=11.2.0=h1234567_1 21 | libstdcxx-ng=11.2.0=h1234567_1 22 | matplotlib=3.5.2=pypi_0 23 | ncurses=6.4=h6a678d5_0 24 | networkx=2.8.8=pypi_0 25 | numpy=1.24.4=pypi_0 26 | opencv-python=4.6.0.66=pypi_0 27 | openssl=1.1.1w=h7f8727e_0 28 | packaging=23.1=pypi_0 29 | pillow=10.0.0=pypi_0 30 | pip=23.2.1=py38h06a4308_0 31 | protobuf=3.20.3=pypi_0 32 | pyparsing=3.0.9=pypi_0 33 | python=3.8.10=h12debd9_8 34 | python-dateutil=2.8.2=pypi_0 35 | pywavelets=1.3.0=pypi_0 36 | pyyaml=6.0=pypi_0 37 | readline=8.2=h5eee18b_0 38 | requests=2.31.0=pypi_0 39 | scikit-image=0.20.0=pypi_0 40 | scipy=1.9.1=pypi_0 41 | setuptools=68.0.0=py38h06a4308_0 42 | six=1.16.0=pypi_0 43 | sqlite=3.41.2=h5eee18b_0 44 | tensorboardx=2.6=pypi_0 45 | tifffile=2023.7.10=pypi_0 46 | timm=0.6.7=pypi_0 47 | tk=8.6.12=h1ccaba5_0 48 | torch=1.11.0+cu113=pypi_0 49 | torchvision=0.12.0+cu113=pypi_0 50 | tqdm=4.66.1=pypi_0 51 | typing-extensions=4.6.2=pypi_0 52 | urllib3=2.0.2=pypi_0 53 | wheel=0.41.2=py38h06a4308_0 54 | xz=5.4.2=h5eee18b_0 55 | zlib=1.2.13=h5eee18b_0 56 | -------------------------------------------------------------------------------- /H36M-Toolbox/metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import xml.etree.ElementTree as ET 4 | 5 | 6 | class H36M_Metadata: 7 | def __init__(self, metadata_file): 8 | self.subjects = [] 9 | self.sequence_mappings = {} 10 | self.action_names = {} 11 | self.camera_ids = [] 12 | 13 | tree = ET.parse(metadata_file) 14 | root = tree.getroot() 15 | 16 | for i, tr in enumerate(root.find('mapping')): 17 | if i == 0: 18 | _, _, *self.subjects = [td.text for td in tr] 19 | self.sequence_mappings = {subject: {} for subject in self.subjects} 20 | elif i < 33: 21 | action_id, subaction_id, *prefixes = [td.text for td in tr] 22 | for subject, prefix in zip(self.subjects, prefixes): 23 | self.sequence_mappings[subject][(action_id, subaction_id)] = prefix 24 | 25 | for i, elem in enumerate(root.find('actionnames')): 26 | action_id = str(i + 1) 27 | self.action_names[action_id] = elem.text 28 | 29 | self.camera_ids = [elem.text for elem in root.find('dbcameras/index2id')] 30 | 31 | def get_base_filename(self, subject, action, subaction, camera): 32 | return '{}.{}'.format(self.sequence_mappings[subject][(action, 
subaction)], camera) 33 | 34 | 35 | def load_h36m_metadata(): 36 | return H36M_Metadata('metadata.xml') 37 | 38 | 39 | if __name__ == '__main__': 40 | metadata = load_h36m_metadata() 41 | print(metadata.subjects) 42 | print(metadata.sequence_mappings) 43 | print(metadata.action_names) 44 | print(metadata.camera_ids) 45 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/conpose.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from mvn.models import pose_hrnet 5 | from mvn.models.networks import network 6 | from mvn.models.cpn.test_config import cfg 7 | from mvn.models.pose_dformer import PoseTransformer 8 | 9 | 10 | class CA_PF(nn.Module): 11 | def __init__(self, config, device='cuda:0'): 12 | super().__init__() 13 | 14 | self.num_joints = config.model.backbone.num_joints 15 | 16 | if config.model.backbone.type in ['hrnet_32', 'hrnet_48']: 17 | self.backbone = pose_hrnet.get_pose_net(config.model.backbone) 18 | 19 | elif config.model.backbone.type == 'cpn': 20 | self.backbone = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained=False) 21 | 22 | if config.model.backbone.fix_weights: 23 | print("model backbone weights are fixed") 24 | for p in self.backbone.parameters(): 25 | p.requires_grad = False 26 | 27 | self.volume_net = PoseTransformer(config.model.poseformer, backbone=config.model.backbone.type) 28 | 29 | 30 | def forward(self, images, keypoints_2d_cpn, keypoints_2d_cpn_crop): 31 | device = keypoints_2d_cpn.device 32 | images = images.permute(0, 3, 1, 2).contiguous() 33 | 34 | keypoints_2d_cpn_crop[..., :2] /= torch.tensor([192//2, 256//2], device=device) 35 | keypoints_2d_cpn_crop[..., :2] -= torch.tensor([1, 1], device=device) 36 | 37 | # forward backbone 38 | features_list = self.backbone(images) 39 | 40 | keypoints_3d = self.volume_net(keypoints_2d_cpn, keypoints_2d_cpn_crop, features_list) 41 | 42 | return keypoints_3d 43 | 44 | -------------------------------------------------------------------------------- /H36M-Toolbox/README.md: -------------------------------------------------------------------------------- 1 | This code is built on top of 2 | https://github.com/anibali/h36m-fetch 3 | 4 | 5 | [Human3.6M](http://vision.imar.ro/human3.6m/description.php) is a 3D 6 | human pose dataset containing 3.6 million human poses and corresponding 7 | images. The scripts in this repository make it easy to download, 8 | extract, and preprocess the images and annotations from Human3.6M. 9 | 10 | **Please do not ask me for a copy of the Human3.6M dataset. I do not own 11 | the data, nor do I have permission to redistribute it. Please visit 12 | http://vision.imar.ro/human3.6m/ in order to request access and contact 13 | the maintainers of the dataset.** 14 | 15 | ## Requirements 16 | 17 | * Python 3 18 | * [`axel`](https://github.com/axel-download-accelerator/axel) 19 | * CDF (https://www.scivision.dev/spacepy-install-anaconda-python/) 20 | 21 | ## Usage 22 | 23 | 1. Firstly, you will need to create an account at 24 | http://vision.imar.ro/human3.6m/ to gain access to the dataset. 25 | 2. Once your account has been approved, log in and inspect your cookies 26 | to find your PHPSESSID. 27 | 3. Copy the configuration file `config.ini.example` to `config.ini` 28 | and fill in your PHPSESSID. 29 | 4. 
download_all.py -> extract_all.py -> video_to_images.py -> generate_labels.py 30 | 31 | 32 | ## License 33 | 34 | The code in this repository is licensed under the terms of the 35 | [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). 36 | 37 | Please read the 38 | [license agreement](http://vision.imar.ro/human3.6m/eula.php) for the 39 | Human3.6M dataset itself, which specifies citations you must make when 40 | using the data in your own research. The file `metadata.xml` is directly 41 | copied from the "Visualisation and large scale prediction software" 42 | bundle from the Human3.6M website, and is subject to the same license 43 | agreement. 44 | -------------------------------------------------------------------------------- /H36M-Toolbox/extract_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from os import path, makedirs 4 | import tarfile 5 | from tqdm import tqdm 6 | 7 | 8 | subjects = ['S1', 'S5', 'S6', 'S7', 'S8', 'S9', 'S11'] 9 | 10 | 11 | # https://stackoverflow.com/a/6718435 12 | def commonprefix(m): 13 | s1 = min(m) 14 | s2 = max(m) 15 | for i, c in enumerate(s1): 16 | if c != s2[i]: 17 | return s1[:i] 18 | return s1 19 | 20 | 21 | def extract_tgz(tgz_file, dest): 22 | if path.exists(dest): 23 | return 24 | with tarfile.open(tgz_file, 'r:gz') as tar: 25 | members = [m for m in tar.getmembers() if m.isreg()] 26 | member_dirs = [path.dirname(m.name).split(path.sep) for m in members] 27 | base_path = path.sep.join(commonprefix(member_dirs)) 28 | for m in members: 29 | m.name = path.relpath(m.name, base_path) 30 | tar.extractall(dest) 31 | 32 | 33 | def extract_all(): 34 | for subject_id in tqdm(subjects, ascii=True): 35 | out_dir = path.join('extracted', subject_id) 36 | makedirs(out_dir, exist_ok=True) 37 | extract_tgz('archives/Poses_D2_Positions_{}.tgz'.format(subject_id), 38 | path.join(out_dir, 'Poses_D2_Positions')) 39 | extract_tgz('archives/Poses_D3_Positions_{}.tgz'.format(subject_id), 40 | path.join(out_dir, 'Poses_D3_Positions')), 41 | extract_tgz('archives/Poses_D3_Positions_mono_{}.tgz'.format(subject_id), 42 | path.join(out_dir, 'Poses_D3_Positions_mono')), 43 | extract_tgz('archives/Poses_D3_Positions_mono_universal_{}.tgz'.format(subject_id), 44 | path.join(out_dir, 'Poses_D3_Positions_mono_universal')), 45 | extract_tgz('archives/Videos_{}.tgz'.format(subject_id), 46 | path.join(out_dir, 'Videos')) 47 | 48 | 49 | if __name__ == '__main__': 50 | extract_all() 51 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/util/mpii_get_joint_set.m: -------------------------------------------------------------------------------- 1 | function [joint_idx, joint_parents_o1, joint_parents_o2, joint_names] = mpii_get_joint_set(joint_set_name) 2 | 3 | all_joint_names = {'spine3', 'spine4', 'spine2', 'spine', 'pelvis', ... %5 4 | 'neck', 'head', 'head_top', 'left_clavicle', 'left_shoulder', 'left_elbow', ... %11 5 | 'left_wrist', 'left_hand', 'right_clavicle', 'right_shoulder', 'right_elbow', 'right_wrist', ... %17 6 | 'right_hand', 'left_hip', 'left_knee', 'left_ankle', 'left_foot', 'left_toe', ... 
%23 7 | 'right_hip' , 'right_knee', 'right_ankle', 'right_foot', 'right_toe'}; 8 | 9 | %The O1 and O2 indices are relaive to the joint_idx, regardless of the joint set 10 | 11 | switch joint_set_name 12 | case 'all' 13 | joint_idx = 1:28; 14 | joint_parents_o1 = [3, 1, 4, 5, 5, 2, 6, 7, 6, 9, 10, 11, 12, 6, 14, 15, 16, 17, 5, 19, 20, 21, 22, 5, 24, 25, 26, 27 ]; 15 | joint_parents_o2 = [4, 3, 5, 5, 5, 1, 2, 6, 2, 6, 9, 10, 11, 2, 6, 14, 15, 16, 4, 5, 19, 20, 21, 4, 5, 24, 25, 26]; 16 | joint_names = all_joint_names; 17 | 18 | case 'relevant' %Human3.6m compatible joint set in Our order 19 | joint_idx = [8, 6, 15, 16, 17, 10, 11, 12, 24, 25, 26, 19, 20, 21, 5, 4, 7]; 20 | joint_parents_o1 = [ 2, 16, 2, 3, 4, 2, 6, 7, 15, 9, 10, 15, 12, 13, 15, 15, 2]; 21 | joint_parents_o2 = [ 16, 15, 16, 2, 3, 16, 2, 6, 16, 15, 9, 16, 15, 12, 15, 15, 16]; 22 | joint_names = all_joint_names(joint_idx); 23 | case 'extended' %Human3.6m compatible joint set in Our order + End effectors for Hands and Feet 24 | joint_idx = [8, 6, 15, 16, 17, 10, 11, 12, 24, 25, 26, 19, 20, 21, 5, 4, 7, 18, 13, 28, 23]; 25 | joint_parents_o1 = [ 2, 16, 2, 3, 4, 2, 6, 7, 15, 9, 10, 15, 12, 13, 15, 15, 2, 5, 8, 11, 14]; 26 | joint_parents_o2 = [ 16, 15, 16, 2, 3, 16, 2, 6, 16, 15, 9, 16, 15, 12, 15, 15, 16, 4, 7, 10, 13]; 27 | joint_names = all_joint_names(joint_idx); 28 | otherwise 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /ContextPose/experiments/human36m/human36m.yaml: -------------------------------------------------------------------------------- 1 | title: "human36m" 2 | kind: "human36m" 3 | azureroot: "" 4 | batch_output: true 5 | vis_freq: 1000 6 | vis_n_elements: 10 7 | 8 | model: 9 | image_shape: [192, 256] 10 | 11 | init_weights: false 12 | checkpoint: "" 13 | 14 | backbone: 15 | type: "hrnet_32" # ["hrnet_48", "cpn"] 16 | num_final_layer_channel: 17 17 | num_joints: 17 18 | num_layers: 152 19 | 20 | init_weights: true 21 | fix_weights: true 22 | checkpoint: "data/pretrained/coco/pose_hrnet_w32_256x192.pth" 23 | # checkpoint: "data/pretrained/coco/pose_hrnet_w48_256x192.pth" 24 | # checkpoint: "data/pretrained/coco/CPN50_256x192.pth.tar" 25 | 26 | poseformer: 27 | embed_dim_ratio: 128 28 | depth: 4 29 | 30 | loss: 31 | criterion: "MPJPE" 32 | scale_keypoints_3d: 0.1 33 | 34 | use_volumetric_ce_loss: true 35 | volumetric_ce_loss_weight: 0.01 36 | 37 | use_global_attention_loss: True 38 | global_attention_loss_weight: 1000000 39 | 40 | dataset: 41 | kind: "human36m" 42 | data_format: "" 43 | root: "../H36M-Toolbox/images/" 44 | train_labels_path: "data/h36m_train.pkl" 45 | val_labels_path: "data/h36m_validation.pkl" 46 | 47 | train: 48 | n_objects_per_epoch: 15000 49 | n_epochs: 9999 50 | 51 | batch_size: 512 # 512 for other backbones, 256 for cpn 52 | 53 | optimizer: 'Adam' 54 | backbone_lr: 0.0 55 | backbone_lr_step: [1000] 56 | backbone_lr_factor: 0.1 57 | process_features_lr: 0.001 58 | volume_net_lr: 0.00064 # 0.00032 for cpn 59 | volume_net_lr_decay: 0.99 60 | volume_net_lr_step: [1000] 61 | volume_net_lr_factor: 0.5 62 | 63 | with_damaged_actions: true 64 | undistort_images: true 65 | 66 | scale_bbox: 1.0 67 | 68 | shuffle: true 69 | randomize_n_views: true 70 | min_n_views: 1 71 | max_n_views: 1 72 | num_workers: 14 73 | 74 | val: 75 | batch_size: 512 # 512 for fixed backbone, 256 for cpn 76 | 77 | flip_test: true 78 | with_damaged_actions: true 79 | undistort_images: true 80 | 81 | scale_bbox: 1.0 82 | 83 | shuffle: false 84 | randomize_n_views: true 85 
| min_n_views: 1 86 | max_n_views: 1 87 | num_workers: 14 88 | retain_every_n_frames_in_test: 1 -------------------------------------------------------------------------------- /ContextPose_mpi/README.md: -------------------------------------------------------------------------------- 1 | ## MPI-INF-3DHP 2 | 3 | We heavily borrowed code from [P-STMO](https://github.com/paTRICK-swk/P-STMO) to train and evaluate our model on MPI-INF-3DHP. 4 | 5 | **Note:** We did not use Deformable Context Extraction for this dataset, as our input is ground-truth 2D keypoints. 6 | 7 | ### Dataset Preparation 8 | 9 | 1. Download and pre-process the data by running the following (this may take a while to complete): 10 | 11 | ~~~shell 12 | bash dataset/process_data.sh 13 | ~~~ 14 | 15 | This handles (1) downloading the data, (2) extracting the labels, and (3) processing the raw videos. 16 | 17 | 2. Download the COCO pre-trained weights for HRNet-32/HRNet-48 from https://drive.google.com/drive/folders/1nzM_OBV9LbAEA7HClC0chEyf_7ECDXYA and place them under `dataset/pretrained/`. 18 | 19 | 20 | 3. Your `dataset` directory should look like this if you followed the previous steps correctly: 21 | 22 | ```bash 23 | dataset/ 24 | ├── process_data.sh 25 | ├── data_train_3dhp.npz 26 | ├── data_test_3dhp.npz 27 | ├── data_util/ 28 | └── mpi_inf_3dhp/ 29 | ├── ... 30 | └── images/ 31 | └── mpi_inf_3dhp_test_set/ 32 | ├── ... 33 | └── images/ 34 | └── pretrained/ 35 | ├── pose_hrnet_w32_256x192.pth 36 | └── pose_hrnet_w48_256x192.pth 37 | ``` 38 | 39 | ### Train 40 | 41 | Use the following command to train our HRNet-32 model: 42 | 43 | ``` 44 | python run_3dhp.py -f 1 -b 160 --train 1 --lr 0.0007 -lrd 0.97 --backbone hrnet_32 45 | ``` 46 | 47 | Similarly, for HRNet-48, run the following command: 48 | 49 | ``` 50 | python run_3dhp.py -f 1 -b 160 --train 1 --lr 0.0007 -lrd 0.97 --backbone hrnet_48 51 | ``` 52 | 53 | ### Evaluation 54 | 55 | A simple evaluation can be done by running: 56 | 57 | ``` 58 | python run_3dhp.py -f 1 -b 160 --train 0 --reload 1 --backbone hrnet_32 59 | ``` 60 | 61 | Likewise, run this for the HRNet-48 model: 62 | 63 | ``` 64 | python run_3dhp.py -f 1 -b 160 --train 0 --reload 1 --backbone hrnet_48 65 | ``` 66 | 67 | Our checkpoints are released [here](https://drive.google.com/drive/folders/1O_i3OUTnqlkLWFu_3WKPU7YepWhItd59?usp=drive_link), and we assume you have placed them (`HRNet_32_64_no_refine_24_3214.pth` or `HRNet_48_96_no_refine_45_3125.pth`) under `checkpoint/`. For more metrics (e.g., PCK), please follow the instructions in the [original repo](https://github.com/paTRICK-swk/P-STMO?tab=readme-ov-file#mpi-inf-3dhp). 68 | 69 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | 14 | # pose_resnet related params 15 | POSE_RESNET = CN() 16 | POSE_RESNET.NUM_LAYERS = 50 17 | POSE_RESNET.DECONV_WITH_BIAS = False 18 | POSE_RESNET.NUM_DECONV_LAYERS = 3 19 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 20 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 21 | POSE_RESNET.FINAL_CONV_KERNEL = 1 22 | POSE_RESNET.PRETRAINED_LAYERS = ['*'] 23 | 24 | # pose_multi_resoluton_net related params 25 | POSE_HIGH_RESOLUTION_NET = CN() 26 | POSE_HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 27 | POSE_HIGH_RESOLUTION_NET.STEM_INPLANES = 64 28 | POSE_HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 29 | 30 | POSE_HIGH_RESOLUTION_NET.STAGE2 = CN() 31 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 32 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 33 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 34 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64] 35 | POSE_HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC' 36 | POSE_HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 37 | 38 | POSE_HIGH_RESOLUTION_NET.STAGE3 = CN() 39 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 40 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 41 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 42 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128] 43 | POSE_HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC' 44 | POSE_HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 45 | 46 | POSE_HIGH_RESOLUTION_NET.STAGE4 = CN() 47 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 48 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 49 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 50 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 51 | POSE_HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC' 52 | POSE_HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 53 | 54 | 55 | MODEL_EXTRAS = { 56 | 'pose_resnet': POSE_RESNET, 57 | 'pose_high_resolution_net': POSE_HIGH_RESOLUTION_NET, 58 | } 59 | -------------------------------------------------------------------------------- /H36M-Toolbox/checksums.txt: -------------------------------------------------------------------------------- 1 | 69e038858ace96ba5f6c5ccea52e95e8 archives/Poses_D2_Positions_S1.tgz 2 | d4ae2827d0227dea8c88e6a082763d0a archives/Poses_D3_Positions_S1.tgz 3 | 4c844740ba583517c74b6c496c190761 archives/Poses_D3_Positions_mono_S1.tgz 4 | 3c75f06fdf3c4f3b8fb1f8f11d18a10e archives/Poses_D3_Positions_mono_universal_S1.tgz 5 | d517e6c0b1112427b2a39fcbd732281c archives/Videos_S1.tgz 6 | 7ac8c4830468a1ed3464076ee9603632 archives/Poses_D2_Positions_S5.tgz 7 | 7a0bd0f458612decc9de0a04e0b589cc archives/Poses_D3_Positions_S5.tgz 8 | 4e14165ed00b7aff1111a81c1ca4b7b3 archives/Poses_D3_Positions_mono_S5.tgz 9 | a0c821f5501fcc450e28c38e5ebd0c17 archives/Poses_D3_Positions_mono_universal_S5.tgz 10 | 02ef041813c3a37b137f86df24419e5a archives/Videos_S5.tgz 11 | 5f9706d5259f648cca802c069dec9681 archives/Poses_D2_Positions_S6.tgz 12 | 0970a30cbc947c3c0454c834db9b84e0 archives/Poses_D3_Positions_S6.tgz 13 | 9681696b33a0d487493330e825b408d6 archives/Poses_D3_Positions_mono_S6.tgz 14 | dce0fb2f44b487b2bd36f603d1ff894a archives/Poses_D3_Positions_mono_universal_S6.tgz 15 | a4b8690e5320c5854f99f60bf31cbabc archives/Videos_S6.tgz 16 | 
543c4053c962db54d1d7361d4accffb4 archives/Poses_D2_Positions_S7.tgz 17 | abeea2a40650517cefb7cd911caa6472 archives/Poses_D3_Positions_S7.tgz 18 | 807109c1a304ce67c6f0cc06a94846fc archives/Poses_D3_Positions_mono_S7.tgz 19 | 848717a95a96336ec7707b20ec463965 archives/Poses_D3_Positions_mono_universal_S7.tgz 20 | 79caf93c6ec31b1c14cd1d31d5f292e0 archives/Videos_S7.tgz 21 | e9de190d782452edc954ac191907adcf archives/Poses_D2_Positions_S8.tgz 22 | 5695796fe478579ffe9b9ff09203dd27 archives/Poses_D3_Positions_S8.tgz 23 | da8b6c948e7dcd280061cd4d99d7352f archives/Poses_D3_Positions_mono_S8.tgz 24 | 8f5182924c29721d9c4227aa43e3d7b3 archives/Poses_D3_Positions_mono_universal_S8.tgz 25 | 18818148e68fcd80fce1efa82f98126d archives/Videos_S8.tgz 26 | 232c2244afae96cb900908c6825d478c archives/Poses_D2_Positions_S9.tgz 27 | fce28bb66bf9908016e2d9738e5cb2db archives/Poses_D3_Positions_S9.tgz 28 | 0fad285a69fdcdf4958cc4c80d93abbc archives/Poses_D3_Positions_mono_S9.tgz 29 | bbc436bc0f35bd09e272ad0ed1f188e2 archives/Poses_D3_Positions_mono_universal_S9.tgz 30 | 3e7d923d5c573ac833334a31b5f8a797 archives/Videos_S9.tgz 31 | df1fde6b5656729336f54dcd79ab6e47 archives/Poses_D2_Positions_S11.tgz 32 | 729e93d4e50c806f4a55fd1b87e2ff52 archives/Poses_D3_Positions_S11.tgz 33 | 944a8bca62a933f5d630a835868fba23 archives/Poses_D3_Positions_mono_S11.tgz 34 | c00b5b22ed1b88de5a536433e300503e archives/Poses_D3_Positions_mono_universal_S11.tgz 35 | 13a24f30eb4e7cc505cbf80410c90ffe archives/Videos_S11.tgz 36 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/custom_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | import numpy as np 9 | import copy 10 | from common.skeleton import Skeleton 11 | from common.mocap_dataset import MocapDataset 12 | from common.camera import normalize_screen_coordinates, image_coordinates 13 | from common.h36m_dataset import h36m_skeleton 14 | 15 | 16 | custom_camera_params = { 17 | 'id': None, 18 | 'res_w': None, # Pulled from metadata 19 | 'res_h': None, # Pulled from metadata 20 | 21 | # Dummy camera parameters (taken from Human3.6M), only for visualization purposes 22 | 'azimuth': 70, # Only used for visualization 23 | 'orientation': [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088], 24 | 'translation': [1841.1070556640625, 4955.28466796875, 1563.4454345703125], 25 | } 26 | 27 | class CustomDataset(MocapDataset): 28 | def __init__(self, detections_path, remove_static_joints=True): 29 | super().__init__(fps=None, skeleton=h36m_skeleton) 30 | 31 | # Load serialized dataset 32 | data = np.load(detections_path, allow_pickle=True) 33 | resolutions = data['metadata'].item()['video_metadata'] 34 | 35 | self._cameras = {} 36 | self._data = {} 37 | for video_name, res in resolutions.items(): 38 | cam = {} 39 | cam.update(custom_camera_params) 40 | cam['orientation'] = np.array(cam['orientation'], dtype='float32') 41 | cam['translation'] = np.array(cam['translation'], dtype='float32') 42 | cam['translation'] = cam['translation']/1000 # mm to meters 43 | 44 | cam['id'] = video_name 45 | cam['res_w'] = res['w'] 46 | cam['res_h'] = res['h'] 47 | 48 | self._cameras[video_name] = [cam] 49 | 50 | self._data[video_name] = { 51 | 'custom': { 52 | 'cameras': cam 53 | } 54 | } 55 | 56 | if remove_static_joints: 57 | # Bring the skeleton to 17 joints instead of the original 32 58 | self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31]) 59 | 60 | # Rewire shoulders to the correct parents 61 | self._skeleton._parents[11] = 8 62 | self._skeleton._parents[14] = 8 63 | 64 | def supports_semi_supervised(self): 65 | return False 66 | -------------------------------------------------------------------------------- /ContextPose_mpi/common/camera.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import torch 4 | 5 | def normalize_screen_coordinates(X, w, h): 6 | assert X.shape[-1] == 2 7 | return X / w * 2 - [1, h / w] 8 | 9 | 10 | def image_coordinates(X, w, h): 11 | assert X.shape[-1] == 2 12 | 13 | # Reverse camera frame normalization 14 | return (X + [1, h / w]) * w / 2 15 | 16 | def world_to_camera(X, R, t): 17 | Rt = wrap(qinverse, R) 18 | return wrap(qrot, np.tile(Rt, (*X.shape[:-1], 1)), X - t) 19 | 20 | def camera_to_world(X, R, t): 21 | return wrap(qrot, np.tile(R, (*X.shape[:-1], 1)), X) + t 22 | 23 | def wrap(func, *args, unsqueeze=False): 24 | args = list(args) 25 | for i, arg in enumerate(args): 26 | if type(arg) == np.ndarray: 27 | args[i] = torch.from_numpy(arg) 28 | if unsqueeze: 29 | args[i] = args[i].unsqueeze(0) 30 | 31 | result = func(*args) 32 | 33 | if isinstance(result, tuple): 34 | result = list(result) 35 | for i, res in enumerate(result): 36 | if type(res) == torch.Tensor: 37 | if unsqueeze: 38 | res = res.squeeze(0) 39 | result[i] = res.numpy() 40 | return tuple(result) 41 | elif type(result) == torch.Tensor: 42 | if unsqueeze: 43 | result = result.squeeze(0) 44 | return result.numpy() 45 | else: 46 | return result 47 | 48 | def qrot(q, v): 49 | assert q.shape[-1] == 4 50 | assert v.shape[-1] == 3 51 | assert q.shape[:-1] == 
v.shape[:-1] 52 | 53 | qvec = q[..., 1:] 54 | uv = torch.cross(qvec, v, dim=len(q.shape) - 1) 55 | uuv = torch.cross(qvec, uv, dim=len(q.shape) - 1) 56 | return (v + 2 * (q[..., :1] * uv + uuv)) 57 | 58 | 59 | def qinverse(q, inplace=False): 60 | if inplace: 61 | q[..., 1:] *= -1 62 | return q 63 | else: 64 | w = q[..., :1] 65 | xyz = q[..., 1:] 66 | return torch.cat((w, -xyz), dim=len(q.shape) - 1) 67 | 68 | 69 | def get_uvd2xyz(uvd, gt_3D, cam): 70 | N, T, V,_ = uvd.size() 71 | 72 | dec_out_all = uvd.view(-1, T, V, 3).clone() 73 | root = gt_3D[:, :, 0, :].unsqueeze(-2).repeat(1, 1, V, 1).clone() 74 | enc_in_all = uvd[:, :, :, :2].view(-1, T, V, 2).clone() 75 | 76 | cam_f_all = cam[..., :2].view(-1,1,1,2).repeat(1,T,V,1) 77 | cam_c_all = cam[..., 2:4].view(-1,1,1,2).repeat(1,T,V,1) 78 | 79 | z_global = dec_out_all[:, :, :, 2] 80 | z_global[:, :, 0] = root[:, :, 0, 2] 81 | z_global[:, :, 1:] = dec_out_all[:, :, 1:, 2] + root[:, :, 1:, 2] 82 | z_global = z_global.unsqueeze(-1) 83 | 84 | uv = enc_in_all - cam_c_all 85 | xy = uv * z_global.repeat(1, 1, 1, 2) / cam_f_all 86 | xyz_global = torch.cat((xy, z_global), -1) 87 | xyz_offset = (xyz_global - xyz_global[:, :, 0, :].unsqueeze(-2).repeat(1, 1, V, 1)) 88 | 89 | return xyz_offset 90 | 91 | 92 | -------------------------------------------------------------------------------- /ContextPose_mpi/common/skeleton.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | 5 | class Skeleton: 6 | def __init__(self, parents, joints_left, joints_right): 7 | assert len(joints_left) == len(joints_right) 8 | 9 | self._parents = np.array(parents) 10 | self._joints_left = joints_left 11 | self._joints_right = joints_right 12 | self._compute_metadata() 13 | 14 | def num_joints(self): 15 | return len(self._parents) 16 | 17 | def parents(self): 18 | return self._parents 19 | 20 | def has_children(self): 21 | return self._has_children 22 | 23 | def children(self): 24 | return self._children 25 | 26 | def remove_joints(self, joints_to_remove): 27 | 28 | valid_joints = [] 29 | for joint in range(len(self._parents)): 30 | if joint not in joints_to_remove: 31 | valid_joints.append(joint) 32 | 33 | for i in range(len(self._parents)): 34 | while self._parents[i] in joints_to_remove: 35 | self._parents[i] = self._parents[self._parents[i]] 36 | 37 | index_offsets = np.zeros(len(self._parents), dtype=int) 38 | new_parents = [] 39 | for i, parent in enumerate(self._parents): 40 | if i not in joints_to_remove: 41 | new_parents.append(parent - index_offsets[parent]) 42 | else: 43 | index_offsets[i:] += 1 44 | self._parents = np.array(new_parents) 45 | 46 | if self._joints_left is not None: 47 | new_joints_left = [] 48 | for joint in self._joints_left: 49 | if joint in valid_joints: 50 | new_joints_left.append(joint - index_offsets[joint]) 51 | self._joints_left = new_joints_left 52 | if self._joints_right is not None: 53 | new_joints_right = [] 54 | for joint in self._joints_right: 55 | if joint in valid_joints: 56 | new_joints_right.append(joint - index_offsets[joint]) 57 | self._joints_right = new_joints_right 58 | 59 | self._compute_metadata() 60 | 61 | return valid_joints 62 | 63 | def joints_left(self): 64 | return self._joints_left 65 | 66 | def joints_right(self): 67 | return self._joints_right 68 | 69 | def _compute_metadata(self): 70 | self._has_children = np.zeros(len(self._parents)).astype(bool) 71 | for i, parent in enumerate(self._parents): 72 | if parent != -1: 73 | self._has_children[parent] = 
True 74 | 75 | self._children = [] 76 | for i, parent in enumerate(self._parents): 77 | self._children.append([]) 78 | for i, parent in enumerate(self._parents): 79 | if parent != -1: 80 | self._children[parent].append(i) 81 | 82 | 83 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | import torch 9 | import numpy as np 10 | import hashlib 11 | 12 | def wrap(func, *args, unsqueeze=False): 13 | """ 14 | Wrap a torch function so it can be called with NumPy arrays. 15 | Input and return types are seamlessly converted. 16 | """ 17 | 18 | # Convert input types where applicable 19 | args = list(args) 20 | for i, arg in enumerate(args): 21 | if type(arg) == np.ndarray: 22 | args[i] = torch.from_numpy(arg) 23 | if unsqueeze: 24 | args[i] = args[i].unsqueeze(0) 25 | 26 | result = func(*args) 27 | 28 | # Convert output types where applicable 29 | if isinstance(result, tuple): 30 | result = list(result) 31 | for i, res in enumerate(result): 32 | if type(res) == torch.Tensor: 33 | if unsqueeze: 34 | res = res.squeeze(0) 35 | result[i] = res.numpy() 36 | return tuple(result) 37 | elif type(result) == torch.Tensor: 38 | if unsqueeze: 39 | result = result.squeeze(0) 40 | return result.numpy() 41 | else: 42 | return result 43 | 44 | def deterministic_random(min_value, max_value, data): 45 | digest = hashlib.sha256(data.encode()).digest() 46 | raw_value = int.from_bytes(digest[:4], byteorder='little', signed=False) 47 | return int(raw_value / (2**32 - 1) * (max_value - min_value)) + min_value 48 | 49 | def load_pretrained_weights(model, checkpoint): 50 | """Load pretrianed weights to model 51 | Incompatible layers (unmatched in name or size) will be ignored 52 | Args: 53 | - model (nn.Module): network model, which must not be nn.DataParallel 54 | - weight_path (str): path to pretrained weights 55 | """ 56 | import collections 57 | if 'state_dict' in checkpoint: 58 | state_dict = checkpoint['state_dict'] 59 | else: 60 | state_dict = checkpoint 61 | model_dict = model.state_dict() 62 | new_state_dict = collections.OrderedDict() 63 | matched_layers, discarded_layers = [], [] 64 | for k, v in state_dict.items(): 65 | # If the pretrained state_dict was saved as nn.DataParallel, 66 | # keys would contain "module.", which should be ignored. 67 | if k.startswith('module.'): 68 | k = k[7:] 69 | if k in model_dict and model_dict[k].size() == v.size(): 70 | new_state_dict[k] = v 71 | matched_layers.append(k) 72 | else: 73 | discarded_layers.append(k) 74 | # new_state_dict.requires_grad = False 75 | model_dict.update(new_state_dict) 76 | 77 | model.load_state_dict(model_dict) 78 | print('load_weight', len(matched_layers)) 79 | # model.state_dict(model_dict).requires_grad = False 80 | return model 81 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/camera.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
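The wrap helper in H36M-Toolbox/common/utils.py above converts NumPy inputs to torch tensors, calls the wrapped function, and converts the result back. A small usage sketch with the quaternion routines from common/quaternion.py (array shapes are illustrative):

import numpy as np
from common.utils import wrap
from common.quaternion import qrot, qinverse   # torch implementations

# Rotate a batch of 3D points with unit quaternions while staying in NumPy
q = np.random.randn(8, 4).astype('float32')
q /= np.linalg.norm(q, axis=-1, keepdims=True)     # qrot assumes unit quaternions
v = np.random.randn(8, 3).astype('float32')

v_rot = wrap(qrot, q, v)                           # NumPy in, NumPy out
v_back = wrap(qrot, wrap(qinverse, q), v_rot)      # inverse rotation recovers v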
6 | # 7 | 8 | import numpy as np 9 | import torch 10 | 11 | from common.utils import wrap 12 | from common.quaternion import qrot, qinverse 13 | 14 | def normalize_screen_coordinates(X, w, h): 15 | assert X.shape[-1] == 2 16 | 17 | # Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio 18 | return X/w*2 - [1, h/w] 19 | 20 | 21 | def image_coordinates(X, w, h): 22 | assert X.shape[-1] == 2 23 | 24 | # Reverse camera frame normalization 25 | return (X + [1, h/w])*w/2 26 | 27 | 28 | def world_to_camera(X, R, t): 29 | Rt = wrap(qinverse, R) # Invert rotation 30 | return wrap(qrot, np.tile(Rt, (*X.shape[:-1], 1)), X - t) # Rotate and translate 31 | 32 | 33 | def camera_to_world(X, R, t): 34 | return wrap(qrot, np.tile(R, (*X.shape[:-1], 1)), X) + t 35 | 36 | 37 | def project_to_2d(X, camera_params): 38 | """ 39 | Project 3D points to 2D using the Human3.6M camera projection function. 40 | This is a differentiable and batched reimplementation of the original MATLAB script. 41 | 42 | Arguments: 43 | X -- 3D points in *camera space* to transform (N, *, 3) 44 | camera_params -- intrinsic parameteres (N, 2+2+3+2=9) 45 | """ 46 | assert X.shape[-1] == 3 47 | assert len(camera_params.shape) == 2 48 | assert camera_params.shape[-1] == 9 49 | assert X.shape[0] == camera_params.shape[0] 50 | 51 | while len(camera_params.shape) < len(X.shape): 52 | camera_params = camera_params.unsqueeze(1) 53 | 54 | f = camera_params[..., :2] 55 | c = camera_params[..., 2:4] 56 | k = camera_params[..., 4:7] 57 | p = camera_params[..., 7:] 58 | 59 | XX = torch.clamp(X[..., :2] / X[..., 2:], min=-1, max=1) 60 | r2 = torch.sum(XX[..., :2]**2, dim=len(XX.shape)-1, keepdim=True) 61 | 62 | radial = 1 + torch.sum(k * torch.cat((r2, r2**2, r2**3), dim=len(r2.shape)-1), dim=len(r2.shape)-1, keepdim=True) 63 | tan = torch.sum(p*XX, dim=len(XX.shape)-1, keepdim=True) 64 | 65 | XXX = XX*(radial + tan) + p*r2 66 | 67 | return f*XXX + c 68 | 69 | def project_to_2d_linear(X, camera_params): 70 | """ 71 | Project 3D points to 2D using only linear parameters (focal length and principal point). 72 | 73 | Arguments: 74 | X -- 3D points in *camera space* to transform (N, *, 3) 75 | camera_params -- intrinsic parameteres (N, 2+2+3+2=9) 76 | """ 77 | assert X.shape[-1] == 3 78 | assert len(camera_params.shape) == 2 79 | assert camera_params.shape[-1] == 9 80 | assert X.shape[0] == camera_params.shape[0] 81 | 82 | while len(camera_params.shape) < len(X.shape): 83 | camera_params = camera_params.unsqueeze(1) 84 | 85 | f = camera_params[..., :2] 86 | c = camera_params[..., 2:4] 87 | 88 | XX = torch.clamp(X[..., :2] / X[..., 2:], min=-1, max=1) 89 | 90 | return f*XX + c -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_test_predictions_py.m: -------------------------------------------------------------------------------- 1 | 2 | test_subject_id = [1,2,3,4,5,6]; 3 | test_data_path = '../'; %Change to wherever you put this data. 
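project_to_2d in H36M-Toolbox/common/camera.py above takes camera-space points plus a 9-vector of intrinsics laid out as (fx, fy, cx, cy, k1, k2, k3, p1, p2). A minimal call sketch; the intrinsic values below are placeholders, not the real Human3.6M calibration:

import torch
from common.camera import project_to_2d

N, J = 4, 17
X = torch.randn(N, J, 3)
X[..., 2] += 5.0                      # keep the points in front of the camera

# (fx, fy, cx, cy, k1, k2, k3, p1, p2) -- one row of placeholder intrinsics per sample
cam = torch.tensor([[1145.0, 1144.0, 512.0, 515.0, -0.21, 0.24, -0.0018, -0.0009, -0.0008]])
cam = cam.repeat(N, 1)

x2d = project_to_2d(X, cam)           # -> (N, J, 2) image coordinates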
4 | data_base_path = [test_data_path filesep 'TS']; 5 | % addpath('') 6 | % addpath('../util'); 7 | 8 | [~,o1,o2,relevant_labels] = mpii_get_joints('relevant'); 9 | 10 | % net_base = 'PathToYourAwesomeMethod'; 11 | % snapshot_base = net_base; 12 | 13 | %% Load the nets, or plug in your method and do predictions 14 | net_path = {}; 15 | % Usage: net_path = [net_path; {, }]; 16 | % Example: net_path = [net_path; {'MyAwesome3DNet', 135000}]; 17 | % caffe.set_mode_gpu() 18 | % caffe.set_device(0) 19 | 20 | % for n = 1:size(net_path,1) 21 | %n = 1; 22 | %NN or whatever fancy method you are using 23 | % caffe.reset_all(); 24 | % net = caffe.Net(fullfile(net_base, net_path{n,1}, 'deploy_net.prototxt'), fullfile(snapshot_base, net_path{n,1}, sprintf('snapshot_iter_%d.caffemodel', net_path{n,2})), 'test'); 25 | % fprintf('Doing %s \n', net_path{n,1}); 26 | %% 27 | load('..\..\checkpoint\inference_data.mat') 28 | sequencewise_per_joint_error = cell(6,1); 29 | sequencewise_activity_labels = cell(6,1); 30 | for i = 1:length(test_subject_id) 31 | dat = load([data_base_path int2str(test_subject_id(i)) filesep 'annot_data.mat']); 32 | num_test_points = sum(dat.valid_frame(:)); 33 | per_joint_error = zeros(17,1,num_test_points); 34 | pje_idx = 1; 35 | sequencewise_activity_labels{i} = dat.activity_annotation(dat.valid_frame == 1); 36 | %% 37 | seq_name = ['TS',int2str(test_subject_id(i))]; 38 | pred_seq = eval(seq_name); 39 | for j = 1:length(dat.valid_frame) 40 | if(dat.valid_frame(j)) 41 | fprintf('Image %d of %d for Test ID %d\n',j, length(dat.annot2), test_subject_id(i)); 42 | error = zeros(17,1); 43 | 44 | %img = imread([data_base_path int2str(test_subject_id(i)) filesep 'imageSequence' filesep sprintf('img_%06d.jpg',j)]); 45 | %The GT has 17 joints, and the order and the annotation of the joints can be observed through the 'relevant_labels' variable 46 | P = dat.univ_annot3(:,:,:,j)-repmat(dat.univ_annot3(:,15,:,j),1,17); 47 | 48 | % 49 | pred_p = pred_seq(:,:,:,pje_idx); %Replace with the actual prediction formatted as 3x17; 50 | error_p = (pred_p - P).^2; 51 | error_p = sqrt(sum(error_p, 1)); 52 | error(:,1) = error(:,1) + error_p(:); 53 | 54 | 55 | per_joint_error(:,:,pje_idx) = error; 56 | pje_idx = pje_idx +1; 57 | end 58 | end 59 | sequencewise_per_joint_error{i} = per_joint_error; 60 | 61 | end 62 | 63 | save([test_data_path filesep 'mpii_3dhp_prediction.mat'], 'sequencewise_per_joint_error', 'sequencewise_activity_labels'); 64 | [seq_table, activity_table] = mpii_evaluate_errors(sequencewise_per_joint_error, sequencewise_activity_labels); 65 | 66 | out_file = [test_data_path filesep 'mpii_3dhp_evaluation']; 67 | writetable(cell2table(seq_table), [out_file '_sequencewise.csv']); 68 | writetable(cell2table(activity_table), [out_file '_activitywise.csv']); 69 | 70 | % end 71 | 72 | % 73 | % 74 | % 75 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/skeleton.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
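mpii_test_predictions_py.m above loads checkpoint/inference_data.mat and expects one variable per test sequence, named TS1 through TS6, each indexed as pred_seq(:,:,:,idx) to yield a 3x17 root-centred pose. A hedged export sketch from Python, assuming predictions are held as (N, 17, 3) NumPy arrays in millimetres and that scipy is available:

import numpy as np
import scipy.io as sio

def export_for_matlab_eval(preds_per_seq, out_path='checkpoint/inference_data.mat'):
    """preds_per_seq: e.g. {'TS1': (N, 17, 3) root-centred predictions in mm, ..., 'TS6': ...}"""
    mat_dict = {}
    for name, pred in preds_per_seq.items():
        # (N, 17, 3) -> (3, 17, 1, N) so that pred_seq(:,:,:,idx) in MATLAB yields a 3x17 pose
        mat_dict[name] = np.transpose(pred, (2, 1, 0))[:, :, np.newaxis, :]
    sio.savemat(out_path, mat_dict)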
6 | # 7 | 8 | import numpy as np 9 | 10 | class Skeleton: 11 | def __init__(self, parents, joints_left, joints_right): 12 | assert len(joints_left) == len(joints_right) 13 | 14 | self._parents = np.array(parents) 15 | self._joints_left = joints_left 16 | self._joints_right = joints_right 17 | self._compute_metadata() 18 | 19 | def num_joints(self): 20 | return len(self._parents) 21 | 22 | def parents(self): 23 | return self._parents 24 | 25 | def has_children(self): 26 | return self._has_children 27 | 28 | def children(self): 29 | return self._children 30 | 31 | def remove_joints(self, joints_to_remove): 32 | """ 33 | Remove the joints specified in 'joints_to_remove'. 34 | """ 35 | valid_joints = [] 36 | for joint in range(len(self._parents)): 37 | if joint not in joints_to_remove: 38 | valid_joints.append(joint) 39 | 40 | for i in range(len(self._parents)): 41 | while self._parents[i] in joints_to_remove: 42 | self._parents[i] = self._parents[self._parents[i]] 43 | 44 | index_offsets = np.zeros(len(self._parents), dtype=int) 45 | new_parents = [] 46 | for i, parent in enumerate(self._parents): 47 | if i not in joints_to_remove: 48 | new_parents.append(parent - index_offsets[parent]) 49 | else: 50 | index_offsets[i:] += 1 51 | self._parents = np.array(new_parents) 52 | 53 | 54 | if self._joints_left is not None: 55 | new_joints_left = [] 56 | for joint in self._joints_left: 57 | if joint in valid_joints: 58 | new_joints_left.append(joint - index_offsets[joint]) 59 | self._joints_left = new_joints_left 60 | if self._joints_right is not None: 61 | new_joints_right = [] 62 | for joint in self._joints_right: 63 | if joint in valid_joints: 64 | new_joints_right.append(joint - index_offsets[joint]) 65 | self._joints_right = new_joints_right 66 | 67 | self._compute_metadata() 68 | 69 | return valid_joints 70 | 71 | def joints_left(self): 72 | return self._joints_left 73 | 74 | def joints_right(self): 75 | return self._joints_right 76 | 77 | def _compute_metadata(self): 78 | self._has_children = np.zeros(len(self._parents)).astype(bool) 79 | for i, parent in enumerate(self._parents): 80 | if parent != -1: 81 | self._has_children[parent] = True 82 | 83 | self._children = [] 84 | for i, parent in enumerate(self._parents): 85 | self._children.append([]) 86 | for i, parent in enumerate(self._parents): 87 | if parent != -1: 88 | self._children[parent].append(i) -------------------------------------------------------------------------------- /ContextPose/mvn/models/networks/refineNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class Bottleneck(nn.Module): 4 | expansion = 4 5 | 6 | def __init__(self, inplanes, planes, stride=1): 7 | super(Bottleneck, self).__init__() 8 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 9 | self.bn1 = nn.BatchNorm2d(planes) 10 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 11 | padding=1, bias=False) 12 | self.bn2 = nn.BatchNorm2d(planes) 13 | self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=False) 14 | self.bn3 = nn.BatchNorm2d(planes * 2) 15 | self.relu = nn.ReLU(inplace=True) 16 | 17 | self.downsample = nn.Sequential( 18 | nn.Conv2d(inplanes, planes * 2, 19 | kernel_size=1, stride=stride, bias=False), 20 | nn.BatchNorm2d(planes * 2), 21 | ) 22 | 23 | self.stride = stride 24 | 25 | def forward(self, x): 26 | residual = x 27 | 28 | out = self.conv1(x) 29 | out = self.bn1(out) 30 | out = self.relu(out) 31 | 32 | out = self.conv2(out) 33 | out 
= self.bn2(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv3(out) 37 | out = self.bn3(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | class refineNet(nn.Module): 48 | def __init__(self, lateral_channel, out_shape, num_class): 49 | super(refineNet, self).__init__() 50 | cascade = [] 51 | num_cascade = 4 52 | for i in range(num_cascade): 53 | cascade.append(self._make_layer(lateral_channel, num_cascade-i-1, out_shape)) 54 | self.cascade = nn.ModuleList(cascade) 55 | self.final_predict = self._predict(4*lateral_channel, num_class) 56 | 57 | def _make_layer(self, input_channel, num, output_shape): 58 | layers = [] 59 | for i in range(num): 60 | layers.append(Bottleneck(input_channel, 128)) 61 | layers.append(nn.Upsample(size=output_shape, mode='bilinear', align_corners=True)) 62 | return nn.Sequential(*layers) 63 | 64 | def _predict(self, input_channel, num_class): 65 | layers = [] 66 | layers.append(Bottleneck(input_channel, 128)) 67 | layers.append(nn.Conv2d(256, num_class, 68 | kernel_size=3, stride=1, padding=1, bias=False)) 69 | layers.append(nn.BatchNorm2d(num_class)) 70 | return nn.Sequential(*layers) 71 | 72 | def forward(self, x): 73 | refine_fms = [] 74 | for i in range(4): 75 | refine_fms.append(self.cascade[i](x[i])) 76 | # for i in range(4): 77 | # print(refine_fms[i].shape) 78 | # out = torch.cat(refine_fms, dim=1) 79 | # print(out.shape) 80 | # out = self.final_predict(out) 81 | # print(out.shape) 82 | # torch.Size([256, 256, 64, 48]) 83 | # torch.Size([256, 256, 64, 48]) 84 | # torch.Size([256, 256, 64, 48]) 85 | # torch.Size([256, 256, 64, 48]) 86 | # torch.Size([256, 1024, 64, 48]) 87 | # torch.Size([256, 17, 64, 48]) 88 | return refine_fms 89 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/camera_calibration/ts5-6cameras.calib: -------------------------------------------------------------------------------- 1 | tc camera calibration v0.3 2 | camera 0 GOPR0046_intrinsics.MP4 3 | colorCorrection 4 | red 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 5 | green 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 6 | blue 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 7 | frame 0 8 | sensorSize 10.000000000 5.625000000 # in mm 9 | focalLength 8.770747185 # in mm 10 | pixelAspect 0.993236423 # y / x 11 | centerOffset -0.104908645 0.104899704 # in mm (positive values move right and down) 12 | distortionModel OpenCV 13 | distortion -0.276859611 0.131125256 -0.000360494 -0.001149441 -0.049318332 14 | origin -2104.3074 1038.6707 -4596.6367 15 | up 0.025272345 0.995038509 0.096227370 16 | right -0.939647257 -0.009210289 0.342020929 17 | 18 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_get_joints.m: -------------------------------------------------------------------------------- 1 | function [joint_idx, joint_parents_o1, joint_parents_o2, joint_names] = mpii_get_joints(joint_set_name) 2 | 3 | original_joint_idx = [10, 13, 16, 19, 22, 25, 28, 29, 31, 36, 40, 42, 43, 45, 50, 54, 56, 57, 63, 64, 69, 70, 71, 77, 78, 83, 84, 85]; % 4 | 5 | original_joint_names = {'spine3', 'spine4', 'spine2', 'spine1', 'spine', ... %5 6 | 'neck', 'head', 'head_top', 'left_shoulder', 'left_arm', 'left_forearm', ... %11 7 | 'left_hand', 'left_hand_ee', 'right_shoulder', 'right_arm', 'right_forearm', 'right_hand', ... %17 8 | 'right_hand_ee', 'left_leg_up', 'left_leg', 'left_foot', 'left_toe', 'left_ee', ... %23 9 | 'right_leg_up' , 'right_leg', 'right_foot', 'right_toe', 'right_ee'}; 10 | 11 | 12 | all_joint_names = {'spine3', 'spine4', 'spine2', 'spine', 'pelvis', ... %5 13 | 'neck', 'head', 'head_top', 'left_clavicle', 'left_shoulder', 'left_elbow', ... %11 14 | 'left_wrist', 'left_hand', 'right_clavicle', 'right_shoulder', 'right_elbow', 'right_wrist', ... %17 15 | 'right_hand', 'left_hip', 'left_knee', 'left_ankle', 'left_foot', 'left_toe', ... %23 16 | 'right_hip' , 'right_knee', 'right_ankle', 'right_foot', 'right_toe'}; 17 | 18 | 19 | %The O1 and O2 indices are relaive to the joint_idx, regardless of the joint set 20 | 21 | switch joint_set_name 22 | %For internal use only!!! 
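The .calib blocks above store intrinsics in millimetres (sensorSize, focalLength, centerOffset, plus a y/x pixelAspect), so converting to pixel units needs the image resolution, which the file does not contain. A rough single-camera parsing sketch; the resolution arguments and the conversion to (fx, fy, cx, cy) are assumptions, and multi-camera files would need per-block handling:

def calib_to_intrinsics(calib_path, img_w, img_h):
    """Read one camera block of a tc .calib file and return (fx, fy, cx, cy) in pixels."""
    fields = {}
    with open(calib_path) as f:
        for line in f:
            parts = line.split('#')[0].split()     # strip trailing "# ..." comments
            if parts:
                fields[parts[0]] = parts[1:]       # later duplicate keys overwrite earlier ones
    sensor_w, sensor_h = (float(x) for x in fields['sensorSize'])
    focal_mm = float(fields['focalLength'][0])
    pixel_aspect = float(fields['pixelAspect'][0])             # y / x
    off_x, off_y = (float(x) for x in fields['centerOffset'])  # mm, positive = right / down
    fx = focal_mm / sensor_w * img_w
    fy = fx * pixel_aspect
    cx = img_w / 2.0 + off_x / sensor_w * img_w
    cy = img_h / 2.0 + off_y / sensor_h * img_h
    return fx, fy, cx, cy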
23 | case 'original' %%These give the original indices from the dumped out mddd file, the remaining joint sets are wrt the 'all' labels 24 | joint_idx = original_joint_idx; % 25 | joint_parents_o1 = [3, 1, 4, 5, 5, 2, 6, 7, 6, 9, 10, 11, 12, 6, 14, 15, 16, 17, 5, 19, 20, 21, 22, 5, 24, 25, 26, 27 ]; 26 | joint_parents_o2 = [4, 3, 5, 5, 5, 1, 2, 6, 2, 6, 9, 10, 11, 2, 6, 14, 15, 16, 4, 5, 19, 20, 21, 4, 5, 24, 25, 26]; 27 | joint_names = original_joint_names; 28 | %Use joint sets from here 29 | case 'all' 30 | joint_idx = 1:28; %These index into the joints extracted in the original set 31 | joint_parents_o1 = [3, 1, 4, 5, 5, 2, 6, 7, 6, 9, 10, 11, 12, 6, 14, 15, 16, 17, 5, 19, 20, 21, 22, 5, 24, 25, 26, 27 ]; 32 | joint_parents_o2 = [4, 3, 5, 5, 5, 1, 2, 6, 2, 6, 9, 10, 11, 2, 6, 14, 15, 16, 4, 5, 19, 20, 21, 4, 5, 24, 25, 26]; 33 | joint_names = all_joint_names; 34 | 35 | case 'cpm' %CPM Joints in CPM Order 36 | joint_idx = [8, 6, 15, 16, 17, 10, 11, 12, 24, 25, 26, 19, 20, 21, 5]; 37 | joint_parents_o1 = [ 2, 15, 2, 3, 4, 2, 6, 7, 15, 9, 10, 15, 12, 13, 15]; 38 | joint_parents_o2 = [15, 15, 15, 2, 3, 15, 2, 6, 2, 15, 9, 2, 15, 12, 15]; 39 | joint_names = all_joint_names(joint_idx); 40 | 41 | case 'relevant' %Human3.6m joints in CPM order 42 | joint_idx = [8, 6, 15, 16, 17, 10, 11, 12, 24, 25, 26, 19, 20, 21, 5, 4, 7]; 43 | joint_parents_o1 = [ 2, 16, 2, 3, 4, 2, 6, 7, 15, 9, 10, 15, 12, 13, 15, 15, 2]; 44 | joint_parents_o2 = [ 16, 15, 16, 2, 3, 16, 2, 6, 16, 15, 9, 16, 15, 12, 15, 15, 16]; 45 | joint_names = all_joint_names(joint_idx); 46 | 47 | otherwise 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/networks/globalNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import math 4 | 5 | class globalNet(nn.Module): 6 | def __init__(self, channel_settings, output_shape, num_class): 7 | super(globalNet, self).__init__() 8 | self.channel_settings = channel_settings 9 | laterals, upsamples, predict = [], [], [] 10 | for i in range(len(channel_settings)): 11 | laterals.append(self._lateral(channel_settings[i])) 12 | predict.append(self._predict(output_shape, num_class)) 13 | if i != len(channel_settings) - 1: 14 | upsamples.append(self._upsample()) 15 | self.laterals = nn.ModuleList(laterals) 16 | self.upsamples = nn.ModuleList(upsamples) 17 | self.predict = nn.ModuleList(predict) 18 | 19 | for m in self.modules(): 20 | if isinstance(m, nn.Conv2d): 21 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 22 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 23 | if m.bias is not None: 24 | m.bias.data.zero_() 25 | elif isinstance(m, nn.BatchNorm2d): 26 | m.weight.data.fill_(1) 27 | m.bias.data.zero_() 28 | 29 | def _lateral(self, input_size): 30 | layers = [] 31 | layers.append(nn.Conv2d(input_size, 256, 32 | kernel_size=1, stride=1, bias=False)) 33 | layers.append(nn.BatchNorm2d(256)) 34 | layers.append(nn.ReLU(inplace=True)) 35 | 36 | return nn.Sequential(*layers) 37 | 38 | def _upsample(self): 39 | layers = [] 40 | layers.append(torch.nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)) 41 | layers.append(torch.nn.Conv2d(256, 256, 42 | kernel_size=1, stride=1, bias=False)) 43 | layers.append(nn.BatchNorm2d(256)) 44 | 45 | return nn.Sequential(*layers) 46 | 47 | def _predict(self, output_shape, num_class): 48 | layers = [] 49 | layers.append(nn.Conv2d(256, 256, 50 | kernel_size=1, stride=1, bias=False)) 51 | layers.append(nn.BatchNorm2d(256)) 52 | layers.append(nn.ReLU(inplace=True)) 53 | 54 | layers.append(nn.Conv2d(256, num_class, 55 | kernel_size=3, stride=1, padding=1, bias=False)) 56 | layers.append(nn.Upsample(size=output_shape, mode='bilinear', align_corners=True)) 57 | layers.append(nn.BatchNorm2d(num_class)) 58 | 59 | return nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | global_fms, global_outs = [], [] 63 | for i in range(len(self.channel_settings)): 64 | if i == 0: 65 | feature = self.laterals[i](x[i]) 66 | else: 67 | feature = self.laterals[i](x[i]) + up 68 | global_fms.append(feature) 69 | if i != len(self.channel_settings) - 1: 70 | up = self.upsamples[i](feature) 71 | feature = self.predict[i](feature) 72 | # global_outs.append(feature) 73 | 74 | # 0 torch.Size([512, 256, 8, 6]) 75 | # 0 torch.Size([512, 17, 64, 48]) 76 | # 1 torch.Size([512, 256, 16, 12]) 77 | # 1 torch.Size([512, 17, 64, 48]) 78 | # 2 torch.Size([512, 256, 32, 24]) 79 | # 2 torch.Size([512, 17, 64, 48]) 80 | # 3 torch.Size([512, 256, 64, 48]) 81 | # 3 torch.Size([512, 17, 64, 48]) 82 | 83 | return global_fms # , global_outs 84 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_evaluate_errors.m: -------------------------------------------------------------------------------- 1 | function [sequencewise_table, activitywise_table] = mpii_evaluate_errors(sequencewise_error, sequencewise_activity) 2 | 3 | joint_groups = mpii_get_pck_auc_joint_groups(); 4 | [~,~,~,joint_names] = mpii_get_joints('relevant'); 5 | all_errors = []; 6 | all_activities = []; 7 | sequencewise_pck = {}; 8 | sequencewise_auc = {}; 9 | nj = length(joint_names); 10 | sequencewise_mpjpe = cell(length(sequencewise_error)+1,nj+2); 11 | sequencewise_mpjpe(1,2:(nj+1)) = joint_names; 12 | sequencewise_mpjpe{1,(nj+2)} = 'Average'; 13 | %Generate MPJPE and PCK/AUC By sequence first 14 | %error_dat = {}; 15 | %delete('error_dat'); 16 | for i = 1:length(sequencewise_error) 17 | if(isempty(all_errors)) 18 | all_errors = sequencewise_error{i}(:,1,:); 19 | else 20 | all_errors = cat(3,all_errors, sequencewise_error{i}(:,1,:)); 21 | end 22 | all_activities = [all_activities; sequencewise_activity{i}(:)]; 23 | 24 | error_dat(i) = mpii_3D_error(['TestSeq' int2str(i)], sequencewise_error{i}(:,1,:)); 25 | sequencewise_mpjpe{i+1,1}= ['TestSeq' int2str(i)]; 26 | mpjpe = mean(sequencewise_error{i}(:,1,:),3); 27 | sequencewise_mpjpe(i+1,2:(nj+1)) = num2cell(mpjpe'); 28 | sequencewise_mpjpe{i+1,(nj+2)} = mean(mpjpe(:)); 29 | end 30 | [pck, auc] = mpii_compute_3d_pck(error_dat, joint_groups, []); 31 | 
sequencewise_pck = [sequencewise_pck; pck]; 32 | sequencewise_pck{1,1} = 'PCK'; 33 | sequencewise_auc = [sequencewise_auc; auc]; 34 | sequencewise_auc{1,1} = 'AUC'; 35 | 36 | activitywise_pck = {}; 37 | activitywise_auc = {}; 38 | activitywise_mpjpe = cell(7+2,nj+2); 39 | activitywise_mpjpe(1,2:(nj+1)) = joint_names; 40 | activitywise_mpjpe{1,(nj+2)} = 'Average'; 41 | %Generate MPJPE and PCK/AUC By activity 42 | %error_dat = {}; 43 | clear('error_dat'); 44 | for i = 1:7 45 | error_dat(i) = mpii_3D_error(mpii_get_activity_name(i), all_errors(:,:,all_activities == i)); 46 | activitywise_mpjpe{i+1,1} = mpii_get_activity_name(i); 47 | mpjpe = mean(all_errors(:,:,all_activities == i),3); 48 | activitywise_mpjpe(i+1,2:(nj+1)) = num2cell(mpjpe'); 49 | activitywise_mpjpe{i+1,(nj+2)} = mean(mpjpe(:)); 50 | end 51 | overall_mpjpe = mean(all_errors,3); 52 | activitywise_mpjpe{end,1} = 'All'; 53 | activitywise_mpjpe(end,2:(nj+1)) = num2cell(overall_mpjpe'); 54 | activitywise_mpjpe{end,(nj+2)} = mean(overall_mpjpe(:)); 55 | [pck, auc] = mpii_compute_3d_pck(error_dat, joint_groups, []); 56 | activitywise_pck = [activitywise_pck; pck]; 57 | activitywise_pck{1,1} = 'PCK'; 58 | activitywise_auc = [activitywise_auc; auc]; 59 | activitywise_auc{1,1} = 'AUC'; 60 | clear('error_dat'); 61 | error_dat(1) = mpii_3D_error('All', all_errors); 62 | [pck, auc] = mpii_compute_3d_pck(error_dat, joint_groups, []); 63 | activitywise_pck = [activitywise_pck; pck(2:end,:)]; 64 | activitywise_auc = [activitywise_auc; auc(2:end,:)]; 65 | 66 | sequencewise_table = sequencewise_mpjpe; 67 | sequencewise_table(size(sequencewise_table,1)+1:size(sequencewise_table,1)+size(sequencewise_pck,1),1:size(sequencewise_pck,2)) = sequencewise_pck; 68 | sequencewise_table(size(sequencewise_table,1)+1:size(sequencewise_table,1)+size(sequencewise_auc,1),1:size(sequencewise_auc,2)) = sequencewise_auc; 69 | activitywise_table = activitywise_mpjpe; 70 | activitywise_table(size(activitywise_table,1)+1:size(activitywise_table,1)+size(activitywise_pck,1),1:size(activitywise_pck,2)) = activitywise_pck; 71 | activitywise_table(size(activitywise_table,1)+1:size(activitywise_table,1)+size(activitywise_auc,1),1:size(activitywise_auc,2)) = activitywise_auc; 72 | 73 | 74 | 75 | end -------------------------------------------------------------------------------- /H36M-Toolbox/download_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from subprocess import call 4 | from os import path, makedirs 5 | import hashlib 6 | from tqdm import tqdm 7 | import configparser 8 | import requests 9 | 10 | 11 | BASE_URL = 'http://vision.imar.ro/human3.6m/filebrowser.php' 12 | 13 | subjects = [ 14 | ('S1', 1), 15 | ('S5', 6), 16 | ('S6', 7), 17 | ('S7', 2), 18 | ('S8', 3), 19 | ('S9', 4), 20 | ('S11', 5), 21 | ] 22 | 23 | 24 | def md5(filename): 25 | hash_md5 = hashlib.md5() 26 | with open(filename, 'rb') as f: 27 | for chunk in iter(lambda: f.read(4096), b''): 28 | hash_md5.update(chunk) 29 | return hash_md5.hexdigest() 30 | 31 | 32 | def download_file(url, dest_file, phpsessid): 33 | call(['axel', 34 | '-a', 35 | '-n', '24', 36 | '-H', 'COOKIE: PHPSESSID=' + phpsessid, 37 | '-o', dest_file, 38 | url]) 39 | 40 | 41 | def get_phpsessid(): 42 | config = configparser.ConfigParser() 43 | config.read('config.ini') 44 | try: 45 | phpsessid = config['General']['PHPSESSID'] 46 | except (KeyError, configparser.NoSectionError): 47 | print('Could not read PHPSESSID from `config.ini`.') 48 | phpsessid = 
input('Enter PHPSESSID: ') 49 | return phpsessid 50 | 51 | 52 | def verify_phpsessid(phpsessid): 53 | requests.packages.urllib3.disable_warnings() 54 | test_url = 'http://vision.imar.ro/human3.6m/filebrowser.php' 55 | resp = requests.get(test_url, verify=False, cookies=dict(PHPSESSID=phpsessid)) 56 | fail_message = 'Failed to verify your PHPSESSID. Please ensure that you ' \ 57 | 'are currently logged in at http://vision.imar.ro/human3.6m/ ' \ 58 | 'and that you have copied the PHPSESSID cookie correctly.' 59 | assert resp.url == test_url, fail_message 60 | 61 | 62 | def download_all(phpsessid): 63 | checksums = {} 64 | with open('checksums.txt', 'r') as f: 65 | for line in f.read().splitlines(keepends=False): 66 | v, k = line.split(' ') 67 | checksums[k] = v 68 | 69 | files = [] 70 | for subject_id, id in subjects: 71 | files += [ 72 | ('Poses_D2_Positions_{}.tgz'.format(subject_id), 73 | 'download=1&filepath=Poses/D2_Positions&filename=SubjectSpecific_{}.tgz'.format(id)), 74 | ('Poses_D3_Positions_{}.tgz'.format(subject_id), 75 | 'download=1&filepath=Poses/D3_Positions&filename=SubjectSpecific_{}.tgz'.format(id)), 76 | ('Poses_D3_Positions_mono_{}.tgz'.format(subject_id), 77 | 'download=1&filepath=Poses/D3_Positions_mono&filename=SubjectSpecific_{}.tgz'.format(id)), 78 | ('Poses_D3_Positions_mono_universal_{}.tgz'.format(subject_id), 79 | 'download=1&filepath=Poses/D3_Positions_mono_universal&filename=SubjectSpecific_{}.tgz'.format(id)), 80 | ('Videos_{}.tgz'.format(subject_id), 81 | 'download=1&filepath=Videos&filename=SubjectSpecific_{}.tgz'.format(id)), 82 | ] 83 | 84 | out_dir = 'archives' 85 | makedirs(out_dir, exist_ok=True) 86 | 87 | for filename, query in tqdm(files, ascii=True): 88 | out_file = path.join(out_dir, filename) 89 | 90 | if path.isfile(out_file): 91 | checksum = md5(out_file) 92 | if checksums.get(out_file, None) == checksum: 93 | continue 94 | 95 | download_file(BASE_URL + '?' + query, out_file, phpsessid) 96 | 97 | 98 | if __name__ == '__main__': 99 | phpsessid = get_phpsessid() 100 | verify_phpsessid(phpsessid) 101 | download_all(phpsessid) 102 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/get_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to fetch and unzip the available data. 3 | # It uses config as the configuration file (duh!) 4 | echo "Reading configuration from ./config....." >&2 5 | source ./conf.ig 6 | if [[ $ready_to_download -eq 0 ]]; then 7 | echo "Please read the documentation and edit the config file accordingly." >&2 8 | exit 1 9 | fi 10 | if [ ! -d "$destination" ]; then 11 | mkdir "$destination" 12 | fi 13 | seq_sets=('imageSequence') 14 | if [[ $download_masks -eq 1 ]]; then 15 | seq_sets=('imageSequence' 'FGmasks' 'ChairMasks') 16 | fi 17 | source_path="http://gvv.mpi-inf.mpg.de/3dhp-dataset" 18 | echo "Download destination set to $destination " >&2 19 | 20 | for subject in ${subjects[@]}; do 21 | if [ ! -d "$destination/S$subject" ]; then 22 | mkdir "$destination/S$subject" 23 | fi 24 | for seq in 1 2; do 25 | if [ ! -d "$destination/S$subject/Seq$seq" ]; then 26 | mkdir "$destination/S$subject/Seq$seq" 27 | fi 28 | echo "Downloading Subject $subject, Sequence $seq ... 
" >&2 29 | wget "$source_path/S$subject/Seq$seq/annot.mat" 30 | mv "./annot.mat" "$destination/S$subject/Seq$seq/annot.mat" 31 | wget "$source_path/S$subject/Seq$seq/camera.calibration" 32 | mv "./camera.calibration" "$destination/S$subject/Seq$seq/camera.calibration" 33 | 34 | #Download the videos first, and then unzip them 35 | for im in "${seq_sets[@]}"; do 36 | echo "... $im ... " >&2 37 | if [ ! -d "$destination/S$subject/Seq$seq/$im" ]; then 38 | mkdir "$destination/S$subject/Seq$seq/$im" 39 | fi 40 | #One could check here if the downloaded videos are available unzipped, but whatever, download if 41 | #zip is missing 42 | if [ ! -f "$destination/S$subject/Seq$seq/$im/vnect_cameras.zip" ]; then 43 | wget "$source_path/S$subject/Seq$seq/$im/vnect_cameras.zip" 44 | mv "./vnect_cameras.zip" "$destination/S$subject/Seq$seq/$im/vnect_cameras.zip" 45 | fi 46 | if [ $download_extra_wall_cameras -ne 0 ]; then 47 | if [ ! -f "$destination/S$subject/Seq$seq/$im/other_angled_cameras.zip" ]; then 48 | wget "$source_path/S$subject/Seq$seq/$im/other_angled_cameras.zip" 49 | mv "./other_angled_cameras.zip" "$destination/S$subject/Seq$seq/$im/other_angled_cameras.zip" 50 | fi 51 | fi 52 | if [ $download_extra_ceiling_cameras -ne 0 ]; then 53 | if [ ! -f "$destination/S$subject/Seq$seq/$im/ceiling_cameras.zip" ]; then 54 | wget "$source_path/S$subject/Seq$seq/$im/ceiling_cameras.zip" 55 | mv "./ceiling_cameras.zip" "$destination/S$subject/Seq$seq/$im/ceiling_cameras.zip" 56 | fi 57 | fi 58 | done 59 | #Unzip the videos now 60 | for im in "${seq_sets[@]}"; do 61 | echo "... $im ... " >&2 62 | if [ ! -d "$destination/S$subject/Seq$seq/$im" ]; then 63 | mkdir "$destination/S$subject/Seq$seq/$im" 64 | fi 65 | if [ -f "$destination/S$subject/Seq$seq/$im/vnect_cameras.zip" ]; then 66 | unzip -j "$destination/S$subject/Seq$seq/$im/vnect_cameras.zip" -d "$destination/S$subject/Seq$seq/$im/" 67 | rm "$destination/S$subject/Seq$seq/$im/vnect_cameras.zip" 68 | fi 69 | if [ -f "$destination/S$subject/Seq$seq/$im/other_angled_cameras.zip" ]; then 70 | unzip -j "$destination/S$subject/Seq$seq/$im/other_angled_cameras.zip" -d "$destination/S$subject/Seq$seq/$im/" 71 | rm "$destination/S$subject/Seq$seq/$im/other_angled_cameras.zip" 72 | fi 73 | if [ -f "$destination/S$subject/Seq$seq/$im/ceiling_cameras.zip" ]; then 74 | unzip -j "$destination/S$subject/Seq$seq/$im/ceiling_cameras.zip" -d "$destination/S$subject/Seq$seq/$im/" 75 | rm "$destination/S$subject/Seq$seq/$im/ceiling_cameras.zip" 76 | fi 77 | done 78 | 79 | done #Seq 80 | done #Subject 81 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/util/mpii_get_sequence_info.m: -------------------------------------------------------------------------------- 1 | function [bg_augmentable, ub_augmentable, lb_augmentable, chair_augmentable, fps, num_frames] = mpii_get_sequence_info(subject_id, sequence) 2 | ub_augmentable = false; 3 | lb_augmentable = false; 4 | bg_augmentable = false; 5 | chair_augmentable = false; 6 | fps = 25; 7 | switch subject_id 8 | case 1 9 | switch sequence 10 | case 1 11 | bg_augmentable = true; 12 | chair_augmentable = true; 13 | num_frames = 6416; 14 | case 2 15 | ub_augmentable = true; %The LB masks are bad, so skip putting textures there and in the BG 16 | chair_augmentable = true; 17 | num_frames = 12430; 18 | fps = 50; 19 | otherwise 20 | end 21 | case 2 22 | switch sequence 23 | case 1 24 | bg_augmentable = true; 25 | chair_augmentable = true; 26 | num_frames = 6502; 
27 | case 2 28 | bg_augmentable = true; 29 | chair_augmentable = true; 30 | ub_augmentable = true; 31 | lb_augmentable = true; 32 | num_frames = 6081; 33 | end 34 | case 3 35 | switch sequence 36 | fps = 50; 37 | case 1 38 | bg_augmentable = true; 39 | chair_augmentable = true; 40 | num_frames = 12488; 41 | case 2 42 | bg_augmentable = true; 43 | chair_augmentable = true; 44 | ub_augmentable = true; 45 | lb_augmentable = true; 46 | num_frames = 12283; 47 | end 48 | case 4 49 | switch sequence 50 | case 1 51 | bg_augmentable = true; 52 | chair_augmentable = true; 53 | num_frames = 6171; 54 | case 2 55 | chair_augmentable = true; %The LB masks are bad, so skip putting textures there and in the BG 56 | ub_augmentable = true; 57 | num_frames = 6675; 58 | end 59 | case 5 60 | switch sequence 61 | fps = 50; 62 | case 1 63 | bg_augmentable = true; 64 | chair_augmentable = true; 65 | num_frames = 12820; 66 | case 2 67 | chair_augmentable = true; 68 | ub_augmentable = true; 69 | bg_augmentable = true; 70 | lb_augmentable = true; 71 | num_frames = 12312; 72 | otherwise 73 | end 74 | case 6 75 | switch sequence 76 | case 1 77 | bg_augmentable = true; 78 | chair_augmentable = true; 79 | num_frames = 6188; 80 | case 2 81 | ub_augmentable = true; 82 | lb_augmentable = true; 83 | bg_augmentable = true; 84 | chair_augmentable = true; 85 | num_frames = 6145; 86 | otherwise 87 | end 88 | case 7 89 | switch sequence 90 | case 1 91 | bg_augmentable = true; 92 | chair_augmentable = true; 93 | ub_augmentable = true; 94 | lb_augmentable = true; 95 | num_frames = 6239; 96 | case 2 97 | bg_augmentable = true; 98 | chair_augmentable = true; 99 | num_frames = 6320; 100 | end 101 | case 8 102 | switch sequence 103 | case 1 104 | bg_augmentable = true; 105 | chair_augmentable = true; 106 | ub_augmentable = true; 107 | lb_augmentable = true; 108 | num_frames = 6468; 109 | case 2 110 | bg_augmentable = true; 111 | chair_augmentable = true; 112 | num_frames = 6054; 113 | end 114 | end 115 | end 116 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_compute_3d_pck.m: -------------------------------------------------------------------------------- 1 | function [pck_table, auc_table] = mpii_compute_3d_pck(error_data, joint_groups, output_base_path) 2 | 3 | %Input 4 | %error_data is a struct array of type mpii_3d_error 5 | %The struct zcarries information about the name of the method as well as an 6 | %nj x 1 x nf matrix with the joint errors. 7 | %joint_groups is an ng x 2 cell, where ng is the number of groups. It 8 | %carries the name of the group as well as the indices of the joints that 9 | %belong to the group. 10 | 11 | %If the error_data array has multiple inputs, there are additional 12 | %comparative AUC plots output per joint in addition to the individual ones. 
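mpii_compute_3d_pck.m sweeps error thresholds from 0 to 150 mm in 5 mm steps, takes PCK at the 150 mm threshold, and defines AUC as the mean of the PCK curve over those thresholds. The same two metrics in plain NumPy, for a single joint group (the function name and the (joints, frames) error layout are illustrative):

import numpy as np

def pck_and_auc(errors, pck_thresh=150.0, step=5.0):
    """errors: (num_joints, num_frames) joint position errors in mm."""
    thresholds = np.arange(0.0, pck_thresh + step, step)       # 0, 5, ..., 150, as in the MATLAB code
    pck_curve = np.array([(errors < t).mean() for t in thresholds])
    pck = 100.0 * (errors < pck_thresh).mean()                 # PCK @ 150 mm, in percent
    auc = 100.0 * pck_curve.mean()                             # mean of the PCK curve over all thresholds
    return pck, auc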
13 | ng = size(joint_groups,1); 14 | 15 | 16 | pck_curve_array = cell(length(error_data), ng+1); %Contains the PCK results per joint group, per error_data cell 17 | pck_array = cell(length(error_data), ng+1); %Contains the AUC results per joint group 18 | auc_array = cell(length(error_data), ng+1); %Contains the AUC results per joint group 19 | %thresh = 0:5:200; 20 | thresh = 0:5:150; 21 | pck_thresh = 150; 22 | 23 | 24 | for i = 1:length(error_data) 25 | joint_count = 0; 26 | nf = size(error_data(i).error,3); 27 | for j = 1:ng 28 | for ti =1:length(thresh) 29 | t = thresh(ti); 30 | pck_curve_array{i,j} = [pck_curve_array{i,j}, sum(sum(error_data(i).error(joint_groups{j,2},1,:) < t, 3),1) / (length(joint_groups{j,2}) *nf)]; 31 | end 32 | 33 | joint_count = joint_count + length(joint_groups{j,2}); 34 | if(isempty(pck_curve_array{i,ng+1})) 35 | pck_curve_array{i,ng+1} = pck_curve_array{i,j} * length(joint_groups{j,2}); 36 | else 37 | pck_curve_array{i,ng+1} = pck_curve_array{i,ng+1} + pck_curve_array{i,j} * length(joint_groups{j,2}); 38 | end 39 | auc_array{i,j} = 100* sum(pck_curve_array{i,j}(:))/ length(thresh); 40 | pck_array{i,j} = 100* sum(sum(error_data(i).error(joint_groups{j,2},1,:) < pck_thresh, 3),1) / (length(joint_groups{j,2}) *nf); 41 | if(isempty(pck_array{i,ng+1})) 42 | pck_array{i,ng+1} = pck_array{i,j} * length(joint_groups{j,2}); 43 | else 44 | pck_array{i,ng+1} = pck_array{i,ng+1} + pck_array{i,j} * length(joint_groups{j,2}); 45 | end 46 | end 47 | pck_array{i,ng+1} = pck_array{i,ng+1} / joint_count; 48 | pck_curve_array{i,ng+1} = pck_curve_array{i,ng+1} / joint_count; 49 | auc_array{i,ng+1} = 100* sum(pck_curve_array{i,ng+1}(:))/ length(thresh); 50 | end 51 | 52 | pck_table = cell(length(error_data)+1, ng+2); 53 | pck_table{1,ng+2} = 'Total'; 54 | for i = 1:length(error_data) 55 | pck_table{1+i,1} = error_data(i).method; 56 | end 57 | for i = 1:ng 58 | pck_table{1,i+1} = joint_groups{i,1}; 59 | end 60 | auc_table = pck_table; 61 | auc_table(2:end,2:end) = auc_array; 62 | pck_table(2:end,2:end) = pck_array; 63 | 64 | 65 | if(~isempty(output_base_path)) 66 | %Generate and save plots to output_path 67 | %First generate individual plots from each row of the pck_curve_array 68 | colormap default; 69 | 70 | for i = 1:length(error_data) 71 | all_plot = []; 72 | for j = 1:ng+1 73 | figure(1); 74 | cla; 75 | plot(thresh,pck_curve_array{i,j},'LineWidth',2); 76 | all_plot = [all_plot; pck_curve_array{i,j}]; 77 | axis([0 150 0 1]); 78 | title([pck_table{1,j+1} ' PCK150mm']); 79 | output_dir = [output_base_path filesep error_data(i).method]; 80 | if(exist(output_dir,'dir') ~= 7) 81 | mkdir(output_dir); 82 | end 83 | saveas(gcf,[output_dir filesep pck_table{1,j+1}], 'fig'); 84 | saveas(gcf,[output_dir filesep pck_table{1,j+1}], 'svg'); 85 | saveas(gcf,[output_dir filesep pck_table{1,j+1}], 'png'); 86 | 87 | end 88 | figure(2); 89 | cla; 90 | plot(thresh,all_plot,'LineWidth',2); 91 | axis([0 150 0 1]); 92 | hold off; 93 | legend(pck_table(1,2:end)); 94 | saveas(gcf,[output_dir filesep 'All'], 'fig'); 95 | saveas(gcf,[output_dir filesep 'All'], 'svg'); 96 | saveas(gcf,[output_dir filesep 'All'], 'png'); 97 | end 98 | end 99 | 100 | end 101 | %Then group the plots by methods 102 | 103 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/config/default.py: -------------------------------------------------------------------------------- 1 | 2 | # ------------------------------------------------------------------------------ 3 | # 
Copyright (c) Microsoft 4 | # Licensed under the MIT License. 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | 14 | from yacs.config import CfgNode as CN 15 | 16 | 17 | _C = CN() 18 | 19 | _C.OUTPUT_DIR = '' 20 | _C.LOG_DIR = '' 21 | _C.DATA_DIR = '' 22 | _C.GPUS = (0,) 23 | _C.WORKERS = 4 24 | _C.PRINT_FREQ = 20 25 | _C.AUTO_RESUME = False 26 | _C.PIN_MEMORY = True 27 | _C.RANK = 0 28 | 29 | # Cudnn related params 30 | _C.CUDNN = CN() 31 | _C.CUDNN.BENCHMARK = True 32 | _C.CUDNN.DETERMINISTIC = False 33 | _C.CUDNN.ENABLED = True 34 | 35 | # common params for NETWORK 36 | _C.MODEL = CN() 37 | _C.MODEL.NAME = 'pose_hrnet' 38 | _C.MODEL.INIT_WEIGHTS = True 39 | _C.MODEL.PRETRAINED = '' 40 | _C.MODEL.NUM_JOINTS = 17 41 | _C.MODEL.TAG_PER_JOINT = True 42 | _C.MODEL.TARGET_TYPE = 'gaussian' 43 | _C.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 44 | _C.MODEL.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 45 | _C.MODEL.SIGMA = 2 46 | _C.MODEL.EXTRA = CN(new_allowed=True) 47 | 48 | _C.LOSS = CN() 49 | _C.LOSS.USE_OHKM = False 50 | _C.LOSS.TOPK = 8 51 | _C.LOSS.USE_TARGET_WEIGHT = True 52 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 53 | 54 | # DATASET related params 55 | _C.DATASET = CN() 56 | _C.DATASET.ROOT = '' 57 | _C.DATASET.DATASET = 'mpii' 58 | _C.DATASET.TRAIN_SET = 'train' 59 | _C.DATASET.TEST_SET = 'valid' 60 | _C.DATASET.DATA_FORMAT = 'jpg' 61 | _C.DATASET.HYBRID_JOINTS_TYPE = '' 62 | _C.DATASET.SELECT_DATA = False 63 | 64 | # training data augmentation 65 | _C.DATASET.FLIP = True 66 | _C.DATASET.SCALE_FACTOR = 0.25 67 | _C.DATASET.ROT_FACTOR = 30 68 | _C.DATASET.PROB_HALF_BODY = 0.0 69 | _C.DATASET.NUM_JOINTS_HALF_BODY = 8 70 | _C.DATASET.COLOR_RGB = False 71 | 72 | # train 73 | _C.TRAIN = CN() 74 | 75 | _C.TRAIN.LR_FACTOR = 0.1 76 | _C.TRAIN.LR_STEP = [90, 110] 77 | _C.TRAIN.LR = 0.001 78 | 79 | _C.TRAIN.OPTIMIZER = 'adam' 80 | _C.TRAIN.MOMENTUM = 0.9 81 | _C.TRAIN.WD = 0.0001 82 | _C.TRAIN.NESTEROV = False 83 | _C.TRAIN.GAMMA1 = 0.99 84 | _C.TRAIN.GAMMA2 = 0.0 85 | 86 | _C.TRAIN.BEGIN_EPOCH = 0 87 | _C.TRAIN.END_EPOCH = 140 88 | 89 | _C.TRAIN.RESUME = False 90 | _C.TRAIN.CHECKPOINT = '' 91 | 92 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 93 | _C.TRAIN.SHUFFLE = True 94 | 95 | # testing 96 | _C.TEST = CN() 97 | 98 | # size of images for each device 99 | _C.TEST.BATCH_SIZE_PER_GPU = 32 100 | # Test Model Epoch 101 | _C.TEST.FLIP_TEST = False 102 | _C.TEST.POST_PROCESS = False 103 | _C.TEST.SHIFT_HEATMAP = False 104 | 105 | _C.TEST.USE_GT_BBOX = False 106 | 107 | # nms 108 | _C.TEST.IMAGE_THRE = 0.1 109 | _C.TEST.NMS_THRE = 0.6 110 | _C.TEST.SOFT_NMS = False 111 | _C.TEST.OKS_THRE = 0.5 112 | _C.TEST.IN_VIS_THRE = 0.0 113 | _C.TEST.COCO_BBOX_FILE = '' 114 | _C.TEST.BBOX_THRE = 1.0 115 | _C.TEST.MODEL_FILE = '' 116 | 117 | # debug 118 | _C.DEBUG = CN() 119 | _C.DEBUG.DEBUG = False 120 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 121 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 122 | _C.DEBUG.SAVE_HEATMAPS_GT = False 123 | _C.DEBUG.SAVE_HEATMAPS_PRED = False 124 | 125 | 126 | def update_config(cfg, args): 127 | cfg.defrost() 128 | cfg.merge_from_file(args.cfg) 129 | cfg.merge_from_list(args.opts) 130 | 131 | if args.modelDir: 132 | cfg.OUTPUT_DIR = args.modelDir 133 | 134 | if args.logDir: 135 | cfg.LOG_DIR = args.logDir 136 | 137 | if 
args.dataDir: 138 | cfg.DATA_DIR = args.dataDir 139 | 140 | cfg.DATASET.ROOT = os.path.join( 141 | cfg.DATA_DIR, cfg.DATASET.ROOT 142 | ) 143 | 144 | cfg.MODEL.PRETRAINED = os.path.join( 145 | cfg.DATA_DIR, cfg.MODEL.PRETRAINED 146 | ) 147 | 148 | if cfg.TEST.MODEL_FILE: 149 | cfg.TEST.MODEL_FILE = os.path.join( 150 | cfg.DATA_DIR, cfg.TEST.MODEL_FILE 151 | ) 152 | 153 | cfg.freeze() 154 | 155 | 156 | if __name__ == '__main__': 157 | import sys 158 | with open(sys.argv[1], 'w') as f: 159 | print(_C, file=f) 160 | 161 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/humaneva_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | import numpy as np 9 | import copy 10 | from common.skeleton import Skeleton 11 | from common.mocap_dataset import MocapDataset 12 | from common.camera import normalize_screen_coordinates, image_coordinates 13 | 14 | humaneva_skeleton = Skeleton(parents=[-1, 0, 1, 2, 3, 1, 5, 6, 0, 8, 9, 0, 11, 12, 1], 15 | joints_left=[2, 3, 4, 8, 9, 10], 16 | joints_right=[5, 6, 7, 11, 12, 13]) 17 | 18 | humaneva_cameras_intrinsic_params = [ 19 | { 20 | 'id': 'C1', 21 | 'res_w': 640, 22 | 'res_h': 480, 23 | 'azimuth': 0, # Only used for visualization 24 | }, 25 | { 26 | 'id': 'C2', 27 | 'res_w': 640, 28 | 'res_h': 480, 29 | 'azimuth': -90, # Only used for visualization 30 | }, 31 | { 32 | 'id': 'C3', 33 | 'res_w': 640, 34 | 'res_h': 480, 35 | 'azimuth': 90, # Only used for visualization 36 | }, 37 | ] 38 | 39 | humaneva_cameras_extrinsic_params = { 40 | 'S1': [ 41 | { 42 | 'orientation': [0.424207, -0.4983646, -0.5802981, 0.4847012], 43 | 'translation': [4062.227, 663.2477, 1528.397], 44 | }, 45 | { 46 | 'orientation': [0.6503354, -0.7481602, -0.0919284, 0.0941766], 47 | 'translation': [844.8131, -3805.2092, 1504.9929], 48 | }, 49 | { 50 | 'orientation': [0.0664734, -0.0690535, 0.7416416, -0.6639132], 51 | 'translation': [-797.67377, 3916.3174, 1433.6602], 52 | }, 53 | ], 54 | 'S2': [ 55 | { 56 | 'orientation': [ 0.4214752, -0.4961493, -0.5838273, 0.4851187 ], 57 | 'translation': [ 4112.9121, 626.4929, 1545.2988], 58 | }, 59 | { 60 | 'orientation': [ 0.6501393, -0.7476588, -0.0954617, 0.0959808 ], 61 | 'translation': [ 923.5740, -3877.9243, 1504.5518], 62 | }, 63 | { 64 | 'orientation': [ 0.0699353, -0.0712403, 0.7421637, -0.662742 ], 65 | 'translation': [ -781.4915, 3838.8853, 1444.9929], 66 | }, 67 | ], 68 | 'S3': [ 69 | { 70 | 'orientation': [ 0.424207, -0.4983646, -0.5802981, 0.4847012 ], 71 | 'translation': [ 4062.2271, 663.2477, 1528.3970], 72 | }, 73 | { 74 | 'orientation': [ 0.6503354, -0.7481602, -0.0919284, 0.0941766 ], 75 | 'translation': [ 844.8131, -3805.2092, 1504.9929], 76 | }, 77 | { 78 | 'orientation': [ 0.0664734, -0.0690535, 0.7416416, -0.6639132 ], 79 | 'translation': [ -797.6738, 3916.3174, 1433.6602], 80 | }, 81 | ], 82 | 'S4': [ 83 | {}, 84 | {}, 85 | {}, 86 | ], 87 | 88 | } 89 | 90 | class HumanEvaDataset(MocapDataset): 91 | def __init__(self, path): 92 | super().__init__(fps=60, skeleton=humaneva_skeleton) 93 | 94 | self._cameras = copy.deepcopy(humaneva_cameras_extrinsic_params) 95 | for cameras in self._cameras.values(): 96 | for i, cam in enumerate(cameras): 97 | cam.update(humaneva_cameras_intrinsic_params[i]) 98 | for k, v in cam.items(): 
99 | if k not in ['id', 'res_w', 'res_h']: 100 | cam[k] = np.array(v, dtype='float32') 101 | if 'translation' in cam: 102 | cam['translation'] = cam['translation']/1000 # mm to meters 103 | 104 | for subject in list(self._cameras.keys()): 105 | data = self._cameras[subject] 106 | del self._cameras[subject] 107 | for prefix in ['Train/', 'Validate/', 'Unlabeled/Train/', 'Unlabeled/Validate/', 'Unlabeled/']: 108 | self._cameras[prefix + subject] = data 109 | 110 | # Load serialized dataset 111 | data = np.load(path, allow_pickle=True)['positions_3d'].item() 112 | 113 | self._data = {} 114 | for subject, actions in data.items(): 115 | self._data[subject] = {} 116 | for action_name, positions in actions.items(): 117 | self._data[subject][action_name] = { 118 | 'positions': positions, 119 | 'cameras': self._cameras[subject], 120 | } 121 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/model_stmo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from model.block.vanilla_transformer_encoder import Transformer 4 | from model.block.strided_transformer_encoder import Transformer as Transformer_reduce 5 | 6 | class Linear(nn.Module): 7 | def __init__(self, linear_size, p_dropout=0.25): 8 | super(Linear, self).__init__() 9 | self.l_size = linear_size 10 | 11 | self.relu = nn.LeakyReLU(0.2, inplace=True) 12 | self.dropout = nn.Dropout(p_dropout) 13 | 14 | #self.w1 = nn.Linear(self.l_size, self.l_size) 15 | self.w1 = nn.Conv1d(self.l_size, self.l_size, kernel_size=1) 16 | self.batch_norm1 = nn.BatchNorm1d(self.l_size) 17 | 18 | #self.w2 = nn.Linear(self.l_size, self.l_size) 19 | self.w2 = nn.Conv1d(self.l_size, self.l_size, kernel_size=1) 20 | self.batch_norm2 = nn.BatchNorm1d(self.l_size) 21 | 22 | def forward(self, x): 23 | y = self.w1(x) 24 | y = self.batch_norm1(y) 25 | y = self.relu(y) 26 | y = self.dropout(y) 27 | 28 | y = self.w2(y) 29 | y = self.batch_norm2(y) 30 | y = self.relu(y) 31 | y = self.dropout(y) 32 | 33 | out = x + y 34 | 35 | return out 36 | 37 | class FCBlock(nn.Module): 38 | 39 | def __init__(self, channel_in, channel_out, linear_size, block_num): 40 | super(FCBlock, self).__init__() 41 | 42 | self.linear_size = linear_size 43 | self.block_num = block_num 44 | self.layers = [] 45 | self.channel_in = channel_in 46 | self.stage_num = 3 47 | self.p_dropout = 0.1 48 | #self.fc_1 = nn.Linear(self.channel_in, self.linear_size) 49 | self.fc_1 = nn.Conv1d(self.channel_in, self.linear_size, kernel_size=1) 50 | self.bn_1 = nn.BatchNorm1d(self.linear_size) 51 | for i in range(block_num): 52 | self.layers.append(Linear(self.linear_size, self.p_dropout)) 53 | #self.fc_2 = nn.Linear(self.linear_size, channel_out) 54 | self.fc_2 = nn.Conv1d(self.linear_size, channel_out, kernel_size=1) 55 | 56 | self.layers = nn.ModuleList(self.layers) 57 | self.relu = nn.LeakyReLU(0.2, inplace=True) 58 | self.dropout = nn.Dropout(self.p_dropout) 59 | 60 | def forward(self, x): 61 | 62 | x = self.fc_1(x) 63 | x = self.bn_1(x) 64 | x = self.relu(x) 65 | x = self.dropout(x) 66 | for i in range(self.block_num): 67 | x = self.layers[i](x) 68 | x = self.fc_2(x) 69 | 70 | return x 71 | 72 | class PoseTransformer(nn.Module): 73 | def __init__(self, args): 74 | super().__init__() 75 | 76 | layers, channel, d_hid, length = 3, 256, 512, 9#args.frames 77 | stride_num = { 78 | '9': [1, 3, 3], 79 | '27': [3, 3, 3], 80 | '351': [3, 9, 13], 81 | '81': [3, 3, 3, 3], 82 | '243': [3, 3, 3, 3, 3], 
83 | }
84 | stride_num = stride_num[str(length)]
85 | self.num_joints_in, self.num_joints_out = 17, 17
86 | 
87 | self.encoder = FCBlock(2*self.num_joints_in, channel, 2*channel, 1)
88 | 
89 | self.Transformer = Transformer(layers, channel, d_hid, length=length)
90 | self.Transformer_reduce = Transformer_reduce(len(stride_num), channel, d_hid, \
91 | length=length, stride_num=stride_num)
92 | 
93 | self.fcn = nn.Sequential(
94 | nn.BatchNorm1d(channel, momentum=0.1),
95 | nn.Conv1d(channel, 3*self.num_joints_out, kernel_size=1)
96 | )
97 | 
98 | self.fcn_1 = nn.Sequential(
99 | nn.BatchNorm1d(channel, momentum=0.1),
100 | nn.Conv1d(channel, 3*self.num_joints_out, kernel_size=1)
101 | )
102 | 
103 | def forward(self, x):
104 | x = x[:, :, :, :, 0].permute(0, 2, 3, 1).contiguous()
105 | x_shape = x.shape
106 | 
107 | x = x.view(x.shape[0], x.shape[1], -1)
108 | x = x.permute(0, 2, 1).contiguous()
109 | 
110 | x = self.encoder(x)
111 | 
112 | x = x.permute(0, 2, 1).contiguous()
113 | x = self.Transformer(x)
114 | 
115 | x_VTE = x
116 | x_VTE = x_VTE.permute(0, 2, 1).contiguous()
117 | x_VTE = self.fcn_1(x_VTE)
118 | 
119 | x_VTE = x_VTE.view(x_shape[0], self.num_joints_out, -1, x_VTE.shape[2])
120 | x_VTE = x_VTE.permute(0, 2, 3, 1).contiguous().unsqueeze(dim=-1)
121 | 
122 | x = self.Transformer_reduce(x)
123 | x = x.permute(0, 2, 1).contiguous()
124 | x = self.fcn(x)
125 | 
126 | x = x.view(x_shape[0], self.num_joints_out, -1, x.shape[2])
127 | x = x.permute(0, 2, 3, 1).contiguous().unsqueeze(dim=-1)
128 | 
129 | return x, x_VTE
130 | 
131 | 
132 | 
133 | 
134 | 
--------------------------------------------------------------------------------
/ContextPose_mpi/dataset/mpi_inf_3dhp/README.txt:
--------------------------------------------------------------------------------
1 | ####################
2 | MPI-INF-3DHP Dataset
3 | ####################
4 | 
5 | Terms of use:
6 | The provided dataset is intended for research purposes only and any use of it for non-scientific and/or commercial means is not allowed. This includes publishing any scientific
7 | results obtained with our data in non-scientific literature, such as tabloid press. We ask the user to respect our actors and not to use the data for any distasteful manipulations.
8 | If you use our training or test data, you are required to cite the origin:
9 | [1] VNect: Real-time 3D Human Pose Estimation With A Single RGB Camera (ACM Trans. on Graphics, SIGGRAPH 2017)
10 | Mehta, D.; Sridhar, S.; Sotnychenko, O.; Rhodin, H.; Shafiei, M.; Seidel, H.; Xu, W.; Casas, D.; Theobalt, C.
11 | [2] Monocular 3D Human Pose Estimation In The Wild Using Improved CNN Supervision (3DV 2017)
12 | Mehta, D.; Rhodin, H.; Casas, D.; Fua, P.; Sotnychenko, O.; Xu, W.; Theobalt, C.
13 | 
14 | Refer to the license (license.txt) distributed with the data.
15 | 
16 | ########################
17 | Downloading the Dataset
18 | ########################
19 | Use the script get_dataset.sh to download the training set and get_testset.sh for the test set. You will need to read and review the configuration in conf.ig before you can proceed with downloading the dataset.
20 | 
21 | ####################
22 | Training Set Details
23 | ####################
24 | The dataset comprises 8 subjects, covering the following 8 activities with 2 sequences per subject.
25 | Sequence 1: 26 | A1: Walking/Standing 27 | Walking, jogging, waiting in a queue, pointing at things, having an animated conversation, smoking while standing or walking, phone call etc 28 | A2: Exercise 29 | Lunges, pushups, bridge, stretch legs, other forms of slow exercise 30 | A3: Sitting(1) 31 | Eating, working at a computer, picking something off the floor, lie back on the chair, cross feet, sit with hands behind head etc 32 | A4: Crouch/Reach 33 | Crouch and pretend lift something, tie shoe laces, photography while crouching, crouch and interact with objects etc 34 | 35 | Sequence 2: 36 | A5: On the Floor 37 | Cycling, crunches and other complicated poses while lying on the ground. 38 | A6: Sports 39 | Boxing, tennis, golf, soccer and other forms of fast motion. Slightly awkward because the green screen covering the floor didn't have traction on the floor. 40 | A7: Sitting(2) 41 | Move the chair around while seated, wave someone over, cheer for sports team, animated conversation with someone, cross legs etc 42 | A8: Miscellaneous 43 | Dance, jump, walk hand in hand with another (pretend) person, etc 44 | with 2 sets of clothing each. There is at least 1 clothing set per subject that is unique to that subject. 45 | 46 | Each sequence is roughly 4 minutes, with each activity taking roughly 1 minute. 47 | 48 | Each subject wears a different set of clothing in the two sequences. At least 1 set of clothing per subject is unique to that subject. 49 | 50 | The dataset was recorded in a green screen studio with 14 cameras. The dataset has segmentation masks available for the background, for the chair, and for upper body and lower body clothing. 51 | Use mpii_get_sequence_info to get information about which masks are available for each subject-sequence combination. The same function also provides information about the frame rate of the videos in the sequence, as well as the number of frames available per video. 52 | 53 | The dataset is organized in the following hierarchy. 54 | SX: Where X is the subject ID (1 to 8) 55 | SeqY: Where Y is the sequence number (1 or 2) 56 | ChairMasks: Masks for the chair. This mask is encoded in the Red channel. 57 | FGmaks: Masks for the green screen, and the lower body and the upper body when available. See mpii_get_sequence_info for available augmentation opportunities. The green screen mask is in the Red channel, and when available, the green channel carries the upper body mask and the blue channel carries the lower body mask. It may be helpful to apply some gaussian smoothing to the masks when using them. 58 | imageSequence: The RGB frames. 59 | annot.mat: Body joint annotations in each camera's coordinate system. There are 2D pose annotations, 3D pose annotations and normalized 3D pose annotations (universal) available for each camera. For information about the joint order, joint labels and the joint subsets used in various projects, refer to mpii_get_joint_set. The file also contains the camera correspondence for each annotation cell (cameras, indexed with 0). For the camera subsets used in various projects, refer to mpii_get_camera_set. The 3D annotations (annot3) when reprojected into the image match the 2D annotations (annot2), however the same is not true of the normalized 3D annotations (univ_annot3). The 2D annotations (annot2) for each frame are arranged in a single row as x1,y1,x2,y2..,xj,yj, while the 3D annotations (annot3,univ_annot3) are arranged as x1,y1,z1,x2,y2,z2..,xj,yj,zj.. 
The file also contains the frame number correspondence for each row of annotations (frames). Though rare, it is possible that some sequences have a few frames missing at the end but annotations still available. Thus it is advisable to get the frame count F from mpii_get_sequence_info and read only the first F rows of annotations. 60 | camera.calibration: Camera calibration parameters 61 | 62 | 63 | The image frames of the dataset come in the form of video sequences, which are further grouped by common camera sets for ease of distribution. Before using the data, it is recommended to convert the videos back to image sequences using ffmpeg (ffmpeg -i "/video_X.avi" -qscale:v 1 "/img_X_%%06d.jpg") to ensure valid correspondence between the annotations and the frames. 64 | -------------------------------------------------------------------------------- /ContextPose_mpi/common/opt.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import math 4 | import time 5 | import torch 6 | 7 | class opts(): 8 | def __init__(self): 9 | self.parser = argparse.ArgumentParser() 10 | 11 | def init(self): 12 | self.parser.add_argument('--backbone', default='hrnet_48', type=str) 13 | self.parser.add_argument('--layers', default=3, type=int) 14 | self.parser.add_argument('--channel', default=256, type=int) 15 | self.parser.add_argument('--d_hid', default=512, type=int) 16 | self.parser.add_argument('--dataset', type=str, default='h36m') 17 | self.parser.add_argument('-k', '--keypoints', default='cpn_ft_h36m_dbb', type=str) 18 | self.parser.add_argument('--data_augmentation', type=bool, default=True) 19 | self.parser.add_argument('--reverse_augmentation', type=bool, default=False) 20 | self.parser.add_argument('--test_augmentation', type=bool, default=True) 21 | self.parser.add_argument('--crop_uv', type=int, default=0) 22 | self.parser.add_argument('--root_path', type=str, default='dataset/') 23 | self.parser.add_argument('-a', '--actions', default='*', type=str) 24 | self.parser.add_argument('--downsample', default=1, type=int) 25 | self.parser.add_argument('--subset', default=1, type=float) 26 | self.parser.add_argument('-s', '--stride', default=1, type=int) 27 | self.parser.add_argument('--gpu', default='0', type=str, help='') 28 | self.parser.add_argument('--train', type=int, default=0) 29 | self.parser.add_argument('--test', type=int, default=1) 30 | self.parser.add_argument('--nepoch', type=int, default=80) 31 | self.parser.add_argument('-b','--batchSize', type=int, default=160) 32 | self.parser.add_argument('--lr', type=float, default=1e-3) 33 | self.parser.add_argument('--lr_refine', type=float, default=1e-5) 34 | self.parser.add_argument('--lr_decay_large', type=float, default=0.5) 35 | self.parser.add_argument('--large_decay_epoch', type=int, default=80) 36 | self.parser.add_argument('--workers', type=int, default=14) 37 | self.parser.add_argument('-lrd', '--lr_decay', default=0.95, type=float) 38 | self.parser.add_argument('-f','--frames', type=int, default=243) 39 | self.parser.add_argument('--pad', type=int, default=121) 40 | self.parser.add_argument('--refine', action='store_true') 41 | self.parser.add_argument('--reload', type=int, default=0) 42 | self.parser.add_argument('--refine_reload', type=int, default=0) 43 | self.parser.add_argument('-c','--checkpoint', type=str, default='model') 44 | self.parser.add_argument('--previous_dir', type=str, default='') 45 | self.parser.add_argument('--n_joints', type=int, default=17) 46 | 
self.parser.add_argument('--out_joints', type=int, default=17) 47 | self.parser.add_argument('--out_all', type=int, default=1) 48 | self.parser.add_argument('--in_channels', type=int, default=2) 49 | self.parser.add_argument('--out_channels', type=int, default=3) 50 | self.parser.add_argument('-previous_best_threshold', type=float, default= math.inf) 51 | self.parser.add_argument('-previous_name', type=str, default='') 52 | self.parser.add_argument('--previous_refine_name', type=str, default='') 53 | self.parser.add_argument('--manualSeed', type=int, default=1) 54 | 55 | self.parser.add_argument('--MAE', action='store_true') 56 | self.parser.add_argument('-tmr','--temporal_mask_rate', type=float, default=0) 57 | self.parser.add_argument('-smn', '--spatial_mask_num', type=int, default=0) 58 | self.parser.add_argument('-tds', '--t_downsample', type=int, default=1) 59 | 60 | self.parser.add_argument('--MAE_reload', type=int, default=0) 61 | self.parser.add_argument('-r', '--resume', action='store_true') 62 | 63 | 64 | 65 | def parse(self): 66 | self.init() 67 | self.opt = self.parser.parse_args() 68 | 69 | self.opt.pad = (self.opt.frames-1) // 2 70 | 71 | stride_num = { 72 | '9': [1, 3, 3], 73 | '27': [3, 3, 3], 74 | '351': [3, 9, 13], 75 | '81': [3, 3, 3, 3], 76 | '243': [3, 3, 3, 3, 3], 77 | } 78 | 79 | if str(self.opt.frames) in stride_num: 80 | self.opt.stride_num = stride_num[str(self.opt.frames)] 81 | else: 82 | self.opt.stride_num = None 83 | # print('no stride_num') 84 | # exit() 85 | 86 | self.opt.subjects_train = 'S1,S5,S6,S7,S8' 87 | self.opt.subjects_test = 'S9,S11' 88 | #self.opt.subjects_test = 'S11' 89 | 90 | #if self.opt.train: 91 | logtime = time.strftime('%m%d_%H%M_%S_') 92 | 93 | ckp_suffix = '' 94 | if self.opt.refine: 95 | ckp_suffix='_refine' 96 | elif self.opt.MAE: 97 | ckp_suffix = '_pretrain' 98 | else: 99 | ckp_suffix = '_STMO' 100 | self.opt.checkpoint = 'checkpoint/'+self.opt.checkpoint + '_%d'%(self.opt.pad*2+1) + \ 101 | '%s'%ckp_suffix 102 | 103 | if not os.path.exists(self.opt.checkpoint): 104 | os.makedirs(self.opt.checkpoint) 105 | 106 | if self.opt.train: 107 | args = dict((name, getattr(self.opt, name)) for name in dir(self.opt) 108 | if not name.startswith('_')) 109 | 110 | file_name = os.path.join(self.opt.checkpoint, 'opt.txt') 111 | with open(file_name, 'wt') as opt_file: 112 | opt_file.write('==> Args:\n') 113 | for k, v in sorted(args.items()): 114 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 115 | opt_file.write('==> Args:\n') 116 | 117 | return self.opt 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/model_stmo_pretrain.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from model.block.vanilla_transformer_encoder_pretrain import Transformer, Transformer_dec 4 | from model.block.strided_transformer_encoder import Transformer as Transformer_reduce 5 | import numpy as np 6 | 7 | class LayerNorm(nn.Module): 8 | def __init__(self, features, eps=1e-6): 9 | super(LayerNorm, self).__init__() 10 | self.a_2 = nn.Parameter(torch.ones(features)) 11 | self.b_2 = nn.Parameter(torch.zeros(features)) 12 | self.eps = eps 13 | 14 | def forward(self, x): 15 | mean = x.mean(-1, keepdim=True) 16 | std = x.std(-1, keepdim=True) 17 | return self.a_2 * (x - mean) / (std + self.eps) + self.b_2 18 | 19 | class Linear(nn.Module): 20 | def __init__(self, linear_size, p_dropout=0.25): 21 | 
super(Linear, self).__init__() 22 | self.l_size = linear_size 23 | 24 | self.relu = nn.LeakyReLU(0.2, inplace=True) 25 | self.dropout = nn.Dropout(p_dropout) 26 | 27 | #self.w1 = nn.Linear(self.l_size, self.l_size) 28 | self.w1 = nn.Conv1d(self.l_size, self.l_size, kernel_size=1) 29 | self.batch_norm1 = nn.BatchNorm1d(self.l_size) 30 | 31 | #self.w2 = nn.Linear(self.l_size, self.l_size) 32 | self.w2 = nn.Conv1d(self.l_size, self.l_size, kernel_size=1) 33 | self.batch_norm2 = nn.BatchNorm1d(self.l_size) 34 | 35 | def forward(self, x): 36 | y = self.w1(x) 37 | y = self.batch_norm1(y) 38 | y = self.relu(y) 39 | y = self.dropout(y) 40 | 41 | y = self.w2(y) 42 | y = self.batch_norm2(y) 43 | y = self.relu(y) 44 | y = self.dropout(y) 45 | 46 | out = x + y 47 | 48 | return out 49 | 50 | class FCBlock(nn.Module): 51 | 52 | def __init__(self, channel_in, channel_out, linear_size, block_num): 53 | super(FCBlock, self).__init__() 54 | 55 | self.linear_size = linear_size 56 | self.block_num = block_num 57 | self.layers = [] 58 | self.channel_in = channel_in 59 | self.stage_num = 3 60 | self.p_dropout = 0.1 61 | #self.fc_1 = nn.Linear(self.channel_in, self.linear_size) 62 | self.fc_1 = nn.Conv1d(self.channel_in, self.linear_size, kernel_size=1) 63 | self.bn_1 = nn.BatchNorm1d(self.linear_size) 64 | for i in range(block_num): 65 | self.layers.append(Linear(self.linear_size, self.p_dropout)) 66 | #self.fc_2 = nn.Linear(self.linear_size, channel_out) 67 | self.fc_2 = nn.Conv1d(self.linear_size, channel_out, kernel_size=1) 68 | 69 | self.layers = nn.ModuleList(self.layers) 70 | self.relu = nn.LeakyReLU(0.2, inplace=True) 71 | self.dropout = nn.Dropout(self.p_dropout) 72 | 73 | def forward(self, x): 74 | 75 | x = self.fc_1(x) 76 | x = self.bn_1(x) 77 | x = self.relu(x) 78 | x = self.dropout(x) 79 | for i in range(self.block_num): 80 | x = self.layers[i](x) 81 | x = self.fc_2(x) 82 | 83 | return x 84 | 85 | class PoseTransformer(nn.Module): 86 | def __init__(self, args): 87 | super().__init__() 88 | 89 | layers, channel, d_hid, length = 3, 256, 512, args.number_of_frames 90 | stride_num = { 91 | '9': [1, 3, 3], 92 | '27': [3, 3, 3], 93 | '351': [3, 9, 13], 94 | '81': [3, 3, 3, 3], 95 | '243': [3, 3, 3, 3, 3], 96 | } 97 | stride_num = stride_num[str(length)] 98 | self.spatial_mask_num = 2 99 | self.num_joints_in, self.num_joints_out = 17, 17 100 | 101 | self.length = length 102 | dec_dim_shrink = 2 103 | 104 | self.encoder = FCBlock(2*self.num_joints_in, channel, 2*channel, 1) 105 | 106 | self.Transformer = Transformer(layers, channel, d_hid, length=length) 107 | self.Transformer_dec = Transformer_dec(layers-1, channel//dec_dim_shrink, d_hid//dec_dim_shrink, length=length) 108 | 109 | self.encoder_to_decoder = nn.Linear(channel, channel//dec_dim_shrink, bias=False) 110 | self.encoder_LN = LayerNorm(channel) 111 | 112 | self.fcn_dec = nn.Sequential( 113 | nn.BatchNorm1d(channel//dec_dim_shrink, momentum=0.1), 114 | nn.Conv1d(channel//dec_dim_shrink, 2*self.num_joints_out, kernel_size=1) 115 | ) 116 | 117 | # self.fcn_1 = nn.Sequential( 118 | # nn.BatchNorm1d(channel, momentum=0.1), 119 | # nn.Conv1d(channel, 3*self.num_joints_out, kernel_size=1) 120 | # ) 121 | 122 | self.dec_pos_embedding = nn.Parameter(torch.randn(1, length, channel//dec_dim_shrink)) 123 | self.mask_token = nn.Parameter(torch.randn(1, 1, channel//dec_dim_shrink)) 124 | 125 | self.spatial_mask_token = nn.Parameter(torch.randn(1, 1, 2)) 126 | 127 | def forward(self, x_in, mask, spatial_mask): 128 | x_in = x_in[:, :, :, :, 0].permute(0, 2, 3, 
1).contiguous() 129 | b,f,_,_ = x_in.shape 130 | 131 | # spatial mask out 132 | x = x_in.clone() 133 | 134 | x[:,spatial_mask] = self.spatial_mask_token.expand(b,self.spatial_mask_num*f,2) 135 | 136 | 137 | x = x.view(b, f, -1) 138 | 139 | x = x.permute(0, 2, 1).contiguous() 140 | 141 | x = self.encoder(x) 142 | 143 | x = x.permute(0, 2, 1).contiguous() 144 | feas = self.Transformer(x, mask_MAE=mask) 145 | 146 | feas = self.encoder_LN(feas) 147 | feas = self.encoder_to_decoder(feas) 148 | 149 | B, N, C = feas.shape 150 | 151 | # we don't unshuffle the correct visible token order, 152 | # but shuffle the pos embedding accorddingly. 153 | expand_pos_embed = self.dec_pos_embedding.expand(B, -1, -1).clone() 154 | pos_emd_vis = expand_pos_embed[:, ~mask].reshape(B, -1, C) 155 | pos_emd_mask = expand_pos_embed[:, mask].reshape(B, -1, C) 156 | x_full = torch.cat([feas + pos_emd_vis, self.mask_token + pos_emd_mask], dim=1) 157 | 158 | x_out = self.Transformer_dec(x_full, pos_emd_mask.shape[1]) 159 | 160 | x_out = x_out.permute(0, 2, 1).contiguous() 161 | x_out = self.fcn_dec(x_out) 162 | 163 | x_out = x_out.view(b, self.num_joints_out, 2, -1) 164 | x_out = x_out.permute(0, 2, 3, 1).contiguous().unsqueeze(dim=-1) 165 | 166 | return x_out 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | import torch 9 | import torch.nn as nn 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | 13 | def mpjpe(predicted, target, weights=None, gamma=0, return_weights=False): 14 | """ 15 | Mean per-joint position error (i.e. mean Euclidean distance), 16 | often referred to as "Protocol #1" in many papers. 
17 | """ 18 | assert predicted.shape == target.shape 19 | norm = torch.norm(predicted - target, dim=len(target.shape)-1)#.mean(axis=2).squeeze(-1) 20 | if weights is not None: 21 | norm = (weights ** gamma) * norm 22 | # norm = (weights.view(-1, 1, 1) ** gamma) * norm 23 | 24 | if return_weights: 25 | return torch.mean(norm), norm 26 | else: 27 | return torch.mean(norm) #, norm 28 | # return torch.mean(torch.norm(predicted - target, dim=len(target.shape)-1)) 29 | 30 | def pck(pred, gt): 31 | error = np.linalg.norm(pred - gt, ord=2, axis=-1) 32 | pck = (error < 0.15).astype(np.float32).mean() * 100 33 | return pck 34 | 35 | def auc(pred, gt): 36 | error = np.linalg.norm(pred - gt, ord=2, axis=-1) 37 | 38 | thresholds = np.linspace(0., 0.15, 31) 39 | pck_values = np.zeros(len(thresholds)) 40 | for i in range(len(thresholds)): 41 | pck_values[i] = (error < thresholds[i]).astype(np.float32).mean() 42 | 43 | auc = pck_values.mean() * 100 44 | return auc 45 | 46 | class kl_loss(nn.Module): 47 | def __init__(self, num_bins): 48 | super(kl_loss, self).__init__() 49 | self.LogSoftmax = nn.LogSoftmax(dim=-1) #[B,LOGITS] 50 | self.criterion_ = nn.KLDivLoss(reduction='mean') 51 | self.num_bins = num_bins 52 | 53 | def criterion(self, dec_outs, labels): 54 | scores = self.LogSoftmax(dec_outs) 55 | loss = self.criterion_(scores, labels) 56 | return loss 57 | 58 | def forward(self, predicted, target, weights=None, gamma=0): 59 | output_x, output_y, output_z = predicted 60 | target_x = target[:,:,:,:self.num_bins[0]] 61 | target_y = target[:,:,:,self.num_bins[0]:self.num_bins[0]+self.num_bins[1]] 62 | target_z = target[:,:,:,-self.num_bins[2]:] 63 | num_joints = output_x.size(2) 64 | 65 | loss = 0 66 | for idx in range(num_joints): 67 | loss += self.criterion(output_x[:,:,idx],target_x[:,:,idx]) 68 | loss += self.criterion(output_y[:,:,idx],target_y[:,:,idx]) 69 | loss += self.criterion(output_z[:,:,idx],target_z[:,:,idx]) 70 | return loss / num_joints 71 | 72 | # def kl_loss(predicted, target, weights=None, gamma=0): 73 | # LogSoftmax = nn.LogSoftmax(dim=-1) 74 | # loss = nn.KLDivLoss(reduction="mean") 75 | # return loss(LogSoftmax(predicted), target) 76 | 77 | def mse(predicted, target, weights=None, gamma=0): 78 | loss = nn.MSELoss() 79 | return loss(predicted, target) 80 | 81 | def cross_entropy(predicted, target, weights=None, gamma=0, return_weights=False): 82 | loss = nn.CrossEntropyLoss() 83 | return loss(predicted.permute(0, 4, 1, 2, 3), target) 84 | 85 | def weighted_mpjpe(predicted, target, w): 86 | """ 87 | Weighted mean per-joint position error (i.e. mean Euclidean distance) 88 | """ 89 | assert predicted.shape == target.shape 90 | assert w.shape[0] == predicted.shape[0] 91 | return torch.mean(w * torch.norm(predicted - target, dim=len(target.shape)-1)) 92 | 93 | def p_mpjpe(predicted, target): 94 | """ 95 | Pose error: MPJPE after rigid alignment (scale, rotation, and translation), 96 | often referred to as "Protocol #2" in many papers. 
97 | """ 98 | assert predicted.shape == target.shape 99 | 100 | muX = np.mean(target, axis=1, keepdims=True) 101 | muY = np.mean(predicted, axis=1, keepdims=True) 102 | 103 | X0 = target - muX 104 | Y0 = predicted - muY 105 | 106 | normX = np.sqrt(np.sum(X0**2, axis=(1, 2), keepdims=True)) 107 | normY = np.sqrt(np.sum(Y0**2, axis=(1, 2), keepdims=True)) 108 | 109 | X0 /= normX 110 | Y0 /= normY 111 | 112 | H = np.matmul(X0.transpose(0, 2, 1), Y0) 113 | U, s, Vt = np.linalg.svd(H) 114 | V = Vt.transpose(0, 2, 1) 115 | R = np.matmul(V, U.transpose(0, 2, 1)) 116 | 117 | # Avoid improper rotations (reflections), i.e. rotations with det(R) = -1 118 | sign_detR = np.sign(np.expand_dims(np.linalg.det(R), axis=1)) 119 | V[:, :, -1] *= sign_detR 120 | s[:, -1] *= sign_detR.flatten() 121 | R = np.matmul(V, U.transpose(0, 2, 1)) # Rotation 122 | 123 | tr = np.expand_dims(np.sum(s, axis=1, keepdims=True), axis=2) 124 | 125 | a = tr * normX / normY # Scale 126 | t = muX - a*np.matmul(muY, R) # Translation 127 | 128 | # Perform rigid transformation on the input 129 | predicted_aligned = a*np.matmul(predicted, R) + t 130 | 131 | # Return MPJPE 132 | return np.mean(np.linalg.norm(predicted_aligned - target, axis=len(target.shape)-1)) 133 | 134 | def n_mpjpe(predicted, target): 135 | """ 136 | Normalized MPJPE (scale only), adapted from: 137 | https://github.com/hrhodin/UnsupervisedGeometryAwareRepresentationLearning/blob/master/losses/poses.py 138 | """ 139 | assert predicted.shape == target.shape 140 | 141 | norm_predicted = torch.mean(torch.sum(predicted**2, dim=3, keepdim=True), dim=2, keepdim=True) 142 | norm_target = torch.mean(torch.sum(target*predicted, dim=3, keepdim=True), dim=2, keepdim=True) 143 | scale = norm_target / norm_predicted 144 | return mpjpe(scale * predicted, target)#[0] 145 | 146 | def weighted_bonelen_loss(predict_3d_length, gt_3d_length): 147 | loss_length = 0.001 * torch.pow(predict_3d_length - gt_3d_length, 2).mean() 148 | return loss_length 149 | 150 | def weighted_boneratio_loss(predict_3d_length, gt_3d_length): 151 | loss_length = 0.1 * torch.pow((predict_3d_length - gt_3d_length)/gt_3d_length, 2).mean() 152 | return loss_length 153 | 154 | def mean_velocity_error(predicted, target): 155 | """ 156 | Mean per-joint velocity error (i.e. 
mean Euclidean distance of the 1st derivative) 157 | """ 158 | assert predicted.shape == target.shape 159 | 160 | velocity_predicted = np.diff(predicted, axis=0) 161 | velocity_target = np.diff(target, axis=0) 162 | 163 | return np.mean(np.linalg.norm(velocity_predicted - velocity_target, axis=len(target.shape)-1)) -------------------------------------------------------------------------------- /ContextPose/mvn/models/cpn/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import time 4 | import matplotlib.pyplot as plt 5 | 6 | import torch 7 | import torch.nn.parallel 8 | import torch.backends.cudnn as cudnn 9 | import torch.optim 10 | import torchvision.datasets as datasets 11 | 12 | from config import cfg 13 | from utils.logger import Logger 14 | from utils.evaluation import AverageMeter 15 | from utils.misc import save_model, adjust_learning_rate 16 | from utils.osutils import mkdir_p, isfile, isdir, join 17 | from networks import network 18 | from dataloader.mscocoMulti import MscocoMulti 19 | 20 | 21 | def main(args): 22 | # create checkpoint dir 23 | if not isdir(args.checkpoint): 24 | mkdir_p(args.checkpoint) 25 | 26 | # create model 27 | model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained = True) 28 | model = torch.nn.DataParallel(model).cuda() 29 | 30 | # define loss function (criterion) and optimizer 31 | criterion1 = torch.nn.MSELoss().cuda() # for Global loss 32 | criterion2 = torch.nn.MSELoss(reduce=False).cuda() # for refine loss 33 | optimizer = torch.optim.Adam(model.parameters(), 34 | lr = cfg.lr, 35 | weight_decay=cfg.weight_decay) 36 | 37 | if args.resume: 38 | if isfile(args.resume): 39 | print("=> loading checkpoint '{}'".format(args.resume)) 40 | checkpoint = torch.load(args.resume) 41 | pretrained_dict = checkpoint['state_dict'] 42 | model.load_state_dict(pretrained_dict) 43 | args.start_epoch = checkpoint['epoch'] 44 | optimizer.load_state_dict(checkpoint['optimizer']) 45 | print("=> loaded checkpoint '{}' (epoch {})" 46 | .format(args.resume, checkpoint['epoch'])) 47 | logger = Logger(join(args.checkpoint, 'log.txt'), resume=True) 48 | else: 49 | print("=> no checkpoint found at '{}'".format(args.resume)) 50 | else: 51 | logger = Logger(join(args.checkpoint, 'log.txt')) 52 | logger.set_names(['Epoch', 'LR', 'Train Loss']) 53 | 54 | cudnn.benchmark = True 55 | print(' Total params: %.2fMB' % (sum(p.numel() for p in model.parameters())/(1024*1024)*4)) 56 | 57 | train_loader = torch.utils.data.DataLoader( 58 | MscocoMulti(cfg), 59 | batch_size=cfg.batch_size*args.num_gpus, shuffle=True, 60 | num_workers=args.workers, pin_memory=True) 61 | 62 | for epoch in range(args.start_epoch, args.epochs): 63 | lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma) 64 | print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr)) 65 | 66 | # train for one epoch 67 | train_loss = train(train_loader, model, [criterion1, criterion2], optimizer) 68 | print('train_loss: ',train_loss) 69 | 70 | # append logger file 71 | logger.append([epoch + 1, lr, train_loss]) 72 | 73 | save_model({ 74 | 'epoch': epoch + 1, 75 | 'state_dict': model.state_dict(), 76 | 'optimizer' : optimizer.state_dict(), 77 | }, checkpoint=args.checkpoint) 78 | 79 | logger.close() 80 | 81 | 82 | 83 | def train(train_loader, model, criterions, optimizer): 84 | # prepare for refine loss 85 | def ohkm(loss, top_k): 86 | ohkm_loss = 0. 
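        # Online hard keypoint mining (OHKM): for each sample, keep only the top_k largest
        # per-keypoint refine losses, average those, then average over the batch.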
87 | for i in range(loss.size()[0]): 88 | sub_loss = loss[i] 89 | topk_val, topk_idx = torch.topk(sub_loss, k=top_k, dim=0, sorted=False) 90 | tmp_loss = torch.gather(sub_loss, 0, topk_idx) 91 | ohkm_loss += torch.sum(tmp_loss) / top_k 92 | ohkm_loss /= loss.size()[0] 93 | return ohkm_loss 94 | criterion1, criterion2 = criterions 95 | 96 | batch_time = AverageMeter() 97 | data_time = AverageMeter() 98 | losses = AverageMeter() 99 | 100 | # switch to train mode 101 | model.train() 102 | 103 | for i, (inputs, targets, valid, meta) in enumerate(train_loader): 104 | input_var = torch.autograd.Variable(inputs.cuda()) 105 | 106 | target15, target11, target9, target7 = targets 107 | refine_target_var = torch.autograd.Variable(target7.cuda(async=True)) 108 | valid_var = torch.autograd.Variable(valid.cuda(async=True)) 109 | 110 | # compute output 111 | global_outputs, refine_output = model(input_var) 112 | score_map = refine_output.data.cpu() 113 | 114 | loss = 0. 115 | global_loss_record = 0. 116 | refine_loss_record = 0. 117 | # comput global loss and refine loss 118 | for global_output, label in zip(global_outputs, targets): 119 | num_points = global_output.size()[1] 120 | global_label = label * (valid > 1.1).type(torch.FloatTensor).view(-1, num_points, 1, 1) 121 | global_loss = criterion1(global_output, torch.autograd.Variable(global_label.cuda(async=True))) / 2.0 122 | loss += global_loss 123 | global_loss_record += global_loss.data.item() 124 | refine_loss = criterion2(refine_output, refine_target_var) 125 | refine_loss = refine_loss.mean(dim=3).mean(dim=2) 126 | refine_loss *= (valid_var > 0.1).type(torch.cuda.FloatTensor) 127 | refine_loss = ohkm(refine_loss, 8) 128 | loss += refine_loss 129 | refine_loss_record = refine_loss.data.item() 130 | 131 | # record loss 132 | losses.update(loss.data.item(), inputs.size(0)) 133 | 134 | # compute gradient and do Optimization step 135 | optimizer.zero_grad() 136 | loss.backward() 137 | optimizer.step() 138 | 139 | if(i%100==0 and i!=0): 140 | print('iteration {} | loss: {}, global loss: {}, refine loss: {}, avg loss: {}' 141 | .format(i, loss.data.item(), global_loss_record, 142 | refine_loss_record, losses.avg)) 143 | 144 | return losses.avg 145 | 146 | 147 | 148 | if __name__ == '__main__': 149 | parser = argparse.ArgumentParser(description='PyTorch CPN Training') 150 | parser.add_argument('-j', '--workers', default=12, type=int, metavar='N', 151 | help='number of data loading workers (default: 12)') 152 | parser.add_argument('-g', '--num_gpus', default=1, type=int, metavar='N', 153 | help='number of GPU to use (default: 1)') 154 | parser.add_argument('--epochs', default=32, type=int, metavar='N', 155 | help='number of total epochs to run (default: 32)') 156 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 157 | help='manual epoch number (useful on restarts)') 158 | parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH', 159 | help='path to save checkpoint (default: checkpoint)') 160 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 161 | help='path to latest checkpoint') 162 | 163 | 164 | main(parser.parse_args()) 165 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/cpn/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import torch 5 | import torch.nn.parallel 6 | import torch.optim 7 | import cv2 8 | import json 9 | import numpy 
as np 10 | 11 | from test_config import cfg 12 | from pycocotools.coco import COCO 13 | from pycocotools.cocoeval import COCOeval 14 | from utils.osutils import mkdir_p, isdir 15 | from utils.imutils import im_to_numpy, im_to_torch 16 | from networks import network 17 | from dataloader.mscocoMulti import MscocoMulti 18 | from tqdm import tqdm 19 | 20 | def main(args): 21 | # create model 22 | model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained = False) 23 | model = torch.nn.DataParallel(model).cuda() 24 | 25 | test_loader = torch.utils.data.DataLoader( 26 | MscocoMulti(cfg, train=False), 27 | batch_size=args.batch*args.num_gpus, shuffle=False, 28 | num_workers=args.workers, pin_memory=True) 29 | 30 | # load trainning weights 31 | checkpoint_file = os.path.join(args.checkpoint, args.test+'.pth.tar') 32 | checkpoint = torch.load(checkpoint_file) 33 | model.load_state_dict(checkpoint['state_dict']) 34 | print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch'])) 35 | 36 | # change to evaluation mode 37 | model.eval() 38 | 39 | print('testing...') 40 | full_result = [] 41 | for i, (inputs, meta) in tqdm(enumerate(test_loader)): 42 | with torch.no_grad(): 43 | input_var = torch.autograd.Variable(inputs.cuda()) 44 | if args.flip == True: 45 | flip_inputs = inputs.clone() 46 | for i, finp in enumerate(flip_inputs): 47 | finp = im_to_numpy(finp) 48 | finp = cv2.flip(finp, 1) 49 | flip_inputs[i] = im_to_torch(finp) 50 | flip_input_var = torch.autograd.Variable(flip_inputs.cuda()) 51 | 52 | # compute output 53 | global_outputs, refine_output = model(input_var) 54 | score_map = refine_output.data.cpu() 55 | score_map = score_map.numpy() 56 | 57 | if args.flip == True: 58 | flip_global_outputs, flip_output = model(flip_input_var) 59 | flip_score_map = flip_output.data.cpu() 60 | flip_score_map = flip_score_map.numpy() 61 | 62 | for i, fscore in enumerate(flip_score_map): 63 | fscore = fscore.transpose((1,2,0)) 64 | fscore = cv2.flip(fscore, 1) 65 | fscore = list(fscore.transpose((2,0,1))) 66 | for (q, w) in cfg.symmetry: 67 | fscore[q], fscore[w] = fscore[w], fscore[q] 68 | fscore = np.array(fscore) 69 | score_map[i] += fscore 70 | score_map[i] /= 2 71 | 72 | ids = meta['imgID'].numpy() 73 | det_scores = meta['det_scores'] 74 | for b in range(inputs.size(0)): 75 | details = meta['augmentation_details'] 76 | single_result_dict = {} 77 | single_result = [] 78 | 79 | single_map = score_map[b] 80 | r0 = single_map.copy() 81 | r0 /= 255 82 | r0 += 0.5 83 | v_score = np.zeros(17) 84 | for p in range(17): 85 | single_map[p] /= np.amax(single_map[p]) 86 | border = 10 87 | dr = np.zeros((cfg.output_shape[0] + 2*border, cfg.output_shape[1]+2*border)) 88 | dr[border:-border, border:-border] = single_map[p].copy() 89 | dr = cv2.GaussianBlur(dr, (21, 21), 0) 90 | lb = dr.argmax() 91 | y, x = np.unravel_index(lb, dr.shape) 92 | dr[y, x] = 0 93 | lb = dr.argmax() 94 | py, px = np.unravel_index(lb, dr.shape) 95 | y -= border 96 | x -= border 97 | py -= border + y 98 | px -= border + x 99 | ln = (px ** 2 + py ** 2) ** 0.5 100 | delta = 0.25 101 | if ln > 1e-3: 102 | x += delta * px / ln 103 | y += delta * py / ln 104 | x = max(0, min(x, cfg.output_shape[1] - 1)) 105 | y = max(0, min(y, cfg.output_shape[0] - 1)) 106 | resy = float((4 * y + 2) / cfg.data_shape[0] * (details[b][3] - details[b][1]) + details[b][1]) 107 | resx = float((4 * x + 2) / cfg.data_shape[1] * (details[b][2] - details[b][0]) + details[b][0]) 108 | v_score[p] = float(r0[p, 
int(round(y)+1e-10), int(round(x)+1e-10)]) 109 | single_result.append(resx) 110 | single_result.append(resy) 111 | single_result.append(1) 112 | if len(single_result) != 0: 113 | single_result_dict['image_id'] = int(ids[b]) 114 | single_result_dict['category_id'] = 1 115 | single_result_dict['keypoints'] = single_result 116 | single_result_dict['score'] = float(det_scores[b])*v_score.mean() 117 | full_result.append(single_result_dict) 118 | 119 | result_path = args.result 120 | if not isdir(result_path): 121 | mkdir_p(result_path) 122 | result_file = os.path.join(result_path, 'result.json') 123 | with open(result_file,'w') as wf: 124 | json.dump(full_result, wf) 125 | 126 | # evaluate on COCO 127 | eval_gt = COCO(cfg.ori_gt_path) 128 | eval_dt = eval_gt.loadRes(result_file) 129 | cocoEval = COCOeval(eval_gt, eval_dt, iouType='keypoints') 130 | cocoEval.evaluate() 131 | cocoEval.accumulate() 132 | cocoEval.summarize() 133 | 134 | if __name__ == '__main__': 135 | parser = argparse.ArgumentParser(description='PyTorch CPN Test') 136 | parser.add_argument('-j', '--workers', default=12, type=int, metavar='N', 137 | help='number of data loading workers (default: 12)') 138 | parser.add_argument('-g', '--num_gpus', default=1, type=int, metavar='N', 139 | help='number of GPU to use (default: 1)') 140 | parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH', 141 | help='path to load checkpoint (default: checkpoint)') 142 | parser.add_argument('-f', '--flip', default=True, type=bool, 143 | help='flip input image during test (default: True)') 144 | parser.add_argument('-b', '--batch', default=128, type=int, 145 | help='test batch size (default: 128)') 146 | parser.add_argument('-t', '--test', default='CPN256x192', type=str, 147 | help='using which checkpoint to be tested (default: CPN256x192') 148 | parser.add_argument('-r', '--result', default='result', type=str, 149 | help='path to save save result (default: result)') 150 | main(parser.parse_args()) -------------------------------------------------------------------------------- /H36M-Toolbox/transform.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import cv2 3 | import numpy as np 4 | import os.path as osp 5 | import h5py 6 | 7 | 8 | def _infer_box(pose3d, camera, rootIdx): 9 | root_joint = pose3d[rootIdx, :] 10 | tl_joint = root_joint.copy() 11 | tl_joint[0] -= 1000.0 12 | tl_joint[1] -= 900.0 13 | br_joint = root_joint.copy() 14 | br_joint[0] += 1000.0 15 | br_joint[1] += 1100.0 16 | tl_joint = np.reshape(tl_joint, (1, 3)) 17 | br_joint = np.reshape(br_joint, (1, 3)) 18 | 19 | tl2d = _weak_project(tl_joint, camera['fx'], camera['fy'], camera['cx'], 20 | camera['cy']).flatten() 21 | 22 | br2d = _weak_project(br_joint, camera['fx'], camera['fy'], camera['cx'], 23 | camera['cy']).flatten() 24 | return np.array([tl2d[0], tl2d[1], br2d[0], br2d[1]]) 25 | 26 | 27 | def _weak_project(pose3d, fx, fy, cx, cy): 28 | pose2d = pose3d[:, :2] / pose3d[:, 2:3] 29 | pose2d[:, 0] *= fx 30 | pose2d[:, 1] *= fy 31 | pose2d[:, 0] += cx 32 | pose2d[:, 1] += cy 33 | return pose2d 34 | 35 | 36 | def get_3rd_point(a, b): 37 | direct = a - b 38 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 39 | 40 | 41 | def get_dir(src_point, rot_rad): 42 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 43 | 44 | src_result = [0, 0] 45 | src_result[0] = src_point[0] * cs - src_point[1] * sn 46 | src_result[1] = src_point[0] * sn + src_point[1] * cs 47 | 48 | return src_result 49 | 
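# A minimal usage sketch for the helpers below (field names such as sample['image'],
# sample['center'], sample['scale'] and sample['joints_2d'] are assumptions modeled on the
# commented-out demo at the bottom of this file, not a guaranteed pickle layout):
#
#   sample = pickle.load(open('./h36m_train_hr.pkl', 'rb'))[0]
#   img = cv2.imread(osp.join('images', sample['image']))
#   trans = get_affine_transform(sample['center'], sample['scale'], 0, [192, 256])
#   crop = cv2.warpAffine(img, trans, (192, 256), flags=cv2.INTER_LINEAR)   # 192x256 person crop
#   joints_crop = np.array([affine_transform(pt, trans) for pt in sample['joints_2d']])
#
# get_affine_transform builds the 2x3 matrix that maps the box given by center/scale
# (scale is in units of 200 px) onto output_size; pass inv=1 for the inverse mapping.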
50 | 
51 | def get_affine_transform(
52 | center, scale, rot, output_size,
53 | shift=np.array([0, 0], dtype=np.float32), inv=0
54 | ):
55 | center = np.array(center)
56 | scale = np.array(scale)
57 | 
58 | scale_tmp = scale * 200.0
59 | src_w = scale_tmp[0]
60 | dst_w = output_size[0]
61 | dst_h = output_size[1]
62 | 
63 | # rot_rad = np.pi * rot / 180
64 | 
65 | # src_dir = get_dir([0, (src_w-1) * -0.5], rot_rad)
66 | src_dir = np.array([0, (src_w-1) * -0.5], np.float32)
67 | dst_dir = np.array([0, (dst_w-1) * -0.5], np.float32)
68 | src = np.zeros((3, 2), dtype=np.float32)
69 | dst = np.zeros((3, 2), dtype=np.float32)
70 | src[0, :] = center + scale_tmp * shift
71 | src[1, :] = center + src_dir + scale_tmp * shift
72 | dst[0, :] = [(dst_w-1) * 0.5, (dst_h-1) * 0.5]
73 | dst[1, :] = np.array([(dst_w-1) * 0.5, (dst_h-1) * 0.5]) + dst_dir
74 | 
75 | src[2:, :] = get_3rd_point(src[0, :], src[1, :])
76 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
77 | 
78 | if inv:
79 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
80 | else:
81 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
82 | 
83 | return trans
84 | 
85 | 
86 | def affine_transform(pt, t):
87 | new_pt = np.array([pt[0], pt[1], 1.]).T
88 | new_pt = np.dot(t, new_pt)
89 | return new_pt[:2]
90 | 
91 | 
92 | def normalize_screen_coordinates(X, w, h):
93 | assert X.shape[-1] == 2
94 | 
95 | # Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio
96 | return X/w*2 - [1, h/w]
97 | 
98 | # train_data = pickle.load(file=open('./h36m_train_hr.pkl', 'rb'))
99 | # img = train_data[0]
100 | # path = osp.join('images', img['image'])
101 | # c = img['center'] # [x, y], measured from the top-left corner of the image
102 | # s = img['scale']
103 | # print(s)
104 | # pose2d = img['joints_2d_gt_crop']
105 | # print(pose2d)
106 | # print(c, s)
107 | # pose2d_1 = np.zeros_like(pose2d)
108 | # pose2d_1_inv = np.zeros_like(pose2d)
109 | # pose2d_2 = np.zeros_like(pose2d)
110 | # pose2d_2_inv = np.zeros_like(pose2d)
111 | # box = img['box']
112 | # left_top = np.array(pose2d[5])
113 | 
114 | # data_numpy = cv2.imread(
115 | # path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
116 | # )
117 | 
118 | # h, w, _ = data_numpy.shape
119 | 
120 | # trans = get_affine_transform(c, s, 0, [192, 256])
121 | # trans_inv = get_affine_transform(c, s, 0, [192, 256], inv=1)
122 | # left_top = affine_transform(left_top, trans)
123 | # input = cv2.warpAffine(
124 | # data_numpy,
125 | # trans,
126 | # ([192, 256]),
127 | # flags=cv2.INTER_LINEAR)
128 | 
129 | # for i in range(17):
130 | # pose2d_1[i] = affine_transform(pose2d[i], trans)
131 | # pose2d_2[i] = pose2d[i] / np.array([192, 256]) * np.array([288, 384])
132 | # cv2.circle(input, (int(pose2d[i,0]), int(pose2d[i,1])), 3, (0, 255, 0), -1)
133 | # print(pose2d_1)
134 | # print(pose2d_1/[4,4])
135 | # retval = cv2.imwrite("./demo/demo_00.jpg", input)
136 | 
137 | # trans = get_affine_transform(c, s, 0, [288, 384])
138 | # input = cv2.warpAffine(
139 | # data_numpy,
140 | # trans,
141 | # ([288, 384]),
142 | # flags=cv2.INTER_LINEAR)
143 | 
144 | # for i in range(17):
145 | # pose2d_1[i] = affine_transform(pose2d[i], trans)
146 | # print(pose2d_2[i])
147 | # cv2.circle(input, (int(pose2d_2[i,0]), int(pose2d_2[i,1])), 3, (0, 255, 0), -1)
148 | # print(pose2d_1)
149 | # print(pose2d_1/[4,4])
150 | # retval = cv2.imwrite("./demo/demo_01.jpg", input)
151 | 
152 | # trans = get_affine_transform(c, s, 0, [192//4, 256//4])
153 | # input = cv2.warpAffine(
154 | # data_numpy,
155 | # trans,
156 | # ([192//4, 256//4]),
157 | # flags=cv2.INTER_LINEAR)
158 | # for i in range(17):
159 | # pose2d_2[i] = affine_transform(pose2d[i], trans)
160 | # cv2.circle(input, (int(pose2d_1[i,0]), int(pose2d_1[i,1])), 3, (255, 0, 0), -1)
161 | # print(pose2d_2)
162 | # print(pose2d_2-pose2d_1/[4,4])
163 | # retval = cv2.imwrite("./demo/demo_01.jpg", input)
164 | 
165 | 
166 | # cv2.circle(input, (int(left_top[0]), int(left_top[1])), 10, (255, 0, 0), -1)
167 | # retval = cv2.imwrite("./demo/demo_00.jpg", input)
168 | 
169 | # input = input[:, ::-1]
170 | # left_top = affine_transform(left_top, trans)
171 | # cv2.circle(input, (int(w-left_top[0]-1),int(left_top[1])), 10, (255, 0, 0), -1)
172 | # retval = cv2.imwrite(".demo/demo_new.jpg", input) # "/demo.jpg" would be saved to the repo root directory
173 | 
174 | 
175 | 
176 | # joints_left = [4, 5, 6, 11, 12, 13]
177 | # joints_right = [1, 2, 3, 14, 15, 16]
178 | 
179 | # save_dir = osp.join('h36m_256x192', 'S1')
180 | # action = 'act_{:02d}_subact_{:02d}'.format(2, 1)
181 | # file = h5py.File(osp.join(save_dir, action+".h5"), "r")
182 | 
183 | # image = file['data'][:][55, 0, 0]
184 | # pose2d = file['pose2d'][:][55, 0, 0]
185 | # # image_flip = file['data'][:][0, 0, 1]
186 | # image_flip = np.array(image[:, ::-1], copy=True)
187 | # # pose2d_flip = file['pose2d'][:][-1, 0, 1]
188 | # pose2d_flip = np.array(pose2d, copy=True)
189 | # pose2d_flip[:, 0] = image.shape[1] - pose2d_flip[:, 0] - 1
190 | # pose2d_flip[joints_left + joints_right] = pose2d_flip[joints_right + joints_left]
191 | # print(image.shape, pose2d.shape)
192 | # # print((image!=image_flip[:,::-1]).sum())
193 | 
194 | # for i in range(17):
195 | # cv2.circle(image, (int(pose2d[i, 0]), int(pose2d[i, 1])), 2, (255, 0, 0), -1)
196 | # cv2.circle(image_flip, (int(pose2d_flip[i, 0]), int(pose2d_flip[i, 1])), 2, (0, 0, 255), -1)
197 | 
198 | # img = cv2.resize(image, (192//4,256//4))
199 | # retval = cv2.imwrite("./demo/demo_2023_small.jpg", img)
200 | # retval = cv2.imwrite("./demo/demo_2023.jpg", image)
201 | # retval = cv2.imwrite("./demo/demo_2023_flip.jpg", image_flip)
202 | 
203 | 
204 | 
205 | 
206 | 
207 | 
208 | 
209 | 
210 | 
--------------------------------------------------------------------------------
/ContextPose_mpi/common/cfg.py:
--------------------------------------------------------------------------------
1 | import yaml
2 | from easydict import EasyDict as edict
3 | import numpy as np
4 | import os
5 | 
6 | config = edict()
7 | 
8 | config.title = "human36m_vol_softmax_single"
9 | config.kind = "human36m"
10 | config.azureroot = ""
11 | config.logdir = "logs"
12 | config.batch_output = False
13 | config.vis_freq = 1000
14 | config.vis_n_elements = 10
15 | 
16 | 
17 | # model definition
18 | config.model = edict()
19 | config.model.name = "vol"
20 | config.model.kind = "mpii"
21 | config.model.image_shape = [384, 384]
22 | config.model.heatmap_shape = [96, 96]
23 | config.model.heatmap_softmax = True
24 | config.model.heatmap_multiplier = 100.0
25 | config.model.init_weights = True
26 | config.model.checkpoint = None
27 | 
28 | config.model.backbone = edict()
29 | config.model.backbone.name = "resnet152"
30 | config.model.backbone.style = "simple"
31 | config.model.backbone.num_final_layer_channel = 17
32 | config.model.backbone.num_joints = 17
33 | config.model.backbone.num_layers = 152
34 | config.model.backbone.init_weights = True
35 | config.model.backbone.fix_weights = True
36 | # config.model.backbone.checkpoint = "dataset/pretrained/pose_hrnet_w32_256x192.pth"
37 | config.model.backbone.checkpoint = 
"dataset/pretrained/pose_hrnet_w48_256x192.pth" 38 | # config.model.backbone.checkpoint = "dataset/pretrained/CPN50_256x192.pth.tar" 39 | 40 | config.model.backbone.NUM_JOINTS = 17 41 | config.model.backbone.PRETRAINED_LAYERS = ['*'] 42 | config.model.backbone.STEM_INPLANES = 64 43 | config.model.backbone.FINAL_CONV_KERNEL = 1 44 | 45 | config.model.backbone.STAGE2 = edict() 46 | config.model.backbone.STAGE2.NUM_MODULES = 1 47 | config.model.backbone.STAGE2.NUM_BRANCHES = 2 48 | config.model.backbone.STAGE2.NUM_BLOCKS = [4, 4] 49 | config.model.backbone.STAGE2.NUM_CHANNELS = [48, 96] 50 | config.model.backbone.STAGE2.BLOCK = 'BASIC' 51 | config.model.backbone.STAGE2.FUSE_METHOD = 'SUM' 52 | 53 | config.model.backbone.STAGE3 = edict() 54 | config.model.backbone.STAGE3.NUM_MODULES = 4 55 | config.model.backbone.STAGE3.NUM_BRANCHES = 3 56 | config.model.backbone.STAGE3.NUM_BLOCKS = [4, 4, 4] 57 | config.model.backbone.STAGE3.NUM_CHANNELS = [48, 96, 192] 58 | config.model.backbone.STAGE3.BLOCK = 'BASIC' 59 | config.model.backbone.STAGE3.FUSE_METHOD = 'SUM' 60 | 61 | config.model.backbone.STAGE4 = edict() 62 | config.model.backbone.STAGE4.NUM_MODULES = 3 63 | config.model.backbone.STAGE4.NUM_BRANCHES = 4 64 | config.model.backbone.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 65 | config.model.backbone.STAGE4.NUM_CHANNELS = [48, 96, 192, 384] 66 | config.model.backbone.STAGE4.BLOCK = 'BASIC' 67 | config.model.backbone.STAGE4.FUSE_METHOD = 'SUM' 68 | 69 | config.model.volume_net = edict() 70 | config.model.volume_net.volume_aggregation_method = "softmax" 71 | config.model.volume_net.use_gt_pelvis = False 72 | config.model.volume_net.cuboid_size = 2500.0 73 | config.model.volume_net.volume_size = 64 74 | config.model.volume_net.volume_multiplier = 1.0 75 | config.model.volume_net.volume_softmax = True 76 | config.model.volume_net.use_feature_v2v = True 77 | config.model.volume_net.att_channels = 51 78 | config.model.volume_net.temperature = 1500 79 | 80 | config.model.poseformer = edict() 81 | config.model.poseformer.base_dim = 48 82 | config.model.poseformer.embed_dim_ratio = 96 83 | config.model.poseformer.depth = 4 84 | config.model.poseformer.levels = 4 85 | 86 | # loss related params 87 | config.loss = edict() 88 | config.loss.criterion = "MAE" 89 | config.loss.mse_smooth_threshold = 0 90 | config.loss.grad_clip = 0 91 | config.loss.scale_keypoints_3d = 0.1 92 | config.loss.use_volumetric_ce_loss = True 93 | config.loss.volumetric_ce_loss_weight = 0.01 94 | config.loss.use_global_attention_loss = True 95 | config.loss.global_attention_loss_weight = 1000000 96 | 97 | # dataset related params 98 | config.dataset = edict() 99 | config.dataset.kind = "human36m" 100 | config.dataset.data_format = '' 101 | config.dataset.transfer_cmu_to_human36m = False 102 | config.dataset.root = "../H36M-Toolbox/images/" 103 | config.dataset.extra_root = "data/human36m/extra" 104 | config.dataset.train_labels_path = "data/human36m/extra/human36m-multiview-labels-GTbboxes.npy" 105 | config.dataset.val_labels_path = "data/human36m/extra/human36m-multiview-labels-GTbboxes.npy" 106 | config.dataset.train_dataset = "multiview_human36m" 107 | config.dataset.val_dataset = "human36m" 108 | 109 | # train related params 110 | config.train = edict() 111 | config.train.n_objects_per_epoch = 15000 112 | config.train.n_epochs = 9999 113 | config.train.n_iters_per_epoch = 5000 114 | config.train.batch_size = 3 115 | config.train.optimizer = 'Adam' 116 | config.train.backbone_lr = 0.0001 117 | config.train.backbone_lr_step = [1000] 118 | 
config.train.backbone_lr_factor = 0.1 119 | config.train.process_features_lr = 0.001 120 | config.train.volume_net_lr = 0.001 121 | config.train.volume_net_lr_decay = 0.99 122 | config.train.volume_net_lr_step = [1000] 123 | config.train.volume_net_lr_factor = 0.5 124 | config.train.with_damaged_actions = True 125 | config.train.undistort_images = True 126 | config.train.scale_bbox = 1.0 127 | config.train.ignore_cameras = [] 128 | config.train.crop = True 129 | config.train.erase = False 130 | config.train.shuffle = True 131 | config.train.randomize_n_views = True 132 | config.train.min_n_views = 1 133 | config.train.max_n_views = 1 134 | config.train.num_workers = 8 135 | config.train.limb_length_path = "data/human36m/extra/mean_and_std_limb_length.h5" 136 | config.train.pred_results_path = "data/pretrained/human36m/human36m_alg_10-04-2019/checkpoints/0060/results/train.pkl" 137 | 138 | # val related params 139 | config.val = edict() 140 | config.val.flip_test = True 141 | config.val.batch_size = 6 142 | config.val.with_damaged_actions = True 143 | config.val.undistort_images = True 144 | config.val.scale_bbox = 1.0 145 | config.val.ignore_cameras = [] 146 | config.val.crop = True 147 | config.val.erase = False 148 | config.val.shuffle = False 149 | config.val.randomize_n_views = True 150 | config.val.min_n_views = 1 151 | config.val.max_n_views = 1 152 | config.val.num_workers = 10 153 | config.val.retain_every_n_frames_in_test = 1 154 | config.val.limb_length_path = "data/human36m/extra/mean_and_std_limb_length.h5" 155 | config.val.pred_results_path = "data/pretrained/human36m/human36m_alg_10-04-2019/checkpoints/0060/results/val.pkl" 156 | 157 | def update_dict(v, cfg): 158 | for kk, vv in v.items(): 159 | if kk in cfg: 160 | if isinstance(vv, dict): 161 | update_dict(vv, cfg[kk]) 162 | else: 163 | cfg[kk] = vv 164 | else: 165 | raise ValueError("{} not exist in cfg.py".format(kk)) 166 | 167 | 168 | def update_config(path): 169 | exp_config = None 170 | with open(path) as fin: 171 | exp_config = edict(yaml.safe_load(fin)) 172 | update_dict(exp_config, config) 173 | 174 | 175 | def handle_azureroot(config_dict, azureroot): 176 | for key in config_dict.keys(): 177 | if isinstance(config_dict[key], str): 178 | if config_dict[key].startswith('data/'): 179 | config_dict[key] = os.path.join(azureroot, config_dict[key]) 180 | elif isinstance(config_dict[key], dict): 181 | handle_azureroot(config_dict[key], azureroot) 182 | 183 | 184 | def update_dir(azureroot, logdir): 185 | config.azureroot = azureroot 186 | config.logdir = os.path.join(config.azureroot, logdir) 187 | if config.model.checkpoint != None and not config.model.checkpoint.startswith('data/'): 188 | config.model.checkpoint = os.path.join(config.azureroot, config.model.checkpoint) 189 | handle_azureroot(config, config.azureroot) 190 | 191 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | from torch import nn 5 | 6 | 7 | def UNCERTAINTY(sigma_list, keypoints_pred, keypoints_gt): 8 | loss = 0.0 9 | diff = keypoints_pred - keypoints_gt 10 | for sigma in sigma_list: 11 | loss_term = torch.mean(torch.norm(diff / (sigma + 1e-6), dim=len(keypoints_gt.shape)-1)) + 0.01 * torch.mean(torch.log(sigma + 1e-6)) 12 | loss += loss_term 13 | return loss 14 | 15 | 16 | class MPJPE(nn.Module): 17 | def __init__(self): 18 | super().__init__() 19 | 20 | def 
forward(self, keypoints_pred, keypoints_gt): 21 | assert keypoints_pred.shape == keypoints_gt.shape 22 | return torch.mean(torch.norm(keypoints_pred - keypoints_gt, dim=len(keypoints_gt.shape)-1)) 23 | 24 | 25 | class P_MPJPE(nn.Module): 26 | def __init__(self): 27 | super().__init__() 28 | 29 | def forward(self, keypoints_pred, keypoints_gt): 30 | """ 31 | Pose error: MPJPE after rigid alignment (scale, rotation, and translation), 32 | often referred to as "Protocol #2" in many papers. 33 | """ 34 | assert keypoints_pred.shape == keypoints_gt.shape 35 | 36 | muX = np.mean(keypoints_gt, axis=1, keepdims=True) 37 | muY = np.mean(keypoints_pred, axis=1, keepdims=True) 38 | 39 | X0 = keypoints_gt - muX 40 | Y0 = keypoints_pred - muY 41 | 42 | normX = np.sqrt(np.sum(X0**2, axis=(1, 2), keepdims=True)) 43 | normY = np.sqrt(np.sum(Y0**2, axis=(1, 2), keepdims=True)) 44 | 45 | X0 /= normX 46 | Y0 /= normY 47 | 48 | H = np.matmul(X0.transpose(0, 2, 1), Y0) 49 | U, s, Vt = np.linalg.svd(H) 50 | V = Vt.transpose(0, 2, 1) 51 | R = np.matmul(V, U.transpose(0, 2, 1)) 52 | 53 | # Avoid improper rotations (reflections), i.e. rotations with det(R) = -1 54 | sign_detR = np.sign(np.expand_dims(np.linalg.det(R), axis=1)) 55 | V[:, :, -1] *= sign_detR 56 | s[:, -1] *= sign_detR.flatten() 57 | R = np.matmul(V, U.transpose(0, 2, 1)) # Rotation 58 | 59 | tr = np.expand_dims(np.sum(s, axis=1, keepdims=True), axis=2) 60 | 61 | a = tr * normX / normY # Scale 62 | t = muX - a*np.matmul(muY, R) # Translation 63 | 64 | # Perform rigid transformation on the input 65 | keypoints_pred_aligned = a*np.matmul(keypoints_pred, R) + t 66 | 67 | # Return MPJPE 68 | return np.mean(np.linalg.norm(keypoints_pred_aligned - keypoints_gt, axis=len(keypoints_gt.shape)-1)) 69 | 70 | 71 | class N_MPJPE(nn.Module): 72 | def __init__(self): 73 | super().__init__() 74 | 75 | def forward(self, keypoints_pred, keypoints_gt): 76 | """ 77 | Normalized MPJPE (scale only), adapted from: 78 | https://github.com/hrhodin/UnsupervisedGeometryAwareRepresentationLearning/blob/master/losses/poses.py 79 | """ 80 | assert keypoints_pred.shape == keypoints_gt.shape 81 | 82 | norm_keypoints_pred = torch.mean(torch.sum(keypoints_pred**2, dim=3, keepdim=True), dim=2, keepdim=True) 83 | norm_keypoints_gt = torch.mean(torch.sum(keypoints_gt*keypoints_pred, dim=3, keepdim=True), dim=2, keepdim=True) 84 | scale = norm_keypoints_gt / norm_keypoints_pred 85 | return MPJPE()(scale * keypoints_pred, keypoints_gt)#[0] 86 | 87 | class MPJVE(nn.Module): 88 | def __init__(self): 89 | super().__init__() 90 | 91 | def forward(self, keypoints_pred, keypoints_gt): 92 | # def mean_velocity_error(predicted, target): 93 | """ 94 | Mean per-joint velocity error (i.e. 
mean Euclidean distance of the 1st derivative) 95 | """ 96 | assert keypoints_pred.shape == keypoints_gt.shape 97 | 98 | velocity_predicted = np.diff(keypoints_pred, axis=0) 99 | velocity_target = np.diff(keypoints_gt, axis=0) 100 | 101 | return np.mean(np.linalg.norm(velocity_predicted - velocity_target, axis=len(keypoints_gt.shape)-1)) 102 | 103 | 104 | class KeypointsMSELoss(nn.Module): 105 | def __init__(self): 106 | super().__init__() 107 | 108 | def forward(self, keypoints_pred, keypoints_gt, keypoints_binary_validity): 109 | dimension = keypoints_pred.shape[-1] 110 | loss = torch.sum((keypoints_gt - keypoints_pred) ** 2 * keypoints_binary_validity) 111 | loss = loss / (dimension * max(1, torch.sum(keypoints_binary_validity).item())) 112 | return loss 113 | 114 | 115 | class KeypointsMSESmoothLoss(nn.Module): 116 | def __init__(self, threshold=400): 117 | super().__init__() 118 | 119 | self.threshold = threshold 120 | 121 | def forward(self, keypoints_pred, keypoints_gt, keypoints_binary_validity): 122 | dimension = keypoints_pred.shape[-1] 123 | diff = (keypoints_gt - keypoints_pred) ** 2 * keypoints_binary_validity 124 | diff[diff > self.threshold] = torch.pow(diff[diff > self.threshold], 0.1) * (self.threshold ** 0.9) 125 | loss = torch.sum(diff) / (dimension * max(1, torch.sum(keypoints_binary_validity).item())) 126 | return loss 127 | 128 | 129 | class KeypointsMAELoss(nn.Module): 130 | def __init__(self): 131 | super().__init__() 132 | 133 | def forward(self, keypoints_pred, keypoints_gt, keypoints_binary_validity): 134 | dimension = keypoints_pred.shape[-1] 135 | loss = torch.sum(torch.abs(keypoints_gt - keypoints_pred) * keypoints_binary_validity) 136 | loss = loss / (dimension * max(1, torch.sum(keypoints_binary_validity).item())) 137 | return loss 138 | 139 | 140 | class KeypointsL2Loss(nn.Module): 141 | def __init__(self): 142 | super().__init__() 143 | 144 | def forward(self, keypoints_pred, keypoints_gt, keypoints_binary_validity): 145 | loss = torch.sum(torch.sqrt(torch.sum((keypoints_gt - keypoints_pred) ** 2 * keypoints_binary_validity, dim=2))) 146 | loss = loss / max(1, torch.sum(keypoints_binary_validity).item()) 147 | return loss 148 | 149 | 150 | class VolumetricCELoss(nn.Module): 151 | def __init__(self): 152 | super().__init__() 153 | 154 | def forward(self, coord_volumes_batch, volumes_batch_pred, keypoints_gt, keypoints_binary_validity): 155 | loss = 0.0 156 | n_losses = 0 157 | 158 | batch_size = volumes_batch_pred.shape[0] 159 | for batch_i in range(batch_size): 160 | coord_volume = coord_volumes_batch[batch_i] 161 | keypoints_gt_i = keypoints_gt[batch_i] 162 | 163 | coord_volume_unsq = coord_volume.unsqueeze(0) 164 | keypoints_gt_i_unsq = keypoints_gt_i.unsqueeze(1).unsqueeze(1).unsqueeze(1) 165 | 166 | dists = torch.sqrt(((coord_volume_unsq - keypoints_gt_i_unsq) ** 2).sum(-1)) 167 | dists = dists.view(dists.shape[0], -1) 168 | 169 | min_indexes = torch.argmin(dists, dim=-1).detach().cpu().numpy() 170 | min_indexes = np.stack(np.unravel_index(min_indexes, volumes_batch_pred.shape[-3:]), axis=1) 171 | 172 | for joint_i, index in enumerate(min_indexes): 173 | validity = keypoints_binary_validity[batch_i, joint_i] 174 | loss += validity[0] * (-torch.log(volumes_batch_pred[batch_i, joint_i, index[0], index[1], index[2]] + 1e-6)) 175 | n_losses += 1 176 | 177 | 178 | return loss / n_losses 179 | 180 | 181 | class LimbLengthError(nn.Module): 182 | """ Limb Length Loss: to let the """ 183 | def __init__(self): 184 | super(LimbLengthError, self).__init__() 185 | 
self.CONNECTIVITY_DICT = [(0, 1), (1, 2), (2, 6), (5, 4), (4, 3), (3, 6), (6, 7), (7, 8), (8, 16), (9, 16), (8, 12), (11, 12), (10, 11), (8, 13), (13, 14), (14, 15)] 186 | 187 | def forward(self, keypoints_3d_pred, keypoints_3d_gt): 188 | # (b, 17, 3) 189 | 190 | error = 0 191 | for (joint0, joint1) in self.CONNECTIVITY_DICT: 192 | limb_pred = keypoints_3d_pred[:, joint0] - keypoints_3d_pred[:, joint1] 193 | limb_gt = keypoints_3d_gt[:, joint0] - keypoints_3d_gt[:, joint1] 194 | if isinstance(limb_pred, np.ndarray): 195 | limb_pred = torch.from_numpy(limb_pred) 196 | limb_gt = torch.from_numpy(limb_gt) 197 | limb_length_pred = torch.norm(limb_pred, dim = 1) 198 | limb_length_gt = torch.norm(limb_gt, dim = 1) 199 | error += torch.abs(limb_length_pred - limb_length_gt).mean().cpu() 200 | 201 | return float(error)/len(self.CONNECTIVITY_DICT) 202 | -------------------------------------------------------------------------------- /ContextPose_mpi/common/load_data_3dhp_mae.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.utils.data as data 3 | import numpy as np 4 | 5 | from common.utils import deterministic_random 6 | from common.camera import world_to_camera, normalize_screen_coordinates 7 | from common.generator_3dhp import ChunkedGenerator 8 | 9 | class Fusion(data.Dataset): 10 | def __init__(self, opt, root_path, train=True, MAE=False): 11 | self.data_type = opt.dataset 12 | self.train = train 13 | self.keypoints_name = opt.keypoints 14 | self.root_path = root_path 15 | 16 | self.train_list = opt.subjects_train.split(',') 17 | self.test_list = opt.subjects_test.split(',') 18 | self.action_filter = None if opt.actions == '*' else opt.actions.split(',') 19 | self.downsample = opt.downsample 20 | self.subset = opt.subset 21 | self.stride = opt.stride 22 | self.crop_uv = opt.crop_uv 23 | self.test_aug = opt.test_augmentation 24 | self.pad = opt.pad 25 | self.MAE=MAE 26 | if self.train: 27 | self.poses_train, self.poses_train_2d, self.poses_train_2d_crop = self.prepare_data(opt.root_path, train=True) 28 | self.generator = ChunkedGenerator(opt.batchSize // opt.stride, None, self.poses_train, 29 | self.poses_train_2d, self.poses_train_2d_crop, None, chunk_length=self.stride, pad=self.pad, 30 | augment=False, reverse_aug=opt.reverse_augmentation, 31 | kps_left=self.kps_left, kps_right=self.kps_right, 32 | joints_left=self.joints_left, 33 | joints_right=self.joints_right, out_all=opt.out_all, MAE=MAE, train = True) 34 | print('INFO: Training on {} frames'.format(self.generator.num_frames())) 35 | else: 36 | self.poses_test, self.poses_test_2d, self.poses_test_2d_crop, self.valid_frame = self.prepare_data(opt.root_path, train=False) 37 | # self.cameras_test, self.poses_test, self.poses_test_2d = self.fetch(dataset, self.test_list, 38 | # subset=self.subset) 39 | self.generator = ChunkedGenerator(opt.batchSize // opt.stride, None, self.poses_test, 40 | self.poses_test_2d, self.poses_test_2d_crop, self.valid_frame, 41 | pad=self.pad, augment=False, kps_left=self.kps_left, 42 | kps_right=self.kps_right, joints_left=self.joints_left, 43 | joints_right=self.joints_right, MAE=MAE, train = False) 44 | self.key_index = self.generator.saved_index 45 | print('INFO: Testing on {} frames'.format(self.generator.num_frames())) 46 | 47 | def prepare_data(self, path, train=True): 48 | out_poses_3d = {} 49 | out_poses_2d = {} 50 | out_poses_2d_crop = {} 51 | valid_frame = {} 52 | 53 | self.kps_left, self.kps_right = [5, 6, 7, 11, 12, 13], [2, 3, 4, 8, 9, 10] 54 
| self.joints_left, self.joints_right = [5, 6, 7, 11, 12, 13], [2, 3, 4, 8, 9, 10] 55 | 56 | if train == True: 57 | data = np.load("dataset/data_train_3dhp.npz",allow_pickle=True)['data'].item() 58 | for seq in data.keys(): 59 | for cam in data[seq][0].keys(): 60 | anim = data[seq][0][cam] 61 | 62 | subject_name, seq_name = seq.split(" ") 63 | 64 | data_3d = anim['data_3d'] 65 | data_3d[:, :14] -= data_3d[:, 14:15] 66 | data_3d[:, 15:] -= data_3d[:, 14:15] 67 | out_poses_3d[(subject_name, seq_name, cam)] = data_3d 68 | 69 | data_2d = anim['data_2d'] 70 | data_2d_crop = anim['data_2d_crop'] 71 | 72 | data_2d[..., :2] = normalize_screen_coordinates(data_2d[..., :2], w=2048, h=2048) 73 | out_poses_2d[(subject_name, seq_name, cam)] = data_2d 74 | out_poses_2d_crop[(subject_name, seq_name, cam)] = data_2d_crop 75 | 76 | return out_poses_3d, out_poses_2d, out_poses_2d_crop 77 | else: 78 | data = np.load("dataset/data_test_3dhp.npz", allow_pickle=True)['data'].item() 79 | for seq in data.keys(): 80 | 81 | anim = data[seq] 82 | 83 | valid_frame[seq] = anim["valid"] 84 | 85 | data_3d = anim['data_3d'] 86 | data_3d[:, :14] -= data_3d[:, 14:15] 87 | data_3d[:, 15:] -= data_3d[:, 14:15] 88 | out_poses_3d[seq] = data_3d 89 | 90 | data_2d = anim['data_2d'] 91 | data_2d_crop = anim['data_2d_crop'] 92 | 93 | if seq == "TS5" or seq == "TS6": 94 | width = 1920 95 | height = 1080 96 | else: 97 | width = 2048 98 | height = 2048 99 | data_2d[..., :2] = normalize_screen_coordinates(data_2d[..., :2], w=width, h=height) 100 | out_poses_2d[seq] = data_2d 101 | out_poses_2d_crop[seq] = data_2d_crop 102 | 103 | return out_poses_3d, out_poses_2d, out_poses_2d_crop, valid_frame 104 | 105 | def __len__(self): 106 | return len(self.generator.pairs) 107 | #return 200 108 | 109 | def __getitem__(self, index): 110 | seq_name, start_3d, end_3d, flip, reverse = self.generator.pairs[index] 111 | # ['S7' 'Seq2' '6'] 10782 10783 False False 112 | 113 | if self.MAE: 114 | pass 115 | # cam, input_2D, seq, subject, cam_ind = self.generator.get_batch(seq_name, start_3d, end_3d, flip, 116 | # reverse) 117 | # if self.train == False and self.test_aug: 118 | # _, input_2D_aug, _, _,_ = self.generator.get_batch(seq_name, start_3d, end_3d, flip=True, reverse=reverse) 119 | # input_2D = np.concatenate((np.expand_dims(input_2D,axis=0),np.expand_dims(input_2D_aug,axis=0)),0) 120 | else: 121 | cam, gt_3D, input_2D, input_2D_crop, img, seq, subject, cam_ind = self.generator.get_batch(seq_name, start_3d, end_3d, flip, reverse) 122 | 123 | # if self.train == False and self.test_aug: 124 | # _, _, input_2D_aug, _, _, _, _, _ = self.generator.get_batch(seq_name, start_3d, end_3d, flip=True, reverse=reverse) 125 | # _, _, _, input_2D_crop_aug, _, _, _, _ = self.generator.get_batch(seq_name, start_3d, end_3d, flip=True, reverse=reverse) 126 | # _, _, _, _, img_aug, _, _, _ = self.generator.get_batch(seq_name, start_3d, end_3d, flip=True, reverse=reverse) 127 | # input_2D = np.concatenate((np.expand_dims(input_2D,axis=0),np.expand_dims(input_2D_aug,axis=0)),0) 128 | # input_2D_crop = np.concatenate((np.expand_dims(input_2D_crop,axis=0),np.expand_dims(input_2D_crop_aug,axis=0)),0) 129 | # img = np.concatenate((np.expand_dims(img,axis=0),np.expand_dims(img,axis=0)),0) 130 | 131 | bb_box = np.array([0, 0, 1, 1]) 132 | 133 | scale = float(1.0) 134 | 135 | if self.MAE: 136 | if self.train == True: 137 | return cam, input_2D_update, seq, subject, scale, bb_box, cam_ind 138 | else: 139 | return cam, input_2D_update, seq, scale, bb_box 140 | else: 141 | if 
self.train == True: 142 | return cam, gt_3D, input_2D, input_2D_crop, img, seq, subject, scale, bb_box, cam_ind 143 | else: 144 | return cam, gt_3D, input_2D, input_2D_crop, img, seq, scale, bb_box 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /ContextPose/mvn/utils/cfg.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from easydict import EasyDict as edict 3 | import os 4 | 5 | config = edict() 6 | 7 | config.title = "human36m_vol_softmax_single" 8 | config.kind = "human36m" 9 | config.azureroot = "" 10 | config.logdir = "logs" 11 | config.batch_output = False 12 | config.vis_freq = 1000 13 | config.vis_n_elements = 10 14 | config.id = 600 15 | config.frame = 1 16 | 17 | # model definition 18 | config.model = edict() 19 | config.model.image_shape = [192, 256] 20 | config.model.init_weights = True 21 | config.model.checkpoint = None 22 | 23 | config.model.backbone = edict() 24 | config.model.backbone.type = 'hrnet_32' 25 | config.model.backbone.num_final_layer_channel = 17 26 | config.model.backbone.num_joints = 17 27 | config.model.backbone.num_layers = 152 28 | config.model.backbone.init_weights = True 29 | config.model.backbone.fix_weights = False 30 | config.model.backbone.checkpoint = "data/pretrained/human36m/pose_hrnet_w32_256x192.pth" 31 | 32 | # pose_hrnet related params 33 | # config.model.backbone = edict() 34 | config.model.backbone.NUM_JOINTS = 17 35 | config.model.backbone.PRETRAINED_LAYERS = ['*'] 36 | config.model.backbone.STEM_INPLANES = 64 37 | config.model.backbone.FINAL_CONV_KERNEL = 1 38 | 39 | config.model.backbone.STAGE2 = edict() 40 | config.model.backbone.STAGE2.NUM_MODULES = 1 41 | config.model.backbone.STAGE2.NUM_BRANCHES = 2 42 | config.model.backbone.STAGE2.NUM_BLOCKS = [4, 4] 43 | config.model.backbone.STAGE2.NUM_CHANNELS = [32, 64] 44 | # config.model.backbone.STAGE2.NUM_CHANNELS = [48, 96] 45 | config.model.backbone.STAGE2.BLOCK = 'BASIC' 46 | config.model.backbone.STAGE2.FUSE_METHOD = 'SUM' 47 | 48 | config.model.backbone.STAGE3 = edict() 49 | # config.model.backbone.STAGE3.NUM_MODULES = 1 50 | config.model.backbone.STAGE3.NUM_MODULES = 4 51 | config.model.backbone.STAGE3.NUM_BRANCHES = 3 52 | config.model.backbone.STAGE3.NUM_BLOCKS = [4, 4, 4] 53 | config.model.backbone.STAGE3.NUM_CHANNELS = [32, 64, 128] 54 | # config.model.backbone.STAGE3.NUM_CHANNELS = [48, 96, 192] 55 | config.model.backbone.STAGE3.BLOCK = 'BASIC' 56 | config.model.backbone.STAGE3.FUSE_METHOD = 'SUM' 57 | 58 | config.model.backbone.STAGE4 = edict() 59 | # config.model.backbone.STAGE4.NUM_MODULES = 1 60 | config.model.backbone.STAGE4.NUM_MODULES = 3 61 | config.model.backbone.STAGE4.NUM_BRANCHES = 4 62 | config.model.backbone.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 63 | config.model.backbone.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 64 | # config.model.backbone.STAGE4.NUM_CHANNELS = [48, 96, 192, 384] 65 | config.model.backbone.STAGE4.BLOCK = 'BASIC' 66 | config.model.backbone.STAGE4.FUSE_METHOD = 'SUM' 67 | 68 | # pose_resnet related params 69 | config.model.backbone.NUM_LAYERS = 50 70 | config.model.backbone.DECONV_WITH_BIAS = False 71 | config.model.backbone.NUM_DECONV_LAYERS = 3 72 | config.model.backbone.NUM_DECONV_FILTERS = [256, 256, 256] 73 | config.model.backbone.NUM_DECONV_KERNELS = [4, 4, 4] 74 | config.model.backbone.FINAL_CONV_KERNEL = 1 75 | config.model.backbone.PRETRAINED_LAYERS = ['*'] 76 | 77 | config.model.volume_net = edict() 78 | 
config.model.volume_net.volume_aggregation_method = "softmax" 79 | config.model.volume_net.use_gt_pelvis = False 80 | config.model.volume_net.cuboid_size = 2500.0 81 | config.model.volume_net.volume_size = 64 82 | config.model.volume_net.volume_multiplier = 1.0 83 | config.model.volume_net.volume_softmax = True 84 | config.model.volume_net.use_feature_v2v = True 85 | config.model.volume_net.att_channels = 51 86 | config.model.volume_net.temperature = 1500 87 | 88 | config.model.poseformer = edict() 89 | config.model.poseformer.base_dim = 32 90 | config.model.poseformer.embed_dim_ratio = 128 91 | config.model.poseformer.depth = 4 92 | config.model.poseformer.levels = 4 93 | 94 | # loss related params 95 | config.loss = edict() 96 | config.loss.criterion = "MAE" 97 | config.loss.mse_smooth_threshold = 0 98 | config.loss.grad_clip = 0 99 | config.loss.scale_keypoints_3d = 0.1 100 | config.loss.use_volumetric_ce_loss = True 101 | config.loss.volumetric_ce_loss_weight = 0.01 102 | config.loss.use_global_attention_loss = True 103 | config.loss.global_attention_loss_weight = 1000000 104 | 105 | # dataset related params 106 | config.dataset = edict() 107 | config.dataset.kind = "human36m" 108 | config.dataset.data_format = '' 109 | config.dataset.transfer_cmu_to_human36m = False 110 | config.dataset.root = "../H36M-Toolbox/images/" 111 | config.dataset.extra_root = "data/human36m/extra" 112 | config.dataset.train_labels_path = "data/human36m/extra/human36m-multiview-labels-GTbboxes.npy" 113 | config.dataset.val_labels_path = "data/human36m/extra/human36m-multiview-labels-GTbboxes.npy" 114 | config.dataset.train_dataset = "multiview_human36m" 115 | config.dataset.val_dataset = "human36m" 116 | 117 | # train related params 118 | config.train = edict() 119 | config.train.n_objects_per_epoch = 15000 120 | config.train.n_epochs = 9999 121 | config.train.n_iters_per_epoch = 5000 122 | config.train.batch_size = 3 123 | config.train.optimizer = 'Adam' 124 | config.train.backbone_lr = 0.0001 125 | config.train.backbone_lr_step = [1000] 126 | config.train.backbone_lr_factor = 0.1 127 | config.train.process_features_lr = 0.001 128 | config.train.volume_net_lr = 0.001 129 | config.train.volume_net_lr_decay = 0.99 130 | config.train.volume_net_lr_step = [1000] 131 | config.train.volume_net_lr_factor = 0.5 132 | config.train.with_damaged_actions = True 133 | config.train.undistort_images = True 134 | config.train.scale_bbox = 1.0 135 | config.train.ignore_cameras = [] 136 | config.train.crop = True 137 | config.train.erase = False 138 | config.train.shuffle = True 139 | config.train.randomize_n_views = True 140 | config.train.min_n_views = 1 141 | config.train.max_n_views = 1 142 | config.train.num_workers = 8 143 | config.train.limb_length_path = "data/human36m/extra/mean_and_std_limb_length.h5" 144 | config.train.pred_results_path = "data/pretrained/human36m/human36m_alg_10-04-2019/checkpoints/0060/results/train.pkl" 145 | 146 | # val related params 147 | config.val = edict() 148 | config.val.flip_test = True 149 | config.val.batch_size = 6 150 | config.val.with_damaged_actions = True 151 | config.val.undistort_images = True 152 | config.val.scale_bbox = 1.0 153 | config.val.ignore_cameras = [] 154 | config.val.crop = True 155 | config.val.erase = False 156 | config.val.shuffle = False 157 | config.val.randomize_n_views = True 158 | config.val.min_n_views = 1 159 | config.val.max_n_views = 1 160 | config.val.num_workers = 10 161 | config.val.retain_every_n_frames_in_test = 1 162 | config.val.limb_length_path 
= "data/human36m/extra/mean_and_std_limb_length.h5" 163 | config.val.pred_results_path = "data/pretrained/human36m/human36m_alg_10-04-2019/checkpoints/0060/results/val.pkl" 164 | 165 | 166 | def update_dict(v, cfg): 167 | for kk, vv in v.items(): 168 | if kk in cfg: 169 | if isinstance(vv, dict): 170 | update_dict(vv, cfg[kk]) 171 | else: 172 | cfg[kk] = vv 173 | else: 174 | raise ValueError("{} not exist in cfg.py".format(kk)) 175 | 176 | 177 | def update_config(path): 178 | exp_config = None 179 | with open(path) as fin: 180 | exp_config = edict(yaml.safe_load(fin)) 181 | update_dict(exp_config, config) 182 | 183 | 184 | def handle_azureroot(config_dict, azureroot): 185 | for key in config_dict.keys(): 186 | if isinstance(config_dict[key], str): 187 | if config_dict[key].startswith('data/'): 188 | config_dict[key] = os.path.join(azureroot, config_dict[key]) 189 | elif isinstance(config_dict[key], dict): 190 | handle_azureroot(config_dict[key], azureroot) 191 | 192 | 193 | def update_dir(azureroot, logdir): 194 | config.azureroot = azureroot 195 | config.logdir = os.path.join(config.azureroot, logdir) 196 | if config.model.checkpoint != None and not config.model.checkpoint.startswith('data/'): 197 | config.model.checkpoint = os.path.join(config.azureroot, config.model.checkpoint) 198 | handle_azureroot(config, config.azureroot) 199 | 200 | -------------------------------------------------------------------------------- /ContextPose/mvn/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from mvn.utils.img import image_batch_to_torch 5 | 6 | import os 7 | import zipfile 8 | import cv2 9 | import random 10 | 11 | 12 | joints_left = [4, 5, 6, 11, 12, 13] 13 | joints_right = [1, 2, 3, 14, 15, 16] 14 | 15 | class data_prefetcher(): 16 | def __init__(self, loader, device, is_train, flip_test, backbone): 17 | self.loader = iter(loader) 18 | self.stream = torch.cuda.Stream() 19 | self.device = device 20 | self.is_train = is_train 21 | self.flip_test = flip_test 22 | self.backbone = backbone 23 | 24 | if backbone in ['hrnet_32', 'hrnet_48']: 25 | self.mean = torch.tensor([0.485, 0.456, 0.406]).cuda().to(device) 26 | self.std = torch.tensor([0.229, 0.224, 0.225]).cuda().to(device) 27 | elif backbone == 'cpn': 28 | self.mean = torch.tensor([122.7717, 115.9465, 102.9801]).cuda().to(device).view(1, 1, 1, 3) 29 | self.mean /= 255. 
30 | 31 | self.preload() 32 | 33 | def preload(self): 34 | try: 35 | self.next_batch = next(self.loader) 36 | except StopIteration: 37 | self.next_batch = None 38 | return 39 | with torch.cuda.stream(self.stream): 40 | for i in range(len(self.next_batch)): 41 | self.next_batch[i] = self.next_batch[i].cuda(non_blocking=True).to(self.device) 42 | 43 | images_batch, keypoints_3d_gt, keypoints_2d_batch_cpn, keypoints_2d_batch_cpn_crop = self.next_batch 44 | 45 | images_batch = torch.flip(images_batch, [-1]) 46 | 47 | if self.backbone in ['hrnet_32', 'hrnet_48']: 48 | images_batch = (images_batch / 255.0 - self.mean) / self.std 49 | elif self.backbone == 'cpn': 50 | images_batch = images_batch / 255.0 - self.mean # for CPN 51 | 52 | keypoints_3d_gt[:, :, 1:] -= keypoints_3d_gt[:, :, :1] 53 | keypoints_3d_gt[:, :, 0] = 0 54 | 55 | if random.random() <= 0.5 and self.is_train: 56 | images_batch = torch.flip(images_batch, [-2]) 57 | 58 | keypoints_2d_batch_cpn[..., 0] *= -1 59 | keypoints_2d_batch_cpn[..., joints_left + joints_right, :] = keypoints_2d_batch_cpn[..., joints_right + joints_left, :] 60 | 61 | keypoints_2d_batch_cpn_crop[:, :, 0] = 192 - keypoints_2d_batch_cpn_crop[:, :, 0] - 1 62 | keypoints_2d_batch_cpn_crop[:, joints_left + joints_right] = keypoints_2d_batch_cpn_crop[:, joints_right + joints_left] 63 | 64 | keypoints_3d_gt[:, :, :, 0] *= -1 65 | keypoints_3d_gt[:, :, joints_left + joints_right] = keypoints_3d_gt[:, :, joints_right + joints_left] 66 | 67 | if (not self.is_train) and self.flip_test: 68 | images_batch = torch.stack([images_batch, torch.flip(images_batch,[2])], dim=1) 69 | 70 | keypoints_2d_batch_cpn_flip = keypoints_2d_batch_cpn.clone() 71 | keypoints_2d_batch_cpn_flip[..., 0] *= -1 72 | keypoints_2d_batch_cpn_flip[..., joints_left + joints_right, :] = keypoints_2d_batch_cpn_flip[..., joints_right + joints_left, :] 73 | keypoints_2d_batch_cpn = torch.stack([keypoints_2d_batch_cpn, keypoints_2d_batch_cpn_flip], dim=1) 74 | 75 | keypoints_2d_batch_cpn_crop_flip = keypoints_2d_batch_cpn_crop.clone() 76 | keypoints_2d_batch_cpn_crop_flip[:, :, 0] = 192 - keypoints_2d_batch_cpn_crop_flip[:, :, 0] - 1 77 | keypoints_2d_batch_cpn_crop_flip[:, joints_left + joints_right] = keypoints_2d_batch_cpn_crop_flip[:, joints_right + joints_left] 78 | keypoints_2d_batch_cpn_crop = torch.stack([keypoints_2d_batch_cpn_crop, keypoints_2d_batch_cpn_crop_flip], dim=1) 79 | 80 | del keypoints_2d_batch_cpn_flip, keypoints_2d_batch_cpn_crop_flip 81 | 82 | self.next_batch = [images_batch.float(), keypoints_3d_gt.float(), keypoints_2d_batch_cpn.float(), keypoints_2d_batch_cpn_crop.float()] 83 | 84 | 85 | def next(self): 86 | torch.cuda.current_stream().wait_stream(self.stream) 87 | batch = self.next_batch 88 | self.preload() 89 | return batch 90 | 91 | 92 | def make_collate_fn(randomize_n_views=True, min_n_views=10, max_n_views=31): 93 | 94 | def collate_fn(items): 95 | items = list(filter(lambda x: x is not None, items)) 96 | if len(items) == 0: 97 | print("All items in batch are None") 98 | return None 99 | 100 | batch = dict() 101 | total_n_views = min(len(item['images']) for item in items) 102 | 103 | indexes = np.arange(total_n_views) 104 | if randomize_n_views: 105 | n_views = np.random.randint(min_n_views, min(total_n_views, max_n_views) + 1) 106 | indexes = np.random.choice(np.arange(total_n_views), size=n_views, replace=False) 107 | else: 108 | indexes = np.arange(total_n_views) 109 | 110 | batch['images'] = np.stack([np.stack([item['images'][i] for item in items], axis=0) for i in 
indexes], axis=0).swapaxes(0, 1) 111 | # batch['detections'] = np.array([[item['detections'][i] for item in items] for i in indexes]).swapaxes(0, 1) 112 | # batch['cameras'] = [[item['cameras'][i] for item in items] for i in indexes] 113 | 114 | batch['keypoints_3d'] = [item['keypoints_3d'] for item in items] 115 | batch['keypoints_2d_cpn'] = [item['keypoints_2d_cpn'] for item in items] 116 | batch['keypoints_2d_cpn_crop'] = [item['keypoints_2d_cpn_crop'] for item in items] 117 | # batch['cuboids'] = [item['cuboids'] for item in items] 118 | batch['indexes'] = [item['indexes'] for item in items] 119 | batch['subject'] = [item['subject'] for item in items] 120 | 121 | try: 122 | batch['pred_keypoints_3d'] = np.array([item['pred_keypoints_3d'] for item in items]) 123 | except: 124 | pass 125 | 126 | return batch 127 | 128 | return collate_fn 129 | 130 | 131 | def worker_init_fn(worker_id): 132 | np.random.seed(np.random.get_state()[1][0] + worker_id) 133 | 134 | 135 | def prepare_batch(batch, device, config): 136 | # images 137 | images_batch = [] 138 | for image_batch in batch['images']: 139 | image_batch = image_batch_to_torch(image_batch) 140 | image_batch = image_batch.to(device) 141 | images_batch.append(image_batch) 142 | 143 | images_batch = torch.stack(images_batch, dim=0) 144 | 145 | # 3D keypoints 146 | keypoints_3d_batch_gt = torch.from_numpy(np.stack(batch['keypoints_3d'], axis=0)[:, :, :3]).float().to(device) # (b, n_joints, 3) 147 | 148 | # 2D keypoints 149 | keypoints_2d_batch_cpn = torch.from_numpy(np.stack(batch['keypoints_2d_cpn'], axis=0)[:, :, :2]).float().to(device) # (b, n_joints, 3) 150 | keypoints_2d_batch_cpn_crop = torch.from_numpy(np.stack(batch['keypoints_2d_cpn_crop'], axis=0)[:, :, :2]).float().to(device) # (b, n_joints, 3) 151 | 152 | return images_batch, keypoints_3d_batch_gt, keypoints_2d_batch_cpn, keypoints_2d_batch_cpn_crop 153 | 154 | _im_zfile = [] 155 | 156 | 157 | def zipreader_imread(filename, flags=cv2.IMREAD_COLOR): 158 | global _im_zfile 159 | path = filename 160 | pos_at = path.index('@') 161 | if pos_at == -1: 162 | print("character '@' is not found from the given path '%s'" % (path)) 163 | assert 0 164 | path_zip = path[0:pos_at] 165 | if not os.path.isfile(path_zip): 166 | print("zip file '%s' is not found" % (path_zip)) 167 | assert 0 168 | for i in range(len(_im_zfile)): 169 | if _im_zfile[i]['path'] == path_zip: 170 | path_img = os.path.join(_im_zfile[i]['zipfile'].namelist()[0], path[pos_at+2:]) 171 | data = _im_zfile[i]['zipfile'].read(path_img) 172 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 173 | 174 | _im_zfile.append({ 175 | 'path': path_zip, 176 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 177 | }) 178 | path_img = os.path.join(_im_zfile[-1]['zipfile'].namelist()[0], path[pos_at+2:]) 179 | data = _im_zfile[-1]['zipfile'].read(path_img) 180 | 181 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) -------------------------------------------------------------------------------- /H36M-Toolbox/common/arguments.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | import argparse 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Training script') 12 | 13 | # General arguments 14 | parser.add_argument('-d', '--dataset', default='h36m', type=str, metavar='NAME', help='target dataset') # h36m or humaneva 15 | parser.add_argument('-k', '--keypoints', default='cpn_ft_h36m_dbb', type=str, metavar='NAME', help='2D detections to use') 16 | parser.add_argument('-str', '--subjects-train', default='S1,S5,S6,S7,S8', type=str, metavar='LIST', 17 | help='training subjects separated by comma') 18 | parser.add_argument('-ste', '--subjects-test', default='S9,S11', type=str, metavar='LIST', help='test subjects separated by comma') 19 | parser.add_argument('-sun', '--subjects-unlabeled', default='', type=str, metavar='LIST', 20 | help='unlabeled subjects separated by comma for self-supervision') 21 | parser.add_argument('-a', '--actions', default='*', type=str, metavar='LIST', 22 | help='actions to train/test on, separated by comma, or * for all') 23 | parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH', 24 | help='checkpoint directory') 25 | parser.add_argument('--checkpoint-frequency', default=40, type=int, metavar='N', 26 | help='create a checkpoint every N epochs') 27 | parser.add_argument('-r', '--resume', default='', type=str, metavar='FILENAME', 28 | help='checkpoint to resume (file name)') 29 | parser.add_argument('--evaluate', default='', type=str, metavar='FILENAME', help='checkpoint to evaluate (file name)') 30 | parser.add_argument('--render', action='store_true', help='visualize a particular video') 31 | parser.add_argument('--by-subject', action='store_true', help='break down error by subject (on evaluation)') 32 | parser.add_argument('--export-training-curves', action='store_true', help='save training curves as .png images') 33 | parser.add_argument('-g', '--gpu', type=list, help='set gpu number') 34 | parser.add_argument('--local_rank', type=int, default=0, help='node rank for distributed training') 35 | parser.add_argument('--from-scratch', type=int, default=0, help='choose to train from scratch or not') 36 | parser.add_argument('--center-pose', type=int, default=0, help='choose fine-tuning task as 3d pose estimation') 37 | 38 | # Model arguments 39 | parser.add_argument('-s', '--stride', default=1, type=int, metavar='N', help='chunk size to use during training') 40 | parser.add_argument('-e', '--epochs', default=200, type=int, metavar='N', help='number of training epochs') 41 | parser.add_argument('-b', '--batch-size', default=1024, type=int, metavar='N', help='batch size in terms of predicted frames') 42 | parser.add_argument('-drop', '--dropout', default=0., type=float, metavar='P', help='dropout probability') 43 | parser.add_argument('-lr', '--learning-rate', default=0.0001, type=float, metavar='LR', help='initial learning rate') 44 | parser.add_argument('-lrd', '--lr-decay', default=0.99, type=float, metavar='LR', help='learning rate decay per epoch') 45 | parser.add_argument('-no-da', '--no-data-augmentation', dest='data_augmentation', action='store_false', 46 | help='disable train-time flipping') 47 | # parser.add_argument('-no-tta', '--no-test-time-augmentation', dest='test_time_augmentation', action='store_false', 48 | # help='disable test-time flipping') 49 | # parser.add_argument('-arc', '--architecture', default='3,3,3', type=str, metavar='LAYERS', help='filter widths separated by comma') 50 | parser.add_argument('-frame', '--number-of-frames', default='81', 
type=int, metavar='N', 51 | help='how many frames used as input') 52 | parser.add_argument('-frame-kept', '--number-of-kept-frames', default='27', type=int, metavar='N', 53 | help='how many frames are kept') 54 | parser.add_argument('-coeff-kept', '--number-of-kept-coeffs', type=int, metavar='N', help='how many coefficients are kept') 55 | # parser.add_argument('--causal', action='store_true', help='use causal convolutions for real-time processing') 56 | # parser.add_argument('-ch', '--channels', default=1024, type=int, metavar='N', help='number of channels in convolution layers') 57 | parser.add_argument('--depth', default=4, type=int, metavar='N', help='number of transformer blocks') 58 | parser.add_argument('--embed-dim-ratio', default=32, type=int, metavar='N', help='dimension of embedding ratio') 59 | parser.add_argument('-kd', type=int, default=0, help='choose to use knowledge distillation or not') 60 | parser.add_argument('-alpha', type=float, default=1.0, help='the weight for distillation loss') 61 | parser.add_argument('-std', type=float, default=0.0, help='the standard deviation for gaussian noise') 62 | 63 | # Experimental 64 | parser.add_argument('--subset', default=1, type=float, metavar='FRACTION', help='reduce dataset size by fraction') 65 | parser.add_argument('--downsample', default=1, type=int, metavar='FACTOR', help='downsample frame rate by factor (semi-supervised)') 66 | parser.add_argument('--warmup', default=1, type=int, metavar='N', help='warm-up epochs for semi-supervision') 67 | parser.add_argument('--no-eval', action='store_true', help='disable epoch evaluation while training (small speed-up)') 68 | parser.add_argument('--dense', action='store_true', help='use dense convolutions instead of dilated convolutions') 69 | parser.add_argument('--disable-optimizations', action='store_true', help='disable optimized model for single-frame predictions') 70 | parser.add_argument('--linear-projection', action='store_true', help='use only linear coefficients for semi-supervised projection') 71 | parser.add_argument('--no-bone-length', action='store_false', dest='bone_length_term', 72 | help='disable bone length term in semi-supervised settings') 73 | parser.add_argument('--no-proj', action='store_true', help='disable projection for semi-supervised setting') 74 | 75 | # Visualization 76 | parser.add_argument('--viz-subject', type=str, metavar='STR', help='subject to render') 77 | parser.add_argument('--viz-action', type=str, metavar='STR', help='action to render') 78 | parser.add_argument('--viz-camera', type=int, default=0, metavar='N', help='camera to render') 79 | parser.add_argument('--viz-video', type=str, metavar='PATH', help='path to input video') 80 | parser.add_argument('--viz-skip', type=int, default=0, metavar='N', help='skip first N frames of input video') 81 | parser.add_argument('--viz-output', type=str, metavar='PATH', help='output file name (.gif or .mp4)') 82 | parser.add_argument('--viz-export', type=str, metavar='PATH', help='output file name for coordinates') 83 | parser.add_argument('--viz-bitrate', type=int, default=3000, metavar='N', help='bitrate for mp4 videos') 84 | parser.add_argument('--viz-no-ground-truth', action='store_true', help='do not show ground-truth poses') 85 | parser.add_argument('--viz-limit', type=int, default=-1, metavar='N', help='only render first N frames') 86 | parser.add_argument('--viz-downsample', type=int, default=1, metavar='N', help='downsample FPS by a factor N') 87 | parser.add_argument('--viz-size', type=int, default=5, 
metavar='N', help='image size') 88 | 89 | parser.set_defaults(bone_length_term=True) 90 | parser.set_defaults(data_augmentation=True) 91 | parser.set_defaults(test_time_augmentation=True) 92 | # parser.set_defaults(test_time_augmentation=False) 93 | 94 | args = parser.parse_args() 95 | # Check invalid configuration 96 | if args.resume and args.evaluate: 97 | print('Invalid flags: --resume and --evaluate cannot be set at the same time') 98 | exit() 99 | 100 | if args.export_training_curves and args.no_eval: 101 | print('Invalid flags: --export-training-curves and --no-eval cannot be set at the same time') 102 | exit() 103 | 104 | return args -------------------------------------------------------------------------------- /ContextPose_mpi/common/generator_tds.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class ChunkedGenerator: 5 | def __init__(self, batch_size, cameras, poses_3d, poses_2d, 6 | chunk_length=1, pad=0, causal_shift=0, 7 | shuffle=False, random_seed=1234, 8 | augment=False, reverse_aug= False,kps_left=None, kps_right=None, joints_left=None, joints_right=None, 9 | endless=False, out_all = False, MAE=False, tds=1): 10 | assert poses_3d is None or len(poses_3d) == len(poses_2d), (len(poses_3d), len(poses_2d)) 11 | assert cameras is None or len(cameras) == len(poses_2d) 12 | 13 | pairs = [] 14 | self.saved_index = {} 15 | start_index = 0 16 | 17 | for key in poses_2d.keys(): 18 | assert poses_3d is None or poses_3d[key].shape[0] == poses_3d[key].shape[0] 19 | n_chunks = (poses_2d[key].shape[0] + chunk_length - 1) // chunk_length 20 | offset = (n_chunks * chunk_length - poses_2d[key].shape[0]) // 2 21 | bounds = np.arange(n_chunks + 1) * chunk_length - offset 22 | augment_vector = np.full(len(bounds - 1), False, dtype=bool) 23 | reverse_augment_vector = np.full(len(bounds - 1), False, dtype=bool) 24 | keys = np.tile(np.array(key).reshape([1,3]),(len(bounds - 1),1)) 25 | pairs += list(zip(keys, bounds[:-1], bounds[1:], augment_vector,reverse_augment_vector)) 26 | if reverse_aug: 27 | pairs += list(zip(keys, bounds[:-1], bounds[1:], augment_vector, ~reverse_augment_vector)) 28 | if augment: 29 | if reverse_aug: 30 | pairs += list(zip(keys, bounds[:-1], bounds[1:], ~augment_vector,~reverse_augment_vector)) 31 | else: 32 | pairs += list(zip(keys, bounds[:-1], bounds[1:], ~augment_vector, reverse_augment_vector)) 33 | 34 | end_index = start_index + poses_3d[key].shape[0] 35 | self.saved_index[key] = [start_index,end_index] 36 | start_index = start_index + poses_3d[key].shape[0] 37 | 38 | 39 | if cameras is not None: 40 | self.batch_cam = np.empty((batch_size, cameras[key].shape[-1])) 41 | 42 | if poses_3d is not None: 43 | self.batch_3d = np.empty((batch_size, chunk_length, poses_3d[key].shape[-2], poses_3d[key].shape[-1])) 44 | self.batch_2d = np.empty((batch_size, chunk_length + 2 * pad, poses_2d[key].shape[-2], poses_2d[key].shape[-1])) 45 | 46 | self.num_batches = (len(pairs) + batch_size - 1) // batch_size 47 | self.batch_size = batch_size 48 | self.random = np.random.RandomState(random_seed) 49 | self.pairs = pairs 50 | self.shuffle = shuffle 51 | self.pad = pad 52 | self.causal_shift = causal_shift 53 | self.endless = endless 54 | self.state = None 55 | 56 | self.cameras = cameras 57 | if cameras is not None: 58 | self.cameras = cameras 59 | self.poses_3d = poses_3d 60 | self.poses_2d = poses_2d 61 | 62 | self.augment = augment 63 | self.kps_left = kps_left 64 | self.kps_right = kps_right 65 | 
self.joints_left = joints_left 66 | self.joints_right = joints_right 67 | self.out_all = out_all 68 | self.MAE = MAE 69 | self.tds = tds 70 | 71 | def num_frames(self): 72 | return self.num_batches * self.batch_size 73 | 74 | def random_state(self): 75 | return self.random 76 | 77 | def set_random_state(self, random): 78 | self.random = random 79 | 80 | def augment_enabled(self): 81 | return self.augment 82 | 83 | def next_pairs(self): 84 | if self.state is None: 85 | if self.shuffle: 86 | pairs = self.random.permutation(self.pairs) 87 | else: 88 | pairs = self.pairs 89 | return 0, pairs 90 | else: 91 | return self.state 92 | 93 | def get_batch(self, seq_i, start_3d, end_3d, flip, reverse): 94 | subject,action,cam_index = seq_i 95 | seq_name = (subject,action,int(cam_index)) 96 | start_2d = start_3d - self.pad * self.tds - self.causal_shift 97 | end_2d = end_3d + self.pad * self.tds - self.causal_shift 98 | 99 | seq_2d = self.poses_2d[seq_name].copy() 100 | low_2d = max(start_2d, 0) 101 | high_2d = min(end_2d, seq_2d.shape[0]) 102 | pad_left_2d = low_2d - start_2d 103 | pad_right_2d = end_2d - high_2d 104 | if pad_left_2d != 0: 105 | data_pad = np.repeat(seq_2d[0:1],pad_left_2d,axis=0) 106 | new_data = np.concatenate((data_pad, seq_2d[low_2d:high_2d]), axis=0) 107 | self.batch_2d = new_data[::self.tds] 108 | #self.batch_2d = np.pad(seq_2d[low_2d:high_2d], ((pad_left_2d, pad_right_2d), (0, 0), (0, 0)), 'edge') 109 | 110 | elif pad_right_2d != 0: 111 | data_pad = np.repeat(seq_2d[seq_2d.shape[0]-1:seq_2d.shape[0]], pad_right_2d, axis=0) 112 | new_data = np.concatenate((seq_2d[low_2d:high_2d], data_pad), axis=0) 113 | self.batch_2d = new_data[::self.tds] 114 | #self.batch_2d = np.pad(seq_2d[low_2d:high_2d], ((pad_left_2d, pad_right_2d), (0, 0), (0, 0)), 'edge') 115 | else: 116 | self.batch_2d = seq_2d[low_2d:high_2d:self.tds] 117 | 118 | if flip: 119 | self.batch_2d[ :, :, 0] *= -1 120 | self.batch_2d[ :, self.kps_left + self.kps_right] = self.batch_2d[ :, 121 | self.kps_right + self.kps_left] 122 | if reverse: 123 | self.batch_2d = self.batch_2d[::-1].copy() 124 | 125 | if not self.MAE: 126 | if self.poses_3d is not None: 127 | seq_3d = self.poses_3d[seq_name].copy() 128 | if self.out_all: 129 | low_3d = low_2d 130 | high_3d = high_2d 131 | pad_left_3d = pad_left_2d 132 | pad_right_3d = pad_right_2d 133 | else: 134 | low_3d = max(start_3d, 0) 135 | high_3d = min(end_3d, seq_3d.shape[0]) 136 | pad_left_3d = low_3d - start_3d 137 | pad_right_3d = end_3d - high_3d 138 | 139 | if pad_left_3d != 0: 140 | data_pad = np.repeat(seq_3d[0:1], pad_left_3d, axis=0) 141 | new_data = np.concatenate((data_pad, seq_3d[low_3d:high_3d]), axis=0) 142 | self.batch_3d = new_data[::self.tds] 143 | elif pad_right_3d != 0: 144 | data_pad = np.repeat(seq_3d[seq_3d.shape[0] - 1:seq_3d.shape[0]], pad_right_3d, axis=0) 145 | new_data = np.concatenate((seq_3d[low_3d:high_3d], data_pad), axis=0) 146 | self.batch_3d = new_data[::self.tds] 147 | # self.batch_3d = np.pad(seq_3d[low_3d:high_3d], 148 | # ((pad_left_3d, pad_right_3d), (0, 0), (0, 0)), 'edge') 149 | else: 150 | self.batch_3d = seq_3d[low_3d:high_3d:self.tds] 151 | 152 | if flip: 153 | self.batch_3d[ :, :, 0] *= -1 154 | self.batch_3d[ :, self.joints_left + self.joints_right] = \ 155 | self.batch_3d[ :, self.joints_right + self.joints_left] 156 | if reverse: 157 | self.batch_3d = self.batch_3d[::-1].copy() 158 | 159 | if self.cameras is not None: 160 | self.batch_cam = self.cameras[seq_name].copy() 161 | if flip: 162 | self.batch_cam[ 2] *= -1 163 | 
self.batch_cam[ 7] *= -1 164 | 165 | if self.MAE: 166 | return self.batch_cam, self.batch_2d.copy(), action, subject, int(cam_index) 167 | if self.poses_3d is None and self.cameras is None: 168 | return None, None, self.batch_2d.copy(), action, subject, int(cam_index) 169 | elif self.poses_3d is not None and self.cameras is None: 170 | return np.zeros(9), self.batch_3d.copy(), self.batch_2d.copy(),action, subject, int(cam_index) 171 | elif self.poses_3d is None: 172 | return self.batch_cam, None, self.batch_2d.copy(),action, subject, int(cam_index) 173 | else: 174 | return self.batch_cam, self.batch_3d.copy(), self.batch_2d.copy(),action, subject, int(cam_index) 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | --------------------------------------------------------------------------------
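A minimal usage sketch for the ChunkedGenerator defined in ContextPose_mpi/common/generator_tds.py, showing how a chunk request is edge-padded by pad * tds frames on each side and then subsampled with stride tds. The toy array shapes, the (subject, sequence, camera) key, and the pad/tds values below are illustrative assumptions, not values prescribed by the repository:

import numpy as np
from common.generator_tds import ChunkedGenerator  # assumes running from ContextPose_mpi/

# Toy data: one sequence of 100 frames with 17 joints (2D and 3D poses).
poses_2d = {('S1', 'Seq1', 0): np.random.randn(100, 17, 2)}
poses_3d = {('S1', 'Seq1', 0): np.random.randn(100, 17, 3)}

left, right = [5, 6, 7, 11, 12, 13], [2, 3, 4, 8, 9, 10]  # joint index split used elsewhere in this repo
gen = ChunkedGenerator(batch_size=1, cameras=None, poses_3d=poses_3d, poses_2d=poses_2d,
                       chunk_length=1, pad=13, tds=3,
                       kps_left=left, kps_right=right,
                       joints_left=left, joints_right=right)

# Each pair describes one chunk: (key, start_3d, end_3d, flip, reverse).
key, start_3d, end_3d, flip, reverse = gen.pairs[0]
cam, batch_3d, batch_2d, action, subject, cam_idx = gen.get_batch(key, start_3d, end_3d, flip, reverse)
print(batch_2d.shape, batch_3d.shape)  # (27, 17, 2) (1, 17, 3): 1 + 2 * pad frames left after stride-tds subsampling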