├── .gitignore ├── H36M-Toolbox ├── common │ ├── README.MD │ ├── quaternion.py │ ├── mocap_dataset.py │ ├── custom_dataset.py │ ├── utils.py │ ├── camera.py │ ├── skeleton.py │ ├── humaneva_dataset.py │ ├── model_stmo.py │ ├── model_stmo_pretrain.py │ ├── loss.py │ └── arguments.py ├── images │ └── README.MD ├── requirements.txt ├── videonames.mat ├── camera_data.pkl ├── data │ └── README.MD ├── config.ini.example ├── docker-compose.yml ├── video_to_images.py ├── metadata.py ├── README.md ├── extract_all.py ├── checksums.txt ├── download_all.py └── transform.py ├── ContextPose ├── mvn │ ├── models │ │ ├── networks │ │ │ ├── __init__.py │ │ │ ├── network.py │ │ │ ├── refineNet.py │ │ │ └── globalNet.py │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── models.py │ │ │ └── default.py │ │ ├── cpn │ │ │ ├── config.py │ │ │ ├── test_config.py │ │ │ ├── train.py │ │ │ └── test.py │ │ ├── conpose.py │ │ └── loss.py │ ├── utils │ │ ├── __init__.py │ │ ├── misc.py │ │ ├── logger.py │ │ └── cfg.py │ └── datasets │ │ ├── __init__.py │ │ ├── _init_path.py │ │ └── utils.py ├── data │ └── pretrained │ │ └── coco │ │ └── README.MD ├── requirements.txt ├── conda-requirements.txt └── experiments │ └── human36m │ └── human36m.yaml ├── ContextPose_mpi ├── dataset │ ├── README.txt │ ├── mpi_inf_3dhp │ │ ├── util │ │ │ ├── mpii_config_paths.m │ │ │ ├── mpii_get_camera_set.m │ │ │ ├── mpii_get_joint_set.m │ │ │ └── mpii_get_sequence_info.m │ │ ├── license.txt │ │ ├── .README.txt.swp │ │ ├── get_testset.sh │ │ ├── conf.ig │ │ ├── get_dataset.sh │ │ └── README.txt │ ├── pretrained │ │ └── README.txt │ ├── process_data.sh │ └── data_util │ │ └── video_to_images.py ├── checkpoint │ └── README.txt ├── 3dhp_test │ ├── annot-test.h5 │ ├── mpii_3dhp_evaluation_sequencewise.xlsx │ ├── test_util │ │ ├── mpii_3D_error.m │ │ ├── mpii_get_activity_name.m │ │ ├── camera_calibration │ │ │ ├── ts1-4cameras.calib │ │ │ └── ts5-6cameras.calib │ │ ├── mpii_get_pck_auc_joint_groups.m │ │ ├── mpii_perspective_correction_code.m │ │ ├── mpii_test_predictions_py.m │ │ ├── mpii_get_joints.m │ │ ├── mpii_evaluate_errors.m │ │ └── mpii_compute_3d_pck.m │ └── README.txt ├── common │ ├── utils_3dhp.py │ ├── camera.py │ ├── skeleton.py │ ├── opt.py │ ├── cfg.py │ ├── load_data_3dhp_mae.py │ └── generator_tds.py ├── model │ └── conpose.py └── README.md └── images ├── teaser.png └── framework.png /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .DS_Store -------------------------------------------------------------------------------- /H36M-Toolbox/common/README.MD: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/README.txt: -------------------------------------------------------------------------------- 1 | Put the datasets in this folder. 
-------------------------------------------------------------------------------- /ContextPose/mvn/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | -------------------------------------------------------------------------------- /ContextPose/mvn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | -------------------------------------------------------------------------------- /ContextPose_mpi/checkpoint/README.txt: -------------------------------------------------------------------------------- 1 | Place the model checkpoint in this folder. -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/util/mpii_config_paths.m: -------------------------------------------------------------------------------- 1 | mpii_data_path = '../'; 2 | -------------------------------------------------------------------------------- /H36M-Toolbox/images/README.MD: -------------------------------------------------------------------------------- 1 | RGB images from Human3.6M should be under this directory. 2 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/pretrained/README.txt: -------------------------------------------------------------------------------- 1 | Put the pre-trained weights of HRNet in this folder. -------------------------------------------------------------------------------- /images/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/images/teaser.png -------------------------------------------------------------------------------- /ContextPose/data/pretrained/coco/README.MD: -------------------------------------------------------------------------------- 1 | Download the pretrained weights for 2D pose detectors here. 
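For reference, a sketch of the files this folder is expected to contain — the filenames below are the ones referenced by `experiments/human36m/human36m.yaml` in this repository, and which one you need depends on the `model.backbone.type` you select there:

```
data/pretrained/coco/
├── pose_hrnet_w32_256x192.pth   # backbone type "hrnet_32"
├── pose_hrnet_w48_256x192.pth   # backbone type "hrnet_48"
└── CPN50_256x192.pth.tar        # backbone type "cpn"
```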
2 | -------------------------------------------------------------------------------- /images/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/images/framework.png -------------------------------------------------------------------------------- /H36M-Toolbox/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.13.3 2 | tqdm==4.19.8 3 | h5py==2.7.1 4 | spacepy==0.1.6 5 | requests==2.20.0 6 | -------------------------------------------------------------------------------- /H36M-Toolbox/videonames.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/H36M-Toolbox/videonames.mat -------------------------------------------------------------------------------- /H36M-Toolbox/camera_data.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/H36M-Toolbox/camera_data.pkl -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/annot-test.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/ContextPose_mpi/3dhp_test/annot-test.h5 -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/license.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/ContextPose_mpi/dataset/mpi_inf_3dhp/license.txt -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/.README.txt.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/ContextPose_mpi/dataset/mpi_inf_3dhp/.README.txt.swp -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/mpii_3dhp_evaluation_sequencewise.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QitaoZhao/ContextAware-PoseFormer/HEAD/ContextPose_mpi/3dhp_test/mpii_3dhp_evaluation_sequencewise.xlsx -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_3D_error.m: -------------------------------------------------------------------------------- 1 | function out_struct = mpii_3D_error(method_name, error_vector) 2 | out_struct = struct('method', method_name, 'error', error_vector); 3 | end -------------------------------------------------------------------------------- /H36M-Toolbox/data/README.MD: -------------------------------------------------------------------------------- 1 | Please refer to [VideoPose3D](https://github.com/facebookresearch/VideoPose3D/blob/main/DATASETS.md) to download `data_2d_h36m_cpn_ft_h36m_dbb.npz` to this directory. 
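As a quick sanity check after downloading, you can load the archive and list its keys. A minimal sketch, assuming the standard VideoPose3D `.npz` layout (the key names are an assumption here, not something verified in this repository):

```python
# Hypothetical check that the downloaded CPN 2D detections load correctly.
import numpy as np

data = np.load('data_2d_h36m_cpn_ft_h36m_dbb.npz', allow_pickle=True)
print(data.files)  # typically ['positions_2d', 'metadata'] in the VideoPose3D format
```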
2 | -------------------------------------------------------------------------------- /H36M-Toolbox/config.ini.example: -------------------------------------------------------------------------------- 1 | [General] 2 | 3 | # Get your PHPSESSID by logging into http://vision.imar.ro/human3.6m/ and inspecting the cookies 4 | # with your web browser. 5 | PHPSESSID=xxxxxxxxxxxxxxxxxxxxxxxxxx 6 | -------------------------------------------------------------------------------- /H36M-Toolbox/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2.3' 2 | services: 3 | main: 4 | build: . 5 | volumes: 6 | - .:/app 7 | - /etc/localtime:/etc/localtime:ro 8 | environment: 9 | - PYTHONIOENCODING=utf_8 10 | init: true 11 | network_mode: host 12 | -------------------------------------------------------------------------------- /ContextPose/mvn/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from mvn.datasets.human36m import Human36MMultiViewDataset as multiview_human36m 4 | from mvn.datasets.human36m import Human36MSingleViewDataset as human36m 5 | from mvn.datasets.human36m import Human36MKeypointDataset as keypoint_human36m 6 | -------------------------------------------------------------------------------- /ContextPose/mvn/datasets/_init_path.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path as osp 6 | import sys 7 | 8 | 9 | def add_path(path): 10 | if path not in sys.path: 11 | sys.path.insert(0, path) 12 | 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | lib_path = osp.join(this_dir, '..', '..') 17 | add_path(lib_path) -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_get_activity_name.m: -------------------------------------------------------------------------------- 1 | function [activity_names] = mpii_get_activity_name(activity_id) 2 | 3 | activities{1} = 'Standing/Walking'; 4 | activities{2} = 'Exercising'; 5 | activities{3} = 'Sitting'; 6 | activities{4} = 'Reaching/Crouching'; 7 | activities{5} = 'On The Floor'; 8 | activities{6} = 'Sports'; 9 | activities{7} = 'Miscellaneous'; 10 | 11 | activity_names = activities(activity_id); 12 | end -------------------------------------------------------------------------------- /ContextPose/mvn/models/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .default import _C as cfg 8 | from .default import update_config 9 | from .models import MODEL_EXTRAS 10 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/camera_calibration/ts1-4cameras.calib: -------------------------------------------------------------------------------- 1 | tc camera calibration v0.3 2 | camera 0 cam_8 3 | frame 0 4 | sensorSize 10 10 # in mm 5 | focalLength 7.32506 # in mm 6 | pixelAspect 1.00044 7 | centerOffset -0.0322884 0.0929296 # in mm (positive values move right and down) 8 | distortionModel OpenCV 9 | distortion 0.0 0.0 0.0 0.0 0.0 10 | origin 3427.28 1387.86 309.42 11 | up -0.208215 0.976233 0.06014 12 | right 0.000575281 0.0616098 -0.9981 13 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_get_pck_auc_joint_groups.m: -------------------------------------------------------------------------------- 1 | function [joint_groups] = mpii_get_pck_auc_joint_groups() 2 | 3 | joint_groups = { %'Head', [1,17]; 4 | 'Head', [1]; 5 | 'Neck', [2]; 6 | 'Shou', [3,6]; 7 | 'Elbow', [4,7]; 8 | 'Wrist', [5,8]; 9 | %'spine', [16]; 10 | 'Hip', [9,12]; 11 | 'Knee', [10,13]; 12 | 'Ankle', [11,14]; 13 | }; 14 | end -------------------------------------------------------------------------------- /ContextPose_mpi/common/utils_3dhp.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def mpii_get_sequence_info(subject_id, sequence): 4 | 5 | switcher = { 6 | "1 1": [6416,25], 7 | "1 2": [12430,50], 8 | "2 1": [6502,25], 9 | "2 2": [6081,25], 10 | "3 1": [12488,50], 11 | "3 2": [12283,50], 12 | "4 1": [6171,25], 13 | "4 2": [6675,25], 14 | "5 1": [12820,50], 15 | "5 2": [12312,50], 16 | "6 1": [6188,25], 17 | "6 2": [6145,25], 18 | "7 1": [6239,25], 19 | "7 2": [6320,25], 20 | "8 1": [6468,25], 21 | "8 2": [6054,25], 22 | 23 | } 24 | return switcher.get(subject_id+" "+sequence) 25 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/get_testset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to fetch and unzip the test set 3 | echo "Reading configuration from ./config....." >&2 4 | source ./conf.ig 5 | if [[ $ready_to_download -eq 0 ]]; then 6 | echo "Please read the documentation and edit the config file accordingly." >&2 7 | exit 1 8 | fi 9 | source_path="http://gvv.mpi-inf.mpg.de/3dhp-dataset" 10 | if [ ! -f "./mpi_inf_3dhp_test_set.zip" ]; then 11 | wget "$source_path/mpi_inf_3dhp_test_set.zip" 12 | fi 13 | if [ -f "./mpi_inf_3dhp_test_set.zip" ]; then 14 | if [ ! -d "$destination" ]; then 15 | mkdir "$destination" 16 | fi 17 | unzip "./mpi_inf_3dhp_test_set.zip" -d "$destination/mpi_inf_3dhp_test_set" 18 | rm "./mpi_inf_3dhp_test_set.zip" 19 | fi 20 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/process_data.sh: -------------------------------------------------------------------------------- 1 | cd dataset/mpi_inf_3dhp 2 | 3 | # Download raw videos and annotations 4 | bash get_dataset.sh 5 | bash get_testset.sh 6 | 7 | mv mpi_inf_3dhp_test_set/mpi_inf_3dhp_test_set ../ 8 | rm -r mpi_inf_3dhp_test_set 9 | 10 | # Prepare labels 11 | cd ../../ 12 | if [ ! 
-f "dataset/data_train_3dhp.npz" ]; then 13 | python dataset/data_util/data_to_npz_3dhp.py 14 | fi 15 | 16 | if [ ! -f "dataset/data_test_3dhp.npz" ]; then 17 | python dataset/data_util/data_to_npz_3dhp_test.py 18 | fi 19 | 20 | # Convert raw videos to images for the training set 21 | python dataset/data_util/video_to_images.py 22 | 23 | # Crop images to a smaller size (256x192) 24 | python dataset/data_util/convert_to_small.py 25 | python dataset/data_util/convert_to_small_test.py -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/util/mpii_get_camera_set.m: -------------------------------------------------------------------------------- 1 | function [camera_set] = mpii_get_camera_set(camera_set_name) 2 | 3 | switch camera_set_name 4 | 5 | case 'regular' 6 | camera_set = 0:13; %Cameras with regular lenses, not fisheye 7 | case 'relevant' 8 | camera_set = 0:10; %All cameras except the ceiling mounted ones 9 | case 'ceiling' 10 | camera_set = 11:13; %Top down views 11 | case 'vnect' 12 | camera_set = [0, 1, 2, 4, 5, 6, 7, 8]; %Chest high, knee high and 2 cameras angled down. Use for VNect @ SIGGRAPH 17 13 | case 'mm3d_chest' 14 | camera_set = [0, 2, 4, 7, 8]; %Subset of chest high, used in "Monocular 3D Human Pose Estimation in-the-wild Using Improved CNN supervision" 15 | otherwise 16 | camera_set = []; 17 | end 18 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/conf.ig: -------------------------------------------------------------------------------- 1 | # The data would be downloaded to this path 2 | # Make sure you have approx 25GB space in this 3 | # path to download the complete training set. 4 | # The test set needs another 7GB and can be 5 | # downloaded with get_testset.sh 6 | destination='./' 7 | # The subjects you want to download the train data for. 8 | # Start with a few if all you want to do is examine the data 9 | subjects=(1 2 3 4 5 6 7 8) 10 | # Set if you want to download the camera views 11 | # that were not used for VNect 12 | download_extra_wall_cameras=0 13 | download_extra_ceiling_cameras=0 14 | # Unset if you don't want to download the segmentation 15 | # masks for the sequences 16 | download_masks=0 17 | # Set if you agree with the license conditions and want 18 | # to proceed with downloading the dataset 19 | ready_to_download=1 20 | -------------------------------------------------------------------------------- /ContextPose/mvn/utils/misc.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import json 3 | 4 | 5 | def config_to_str(config): 6 | return yaml.dump(yaml.safe_load(json.dumps(config))) # fuck yeah 7 | 8 | 9 | class AverageMeter(object): 10 | """Computes and stores the average and current value""" 11 | def __init__(self): 12 | self.reset() 13 | 14 | def reset(self): 15 | self.val = 0 16 | self.avg = 0 17 | self.sum = 0 18 | self.count = 0 19 | 20 | def update(self, val, n=1): 21 | self.val = val 22 | self.sum += val * n 23 | self.count += n 24 | self.avg = self.sum / self.count 25 | 26 | 27 | def calc_gradient_norm(named_parameters): 28 | total_norm = 0.0 29 | for name, p in named_parameters: 30 | param_norm = p.grad.data.norm(2) 31 | total_norm += param_norm.item() ** 2 32 | 33 | total_norm = total_norm ** (1. 
/ 2) 34 | 35 | return total_norm 36 | -------------------------------------------------------------------------------- /ContextPose/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2022.9.24 2 | charset-normalizer==3.1.0 3 | cycler==0.11.0 4 | easydict==1.10 5 | einops==0.6.1 6 | fonttools==4.39.4 7 | idna==3.4 8 | imageio==2.31.1 9 | kiwisolver==1.4.4 10 | lazy_loader==0.2 11 | matplotlib==3.5.2 12 | networkx==2.8.8 13 | numpy==1.24.4 14 | opencv-python==4.6.0.66 15 | packaging==23.1 16 | Pillow==10.0.0 17 | protobuf==3.20.3 18 | pyparsing==3.0.9 19 | python-dateutil==2.8.2 20 | PyWavelets==1.3.0 21 | PyYAML==6.0 22 | requests==2.31.0 23 | scikit-image==0.20.0 24 | scipy==1.9.1 25 | six==1.16.0 26 | tensorboardX==2.6 27 | tifffile==2023.7.10 28 | timm==0.6.7 29 | torch @ http://download.pytorch.org/whl/cu113/torch-1.11.0%2Bcu113-cp38-cp38-linux_x86_64.whl#sha256=b6a799bdb6ee3d914e5e62bddb4276d4a10248c1af4f2d217738e5f9ee27485b 30 | torchvision @ http://download.pytorch.org/whl/cu113/torchvision-0.12.0%2Bcu113-cp38-cp38-linux_x86_64.whl#sha256=37133e8c5b0ec2f01999e59116f6d0e36d9afb1c7f8f58bd0c3dc8996f835419 31 | tqdm==4.66.1 32 | typing_extensions==4.6.2 33 | urllib3==2.0.2 34 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_perspective_correction_code.m: -------------------------------------------------------------------------------- 1 | test_subject_id = [1,2,3,4,5,6]; 2 | focalL{1} = (2048/10)*7.320339203; % res/sensorsize*focalLengthMM 3 | focalL{2} = focalL{1}; 4 | focalL{3} = focalL{1}; 5 | focalL{4} = focalL{1}; 6 | focalL{5} = (1920/10)*8.770747185; % res/sensorsize*focalLengthMM 7 | focalL{6} = focalL{5}; 8 | 9 | for ts = 1:6 10 | 11 | %Fancy predictions here: predict_2d and predict_3d. predict_2d is in the uncropped(?) image space 12 | 13 | focalLengthInPX = focalL{ts}; 14 | 15 | resolutionXInPX = image_size{ts}(2); %I can't seem to remember which one is x or why. 
Try both until something works :) 16 | resolutionYInPX = image_size{ts}(1); 17 | principalPointX = resolutionXInPX/2; 18 | principalPointY = resolutionYInPX/2; 19 | center = predict_2d(15,:) - [principalPointX, principalPointY]; % (pelvis location) 20 | R = mpii_perspective_correction(center(1), 0, focalLengthInPX); 21 | predict_3d = R * predict_3d; 22 | end -------------------------------------------------------------------------------- /ContextPose/mvn/utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | 5 | class Logger(): 6 | def __init__(self, log_path, level="DEBUG"): 7 | self.logger = logging.getLogger() 8 | self.logger.setLevel(level) 9 | self.log_path = log_path 10 | self.add_handler() 11 | 12 | def console_handler(self,level="DEBUG"): 13 | console_handler = logging.StreamHandler() 14 | console_handler.setLevel(level) 15 | 16 | console_handler.setFormatter(self.get_formatter()[0]) 17 | 18 | return console_handler 19 | 20 | def file_handler(self, level="DEBUG"): 21 | file_handler = logging.FileHandler(os.path.join(self.log_path, "log.txt"),mode="a",encoding="utf-8") 22 | file_handler.setLevel(level) 23 | 24 | file_handler.setFormatter(self.get_formatter()[1]) 25 | 26 | return file_handler 27 | 28 | def get_formatter(self): 29 | console_fmt = logging.Formatter(fmt="%(asctime)s: %(message)s") 30 | file_fmt = logging.Formatter(fmt="%(asctime)s: %(message)s") 31 | 32 | return console_fmt,file_fmt 33 | 34 | def add_handler(self): 35 | self.logger.addHandler(self.console_handler()) 36 | self.logger.addHandler(self.file_handler()) 37 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/quaternion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | import torch 9 | 10 | def qrot(q, v): 11 | """ 12 | Rotate vector(s) v about the rotation described by quaternion(s) q. 13 | Expects a tensor of shape (*, 4) for q and a tensor of shape (*, 3) for v, 14 | where * denotes any number of dimensions. 15 | Returns a tensor of shape (*, 3). 16 | """ 17 | assert q.shape[-1] == 4 18 | assert v.shape[-1] == 3 19 | assert q.shape[:-1] == v.shape[:-1] 20 | 21 | qvec = q[..., 1:] 22 | uv = torch.cross(qvec, v, dim=len(q.shape)-1) 23 | uuv = torch.cross(qvec, uv, dim=len(q.shape)-1) 24 | return (v + 2 * (q[..., :1] * uv + uuv)) 25 | 26 | 27 | def qinverse(q, inplace=False): 28 | # We assume the quaternion to be normalized 29 | if inplace: 30 | q[..., 1:] *= -1 31 | return q 32 | else: 33 | w = q[..., :1] 34 | xyz = q[..., 1:] 35 | return torch.cat((w, -xyz), dim=len(q.shape)-1) -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/README.txt: -------------------------------------------------------------------------------- 1 | Details: 2 | The test set has 6 sequences and a dedicated folder for each sequence. 3 | Each folder contains a .mat file with the following information 4 | 5 | valid_frame: Indicates whether the frame is valid or not. Invalid frames 6 | won't be used for evaluation. Refer to mpii_test_predictions.m for more. 
7 | 8 | activity_annnotation: Activity annotations per frame, used for 9 | generating activitywise error reports 10 | 11 | univ_annot3: A 3x17x1xn matrix containing the 3D annotations in mm for 12 | 17 joints. The annotations are scaled to the height of the universal 13 | skeleton used by Human3.6m. The order and the names of the joints can be 14 | observed using [~,~,~,joint_names] = mpii_get_joints('relevant'); 15 | 16 | The file mpii_test_predictions.m should be a helpful starting point. 17 | Additionally, information about the crops (the original size in the frame) 18 | is available in the mat/zip file attached with the email. 19 | 20 | If you want to evaluate by scene setting, you can use the sequencewise evaluation 21 | to convert to these numbers by doing 22 | #1:Studio with Green Screen (TS1*603 + TS2 *540)/ (603+540) 23 | #2:Studio without Green Screen (TS3*505+TS4*553)/(505+553) 24 | #3:Outdoor (TS5*276+TS6*452)/(276+452) 25 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/data_util/video_to_images.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | from scipy.io import loadmat 4 | from subprocess import call 5 | from os import makedirs 6 | 7 | 8 | subject_list = [1, 2, 3, 4, 5, 6, 7, 8] 9 | sequence_list = [1, 2] 10 | camera_list = [0, 1, 2, 4, 5, 6, 7, 8] 11 | 12 | makedirs('dataset/mpi_inf_3dhp/images', exist_ok=True) 13 | 14 | cnt = 0 15 | for s in subject_list: 16 | for se in sequence_list: 17 | for c in camera_list: 18 | subdir_format = 's_{:02d}_seq_{:02d}_ca_{:02d}' 19 | 20 | subdir = subdir_format.format(s, se, c) 21 | makedirs(osp.join('dataset/mpi_inf_3dhp/images', subdir), exist_ok=True) 22 | 23 | fileformat = 'dataset/mpi_inf_3dhp/images' + '/' + subdir + '/' + subdir + '_%06d.jpg' 24 | 25 | videopath = 'dataset/mpi_inf_3dhp/S{:01d}/Seq{:01d}/imageSequence/video_{:01d}.avi'.format(s, se, c) 26 | # print(videoname.split('.')[0]) 27 | subject = 'S' + str(s) 28 | 29 | print(videopath) 30 | cnt += 1 31 | call([ 32 | 'ffmpeg', 33 | '-nostats', 34 | '-i', videopath, 35 | '-qscale:v', '3', 36 | fileformat 37 | ]) 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/cpn/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import numpy as np 5 | 6 | def add_pypath(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | class Config: 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | this_dir_name = cur_dir.split('/')[-1] 13 | root_dir = os.path.join(cur_dir, '..') 14 | 15 | model = 'CPN50' 16 | 17 | lr = 5e-4 18 | lr_gamma = 0.5 19 | lr_dec_epoch = list(range(6,40,6)) 20 | 21 | batch_size = 32 22 | weight_decay = 1e-5 23 | 24 | num_class = 17 25 | img_path = os.path.join(root_dir, 'data', 'COCO2017', 'train2017') 26 | symmetry = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12), (13, 14), (15, 16)] 27 | bbox_extend_factor = (0.1, 0.15) # x, y 28 | 29 | # data augmentation setting 30 | scale_factor=(0.7, 1.35) 31 | rot_factor=45 32 | 33 | pixel_means = np.array([122.7717, 115.9465, 102.9801]) # RGB 34 | data_shape = (256, 192) 35 | output_shape = (64, 48) 36 | gaussain_kernel = (7, 7) 37 | 38 | gk15 = (15, 15) 39 | gk11 = (11, 11) 40 | gk9 = (9, 9) 41 | gk7 = (7, 7) 42 | 43 | gt_path = os.path.join(root_dir, 'data', 'COCO2017', 
'annotations', 'COCO_2017_train.json') 44 | 45 | cfg = Config() 46 | add_pypath(cfg.root_dir) 47 | 48 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/networks/network.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | import torch.nn as nn 3 | from .globalNet import globalNet 4 | from .refineNet import refineNet 5 | 6 | __all__ = ['CPN50', 'CPN101'] 7 | 8 | class CPN(nn.Module): 9 | def __init__(self, resnet, output_shape, num_class, pretrained=True): 10 | super(CPN, self).__init__() 11 | channel_settings = [2048, 1024, 512, 256] 12 | self.resnet = resnet 13 | self.global_net = globalNet(channel_settings, output_shape, num_class) 14 | self.refine_net = refineNet(channel_settings[-1], output_shape, num_class) 15 | 16 | def forward(self, x): 17 | res_out = self.resnet(x) 18 | # global_fms, global_outs = self.global_net(res_out) 19 | global_fms = self.global_net(res_out) 20 | refine_out = self.refine_net(global_fms) 21 | 22 | return refine_out 23 | # return global_outs, refine_out 24 | 25 | def CPN50(out_size,num_class,pretrained=True): 26 | res50 = resnet50(pretrained=pretrained) 27 | model = CPN(res50, output_shape=out_size,num_class=num_class, pretrained=pretrained) 28 | return model 29 | 30 | def CPN101(out_size,num_class,pretrained=True): 31 | res101 = resnet101(pretrained=pretrained) 32 | model = CPN(res101, output_shape=out_size,num_class=num_class, pretrained=pretrained) 33 | return model 34 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/cpn/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import numpy as np 5 | 6 | def add_pypath(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | class Config: 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | this_dir_name = cur_dir.split('/')[-1] 13 | root_dir = os.path.join(cur_dir, '..') 14 | 15 | model = 'CPN50' # option 'CPN50', 'CPN101' 16 | 17 | num_class = 17 18 | img_path = os.path.join(root_dir, 'data', 'COCO2017', 'val2017') 19 | symmetry = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12), (13, 14), (15, 16)] 20 | bbox_extend_factor = (0.1, 0.15) # x, y 21 | 22 | pixel_means = np.array([122.7717, 115.9465, 102.9801]) # RGB 23 | data_shape = (256, 192) 24 | output_shape = (64, 48) 25 | 26 | use_GT_bbox = True 27 | if use_GT_bbox: 28 | gt_path = os.path.join(root_dir, 'data', 'COCO2017', 'annotations', 'COCO_2017_val.json') 29 | else: 30 | # if False, make sure you have downloaded the val_dets.json and place it into annotation folder 31 | gt_path = os.path.join(root_dir, 'data', 'COCO2017', 'annotations', 'val_dets.json') 32 | ori_gt_path = os.path.join(root_dir, 'data', 'COCO2017', 'annotations', 'person_keypoints_val2017.json') 33 | 34 | cfg = Config() 35 | add_pypath(cfg.root_dir) 36 | add_pypath(os.path.join(cfg.root_dir, 'cocoapi/PythonAPI')) -------------------------------------------------------------------------------- /ContextPose_mpi/model/conpose.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import numpy as np 3 | import pickle 4 | import random 5 | from time import time 6 | 7 | import torch 8 | from torch import nn 9 | import torch.nn.functional as F 10 | 11 | from model import pose_hrnet 12 | from model.pose_dformer import PoseTransformer 13 
| 14 | 15 | class VolumetricTriangulationNet(nn.Module): 16 | def __init__(self, config, device='cuda:0'): 17 | super().__init__() 18 | 19 | self.num_joints = config.model.backbone.num_joints 20 | 21 | self.backbone = pose_hrnet.get_pose_net(config.model.backbone) 22 | 23 | if config.model.backbone.fix_weights: 24 | print("model backbone weights are fixed") 25 | for p in self.backbone.parameters(): 26 | p.requires_grad = False 27 | 28 | self.volume_net = PoseTransformer(config.model.poseformer) 29 | 30 | 31 | def forward(self, images, keypoints_2d_cpn, keypoints_2d_cpn_crop): 32 | device = images.device 33 | images = images.permute(0, 3, 1, 2).contiguous() 34 | 35 | keypoints_2d_cpn_crop[..., :2] /= torch.tensor([192//2, 256//2], device=device) 36 | keypoints_2d_cpn_crop[..., :2] -= torch.tensor([1, 1], device=device) 37 | 38 | # forward backbone 39 | features_list = self.backbone(images) 40 | keypoints_3d = self.volume_net(keypoints_2d_cpn, keypoints_2d_cpn_crop, features_list) 41 | 42 | return keypoints_3d 43 | 44 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/mocap_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | import numpy as np 9 | from common.skeleton import Skeleton 10 | 11 | class MocapDataset: 12 | def __init__(self, fps, skeleton): 13 | self._skeleton = skeleton 14 | self._fps = fps 15 | self._data = None # Must be filled by subclass 16 | self._cameras = None # Must be filled by subclass 17 | 18 | def remove_joints(self, joints_to_remove): 19 | kept_joints = self._skeleton.remove_joints(joints_to_remove) 20 | for subject in self._data.keys(): 21 | for action in self._data[subject].keys(): 22 | s = self._data[subject][action] 23 | if 'positions' in s: 24 | s['positions'] = s['positions'][:, kept_joints] 25 | 26 | 27 | def __getitem__(self, key): 28 | return self._data[key] 29 | 30 | def subjects(self): 31 | return self._data.keys() 32 | 33 | def fps(self): 34 | return self._fps 35 | 36 | def skeleton(self): 37 | return self._skeleton 38 | 39 | def cameras(self): 40 | return self._cameras 41 | 42 | def supports_semi_supervised(self): 43 | # This method can be overridden 44 | return False -------------------------------------------------------------------------------- /H36M-Toolbox/video_to_images.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | from scipy.io import loadmat 4 | from subprocess import call 5 | from os import makedirs 6 | 7 | 8 | subject_list = [1, 5, 6, 7, 8, 9, 11] 9 | action_list = [x for x in range(2, 17)] 10 | subaction_list = [x for x in range(1, 3)] 11 | camera_list = [x for x in range(1, 5)] 12 | 13 | 14 | from metadata import load_h36m_metadata 15 | metadata = load_h36m_metadata() 16 | 17 | makedirs('images', exist_ok=True) 18 | 19 | 20 | cnt = 0 21 | for s in subject_list: 22 | for a in action_list: 23 | for sa in subaction_list: 24 | for c in camera_list: 25 | subdir_format = 's_{:02d}_act_{:02d}_subact_{:02d}_ca_{:02d}' 26 | 27 | subdir = subdir_format.format(s, a, sa, c) 28 | makedirs(osp.join('images', subdir), exist_ok=True) 29 | 30 | fileformat = 'images' + '/' + subdir + '/' + subdir + '_%06d.jpg' 31 | 32 | basename = 
metadata.get_base_filename('S{:d}'.format(s), '{:d}'.format(a), '{:d}'.format(sa), metadata.camera_ids[c-1]) 33 | videoname = basename + '.mp4' 34 | subject = 'S' + str(s) 35 | videopath = osp.join('extracted', subject, 'Videos', videoname) 36 | 37 | print(videopath) 38 | cnt += 1 39 | call([ 40 | 'ffmpeg', 41 | '-nostats', 42 | '-i', videopath, 43 | '-qscale:v', '3', 44 | fileformat 45 | ]) 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /ContextPose/conda-requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | _libgcc_mutex=0.1=main 5 | _openmp_mutex=5.1=1_gnu 6 | ca-certificates=2023.08.22=h06a4308_0 7 | certifi=2022.9.24=pypi_0 8 | charset-normalizer=3.1.0=pypi_0 9 | cycler=0.11.0=pypi_0 10 | easydict=1.10=pypi_0 11 | einops=0.6.1=pypi_0 12 | fonttools=4.39.4=pypi_0 13 | idna=3.4=pypi_0 14 | imageio=2.31.1=pypi_0 15 | kiwisolver=1.4.4=pypi_0 16 | lazy-loader=0.2=pypi_0 17 | ld_impl_linux-64=2.38=h1181459_1 18 | libffi=3.3=he6710b0_2 19 | libgcc-ng=11.2.0=h1234567_1 20 | libgomp=11.2.0=h1234567_1 21 | libstdcxx-ng=11.2.0=h1234567_1 22 | matplotlib=3.5.2=pypi_0 23 | ncurses=6.4=h6a678d5_0 24 | networkx=2.8.8=pypi_0 25 | numpy=1.24.4=pypi_0 26 | opencv-python=4.6.0.66=pypi_0 27 | openssl=1.1.1w=h7f8727e_0 28 | packaging=23.1=pypi_0 29 | pillow=10.0.0=pypi_0 30 | pip=23.2.1=py38h06a4308_0 31 | protobuf=3.20.3=pypi_0 32 | pyparsing=3.0.9=pypi_0 33 | python=3.8.10=h12debd9_8 34 | python-dateutil=2.8.2=pypi_0 35 | pywavelets=1.3.0=pypi_0 36 | pyyaml=6.0=pypi_0 37 | readline=8.2=h5eee18b_0 38 | requests=2.31.0=pypi_0 39 | scikit-image=0.20.0=pypi_0 40 | scipy=1.9.1=pypi_0 41 | setuptools=68.0.0=py38h06a4308_0 42 | six=1.16.0=pypi_0 43 | sqlite=3.41.2=h5eee18b_0 44 | tensorboardx=2.6=pypi_0 45 | tifffile=2023.7.10=pypi_0 46 | timm=0.6.7=pypi_0 47 | tk=8.6.12=h1ccaba5_0 48 | torch=1.11.0+cu113=pypi_0 49 | torchvision=0.12.0+cu113=pypi_0 50 | tqdm=4.66.1=pypi_0 51 | typing-extensions=4.6.2=pypi_0 52 | urllib3=2.0.2=pypi_0 53 | wheel=0.41.2=py38h06a4308_0 54 | xz=5.4.2=h5eee18b_0 55 | zlib=1.2.13=h5eee18b_0 56 | -------------------------------------------------------------------------------- /H36M-Toolbox/metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import xml.etree.ElementTree as ET 4 | 5 | 6 | class H36M_Metadata: 7 | def __init__(self, metadata_file): 8 | self.subjects = [] 9 | self.sequence_mappings = {} 10 | self.action_names = {} 11 | self.camera_ids = [] 12 | 13 | tree = ET.parse(metadata_file) 14 | root = tree.getroot() 15 | 16 | for i, tr in enumerate(root.find('mapping')): 17 | if i == 0: 18 | _, _, *self.subjects = [td.text for td in tr] 19 | self.sequence_mappings = {subject: {} for subject in self.subjects} 20 | elif i < 33: 21 | action_id, subaction_id, *prefixes = [td.text for td in tr] 22 | for subject, prefix in zip(self.subjects, prefixes): 23 | self.sequence_mappings[subject][(action_id, subaction_id)] = prefix 24 | 25 | for i, elem in enumerate(root.find('actionnames')): 26 | action_id = str(i + 1) 27 | self.action_names[action_id] = elem.text 28 | 29 | self.camera_ids = [elem.text for elem in root.find('dbcameras/index2id')] 30 | 31 | def get_base_filename(self, subject, action, subaction, camera): 32 | return '{}.{}'.format(self.sequence_mappings[subject][(action, 
subaction)], camera) 33 | 34 | 35 | def load_h36m_metadata(): 36 | return H36M_Metadata('metadata.xml') 37 | 38 | 39 | if __name__ == '__main__': 40 | metadata = load_h36m_metadata() 41 | print(metadata.subjects) 42 | print(metadata.sequence_mappings) 43 | print(metadata.action_names) 44 | print(metadata.camera_ids) 45 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/conpose.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from mvn.models import pose_hrnet 5 | from mvn.models.networks import network 6 | from mvn.models.cpn.test_config import cfg 7 | from mvn.models.pose_dformer import PoseTransformer 8 | 9 | 10 | class CA_PF(nn.Module): 11 | def __init__(self, config, device='cuda:0'): 12 | super().__init__() 13 | 14 | self.num_joints = config.model.backbone.num_joints 15 | 16 | if config.model.backbone.type in ['hrnet_32', 'hrnet_48']: 17 | self.backbone = pose_hrnet.get_pose_net(config.model.backbone) 18 | 19 | elif config.model.backbone.type == 'cpn': 20 | self.backbone = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained=False) 21 | 22 | if config.model.backbone.fix_weights: 23 | print("model backbone weights are fixed") 24 | for p in self.backbone.parameters(): 25 | p.requires_grad = False 26 | 27 | self.volume_net = PoseTransformer(config.model.poseformer, backbone=config.model.backbone.type) 28 | 29 | 30 | def forward(self, images, keypoints_2d_cpn, keypoints_2d_cpn_crop): 31 | device = keypoints_2d_cpn.device 32 | images = images.permute(0, 3, 1, 2).contiguous() 33 | 34 | keypoints_2d_cpn_crop[..., :2] /= torch.tensor([192//2, 256//2], device=device) 35 | keypoints_2d_cpn_crop[..., :2] -= torch.tensor([1, 1], device=device) 36 | 37 | # forward backbone 38 | features_list = self.backbone(images) 39 | 40 | keypoints_3d = self.volume_net(keypoints_2d_cpn, keypoints_2d_cpn_crop, features_list) 41 | 42 | return keypoints_3d 43 | 44 | -------------------------------------------------------------------------------- /H36M-Toolbox/README.md: -------------------------------------------------------------------------------- 1 | This code is built on top of 2 | https://github.com/anibali/h36m-fetch 3 | 4 | 5 | [Human3.6M](http://vision.imar.ro/human3.6m/description.php) is a 3D 6 | human pose dataset containing 3.6 million human poses and corresponding 7 | images. The scripts in this repository make it easy to download, 8 | extract, and preprocess the images and annotations from Human3.6M. 9 | 10 | **Please do not ask me for a copy of the Human3.6M dataset. I do not own 11 | the data, nor do I have permission to redistribute it. Please visit 12 | http://vision.imar.ro/human3.6m/ in order to request access and contact 13 | the maintainers of the dataset.** 14 | 15 | ## Requirements 16 | 17 | * Python 3 18 | * [`axel`](https://github.com/axel-download-accelerator/axel) 19 | * CDF (https://www.scivision.dev/spacepy-install-anaconda-python/) 20 | 21 | ## Usage 22 | 23 | 1. Firstly, you will need to create an account at 24 | http://vision.imar.ro/human3.6m/ to gain access to the dataset. 25 | 2. Once your account has been approved, log in and inspect your cookies 26 | to find your PHPSESSID. 27 | 3. Copy the configuration file `config.ini.example` to `config.ini` 28 | and fill in your PHPSESSID. 29 | 4. 
download_all.py -> extract_all.py -> video_to_images.py -> generate_labels.py 30 | 31 | 32 | ## License 33 | 34 | The code in this repository is licensed under the terms of the 35 | [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). 36 | 37 | Please read the 38 | [license agreement](http://vision.imar.ro/human3.6m/eula.php) for the 39 | Human3.6M dataset itself, which specifies citations you must make when 40 | using the data in your own research. The file `metadata.xml` is directly 41 | copied from the "Visualisation and large scale prediction software" 42 | bundle from the Human3.6M website, and is subject to the same license 43 | agreement. 44 | -------------------------------------------------------------------------------- /H36M-Toolbox/extract_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from os import path, makedirs 4 | import tarfile 5 | from tqdm import tqdm 6 | 7 | 8 | subjects = ['S1', 'S5', 'S6', 'S7', 'S8', 'S9', 'S11'] 9 | 10 | 11 | # https://stackoverflow.com/a/6718435 12 | def commonprefix(m): 13 | s1 = min(m) 14 | s2 = max(m) 15 | for i, c in enumerate(s1): 16 | if c != s2[i]: 17 | return s1[:i] 18 | return s1 19 | 20 | 21 | def extract_tgz(tgz_file, dest): 22 | if path.exists(dest): 23 | return 24 | with tarfile.open(tgz_file, 'r:gz') as tar: 25 | members = [m for m in tar.getmembers() if m.isreg()] 26 | member_dirs = [path.dirname(m.name).split(path.sep) for m in members] 27 | base_path = path.sep.join(commonprefix(member_dirs)) 28 | for m in members: 29 | m.name = path.relpath(m.name, base_path) 30 | tar.extractall(dest) 31 | 32 | 33 | def extract_all(): 34 | for subject_id in tqdm(subjects, ascii=True): 35 | out_dir = path.join('extracted', subject_id) 36 | makedirs(out_dir, exist_ok=True) 37 | extract_tgz('archives/Poses_D2_Positions_{}.tgz'.format(subject_id), 38 | path.join(out_dir, 'Poses_D2_Positions')) 39 | extract_tgz('archives/Poses_D3_Positions_{}.tgz'.format(subject_id), 40 | path.join(out_dir, 'Poses_D3_Positions')), 41 | extract_tgz('archives/Poses_D3_Positions_mono_{}.tgz'.format(subject_id), 42 | path.join(out_dir, 'Poses_D3_Positions_mono')), 43 | extract_tgz('archives/Poses_D3_Positions_mono_universal_{}.tgz'.format(subject_id), 44 | path.join(out_dir, 'Poses_D3_Positions_mono_universal')), 45 | extract_tgz('archives/Videos_{}.tgz'.format(subject_id), 46 | path.join(out_dir, 'Videos')) 47 | 48 | 49 | if __name__ == '__main__': 50 | extract_all() 51 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/util/mpii_get_joint_set.m: -------------------------------------------------------------------------------- 1 | function [joint_idx, joint_parents_o1, joint_parents_o2, joint_names] = mpii_get_joint_set(joint_set_name) 2 | 3 | all_joint_names = {'spine3', 'spine4', 'spine2', 'spine', 'pelvis', ... %5 4 | 'neck', 'head', 'head_top', 'left_clavicle', 'left_shoulder', 'left_elbow', ... %11 5 | 'left_wrist', 'left_hand', 'right_clavicle', 'right_shoulder', 'right_elbow', 'right_wrist', ... %17 6 | 'right_hand', 'left_hip', 'left_knee', 'left_ankle', 'left_foot', 'left_toe', ... 
%23 7 | 'right_hip' , 'right_knee', 'right_ankle', 'right_foot', 'right_toe'}; 8 | 9 | %The O1 and O2 indices are relaive to the joint_idx, regardless of the joint set 10 | 11 | switch joint_set_name 12 | case 'all' 13 | joint_idx = 1:28; 14 | joint_parents_o1 = [3, 1, 4, 5, 5, 2, 6, 7, 6, 9, 10, 11, 12, 6, 14, 15, 16, 17, 5, 19, 20, 21, 22, 5, 24, 25, 26, 27 ]; 15 | joint_parents_o2 = [4, 3, 5, 5, 5, 1, 2, 6, 2, 6, 9, 10, 11, 2, 6, 14, 15, 16, 4, 5, 19, 20, 21, 4, 5, 24, 25, 26]; 16 | joint_names = all_joint_names; 17 | 18 | case 'relevant' %Human3.6m compatible joint set in Our order 19 | joint_idx = [8, 6, 15, 16, 17, 10, 11, 12, 24, 25, 26, 19, 20, 21, 5, 4, 7]; 20 | joint_parents_o1 = [ 2, 16, 2, 3, 4, 2, 6, 7, 15, 9, 10, 15, 12, 13, 15, 15, 2]; 21 | joint_parents_o2 = [ 16, 15, 16, 2, 3, 16, 2, 6, 16, 15, 9, 16, 15, 12, 15, 15, 16]; 22 | joint_names = all_joint_names(joint_idx); 23 | case 'extended' %Human3.6m compatible joint set in Our order + End effectors for Hands and Feet 24 | joint_idx = [8, 6, 15, 16, 17, 10, 11, 12, 24, 25, 26, 19, 20, 21, 5, 4, 7, 18, 13, 28, 23]; 25 | joint_parents_o1 = [ 2, 16, 2, 3, 4, 2, 6, 7, 15, 9, 10, 15, 12, 13, 15, 15, 2, 5, 8, 11, 14]; 26 | joint_parents_o2 = [ 16, 15, 16, 2, 3, 16, 2, 6, 16, 15, 9, 16, 15, 12, 15, 15, 16, 4, 7, 10, 13]; 27 | joint_names = all_joint_names(joint_idx); 28 | otherwise 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /ContextPose/experiments/human36m/human36m.yaml: -------------------------------------------------------------------------------- 1 | title: "human36m" 2 | kind: "human36m" 3 | azureroot: "" 4 | batch_output: true 5 | vis_freq: 1000 6 | vis_n_elements: 10 7 | 8 | model: 9 | image_shape: [192, 256] 10 | 11 | init_weights: false 12 | checkpoint: "" 13 | 14 | backbone: 15 | type: "hrnet_32" # ["hrnet_48", "cpn"] 16 | num_final_layer_channel: 17 17 | num_joints: 17 18 | num_layers: 152 19 | 20 | init_weights: true 21 | fix_weights: true 22 | checkpoint: "data/pretrained/coco/pose_hrnet_w32_256x192.pth" 23 | # checkpoint: "data/pretrained/coco/pose_hrnet_w48_256x192.pth" 24 | # checkpoint: "data/pretrained/coco/CPN50_256x192.pth.tar" 25 | 26 | poseformer: 27 | embed_dim_ratio: 128 28 | depth: 4 29 | 30 | loss: 31 | criterion: "MPJPE" 32 | scale_keypoints_3d: 0.1 33 | 34 | use_volumetric_ce_loss: true 35 | volumetric_ce_loss_weight: 0.01 36 | 37 | use_global_attention_loss: True 38 | global_attention_loss_weight: 1000000 39 | 40 | dataset: 41 | kind: "human36m" 42 | data_format: "" 43 | root: "../H36M-Toolbox/images/" 44 | train_labels_path: "data/h36m_train.pkl" 45 | val_labels_path: "data/h36m_validation.pkl" 46 | 47 | train: 48 | n_objects_per_epoch: 15000 49 | n_epochs: 9999 50 | 51 | batch_size: 512 # 512 for other backbones, 256 for cpn 52 | 53 | optimizer: 'Adam' 54 | backbone_lr: 0.0 55 | backbone_lr_step: [1000] 56 | backbone_lr_factor: 0.1 57 | process_features_lr: 0.001 58 | volume_net_lr: 0.00064 # 0.00032 for cpn 59 | volume_net_lr_decay: 0.99 60 | volume_net_lr_step: [1000] 61 | volume_net_lr_factor: 0.5 62 | 63 | with_damaged_actions: true 64 | undistort_images: true 65 | 66 | scale_bbox: 1.0 67 | 68 | shuffle: true 69 | randomize_n_views: true 70 | min_n_views: 1 71 | max_n_views: 1 72 | num_workers: 14 73 | 74 | val: 75 | batch_size: 512 # 512 for fixed backbone, 256 for cpn 76 | 77 | flip_test: true 78 | with_damaged_actions: true 79 | undistort_images: true 80 | 81 | scale_bbox: 1.0 82 | 83 | shuffle: false 84 | randomize_n_views: true 85 
| min_n_views: 1 86 | max_n_views: 1 87 | num_workers: 14 88 | retain_every_n_frames_in_test: 1 -------------------------------------------------------------------------------- /ContextPose_mpi/README.md: -------------------------------------------------------------------------------- 1 | ## MPI-INF-3DHP 2 | 3 | We heavily borrowed code from [P-STMO](https://github.com/paTRICK-swk/P-STMO) to train and evaluate our model on MPI-INF-3DHP. 4 | 5 | **Note:** We did not use Deformable Context Extraction for this dataset, as our input is ground-truth 2D keypoints. 6 | 7 | ### Dataset Preparation 8 | 9 | 1. Download and pre-process the data by running the following (this may take a while to complete): 10 | 11 | ~~~shell 12 | bash dataset/process_data.sh 13 | ~~~ 14 | 15 | This handles (1) downloading the data, (2) extracting the labels, and (3) processing the raw videos. 16 | 17 | 2. Download the COCO pre-trained weights for HRNet-32/HRNet-48 from https://drive.google.com/drive/folders/1nzM_OBV9LbAEA7HClC0chEyf_7ECDXYA and place them under `dataset/pretrained/`. 18 | 19 | 20 | 3. Your `dataset` directory should look like this if you followed the previous steps correctly: 21 | 22 | ```bash 23 | dataset/ 24 | ├── process_data.sh 25 | ├── data_train_3dhp.npz 26 | ├── data_test_3dhp.npz 27 | ├── data_util/ 28 | └── mpi_inf_3dhp/ 29 | ├── ... 30 | └── images/ 31 | └── mpi_inf_3dhp_test_set/ 32 | ├── ... 33 | └── images/ 34 | └── pretrained/ 35 | ├── pose_hrnet_w32_256x192.pth 36 | └── pose_hrnet_w48_256x192.pth 37 | ``` 38 | 39 | ### Train 40 | 41 | Use the following command to train our HRNet-32 model: 42 | 43 | ``` 44 | python run_3dhp.py -f 1 -b 160 --train 1 --lr 0.0007 -lrd 0.97 --backbone hrnet_32 45 | ``` 46 | 47 | Similarly, for HRNet-48, run the following command: 48 | 49 | ``` 50 | python run_3dhp.py -f 1 -b 160 --train 1 --lr 0.0007 -lrd 0.97 --backbone hrnet_48 51 | ``` 52 | 53 | ### Evaluation 54 | 55 | A simple evaluation can be done by running: 56 | 57 | ``` 58 | python run_3dhp.py -f 1 -b 160 --train 0 --reload 1 --backbone hrnet_32 59 | ``` 60 | 61 | Likewise, run this for the HRNet-48 model: 62 | 63 | ``` 64 | python run_3dhp.py -f 1 -b 160 --train 0 --reload 1 --backbone hrnet_48 65 | ``` 66 | 67 | Our checkpoints are released [here](https://drive.google.com/drive/folders/1O_i3OUTnqlkLWFu_3WKPU7YepWhItd59?usp=drive_link), and we assume you have placed them (`HRNet_32_64_no_refine_24_3214.pth` or `HRNet_48_96_no_refine_45_3125.pth`) under `checkpoint/`. For more metrics (e.g., PCK), please follow the instructions in the [original repo](https://github.com/paTRICK-swk/P-STMO?tab=readme-ov-file#mpi-inf-3dhp). 68 | 69 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | 14 | # pose_resnet related params 15 | POSE_RESNET = CN() 16 | POSE_RESNET.NUM_LAYERS = 50 17 | POSE_RESNET.DECONV_WITH_BIAS = False 18 | POSE_RESNET.NUM_DECONV_LAYERS = 3 19 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 20 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 21 | POSE_RESNET.FINAL_CONV_KERNEL = 1 22 | POSE_RESNET.PRETRAINED_LAYERS = ['*'] 23 | 24 | # pose_multi_resoluton_net related params 25 | POSE_HIGH_RESOLUTION_NET = CN() 26 | POSE_HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 27 | POSE_HIGH_RESOLUTION_NET.STEM_INPLANES = 64 28 | POSE_HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 29 | 30 | POSE_HIGH_RESOLUTION_NET.STAGE2 = CN() 31 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 32 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 33 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 34 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64] 35 | POSE_HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC' 36 | POSE_HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 37 | 38 | POSE_HIGH_RESOLUTION_NET.STAGE3 = CN() 39 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 40 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 41 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 42 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128] 43 | POSE_HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC' 44 | POSE_HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 45 | 46 | POSE_HIGH_RESOLUTION_NET.STAGE4 = CN() 47 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 48 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 49 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 50 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 51 | POSE_HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC' 52 | POSE_HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 53 | 54 | 55 | MODEL_EXTRAS = { 56 | 'pose_resnet': POSE_RESNET, 57 | 'pose_high_resolution_net': POSE_HIGH_RESOLUTION_NET, 58 | } 59 | -------------------------------------------------------------------------------- /H36M-Toolbox/checksums.txt: -------------------------------------------------------------------------------- 1 | 69e038858ace96ba5f6c5ccea52e95e8 archives/Poses_D2_Positions_S1.tgz 2 | d4ae2827d0227dea8c88e6a082763d0a archives/Poses_D3_Positions_S1.tgz 3 | 4c844740ba583517c74b6c496c190761 archives/Poses_D3_Positions_mono_S1.tgz 4 | 3c75f06fdf3c4f3b8fb1f8f11d18a10e archives/Poses_D3_Positions_mono_universal_S1.tgz 5 | d517e6c0b1112427b2a39fcbd732281c archives/Videos_S1.tgz 6 | 7ac8c4830468a1ed3464076ee9603632 archives/Poses_D2_Positions_S5.tgz 7 | 7a0bd0f458612decc9de0a04e0b589cc archives/Poses_D3_Positions_S5.tgz 8 | 4e14165ed00b7aff1111a81c1ca4b7b3 archives/Poses_D3_Positions_mono_S5.tgz 9 | a0c821f5501fcc450e28c38e5ebd0c17 archives/Poses_D3_Positions_mono_universal_S5.tgz 10 | 02ef041813c3a37b137f86df24419e5a archives/Videos_S5.tgz 11 | 5f9706d5259f648cca802c069dec9681 archives/Poses_D2_Positions_S6.tgz 12 | 0970a30cbc947c3c0454c834db9b84e0 archives/Poses_D3_Positions_S6.tgz 13 | 9681696b33a0d487493330e825b408d6 archives/Poses_D3_Positions_mono_S6.tgz 14 | dce0fb2f44b487b2bd36f603d1ff894a archives/Poses_D3_Positions_mono_universal_S6.tgz 15 | a4b8690e5320c5854f99f60bf31cbabc archives/Videos_S6.tgz 16 | 
543c4053c962db54d1d7361d4accffb4 archives/Poses_D2_Positions_S7.tgz 17 | abeea2a40650517cefb7cd911caa6472 archives/Poses_D3_Positions_S7.tgz 18 | 807109c1a304ce67c6f0cc06a94846fc archives/Poses_D3_Positions_mono_S7.tgz 19 | 848717a95a96336ec7707b20ec463965 archives/Poses_D3_Positions_mono_universal_S7.tgz 20 | 79caf93c6ec31b1c14cd1d31d5f292e0 archives/Videos_S7.tgz 21 | e9de190d782452edc954ac191907adcf archives/Poses_D2_Positions_S8.tgz 22 | 5695796fe478579ffe9b9ff09203dd27 archives/Poses_D3_Positions_S8.tgz 23 | da8b6c948e7dcd280061cd4d99d7352f archives/Poses_D3_Positions_mono_S8.tgz 24 | 8f5182924c29721d9c4227aa43e3d7b3 archives/Poses_D3_Positions_mono_universal_S8.tgz 25 | 18818148e68fcd80fce1efa82f98126d archives/Videos_S8.tgz 26 | 232c2244afae96cb900908c6825d478c archives/Poses_D2_Positions_S9.tgz 27 | fce28bb66bf9908016e2d9738e5cb2db archives/Poses_D3_Positions_S9.tgz 28 | 0fad285a69fdcdf4958cc4c80d93abbc archives/Poses_D3_Positions_mono_S9.tgz 29 | bbc436bc0f35bd09e272ad0ed1f188e2 archives/Poses_D3_Positions_mono_universal_S9.tgz 30 | 3e7d923d5c573ac833334a31b5f8a797 archives/Videos_S9.tgz 31 | df1fde6b5656729336f54dcd79ab6e47 archives/Poses_D2_Positions_S11.tgz 32 | 729e93d4e50c806f4a55fd1b87e2ff52 archives/Poses_D3_Positions_S11.tgz 33 | 944a8bca62a933f5d630a835868fba23 archives/Poses_D3_Positions_mono_S11.tgz 34 | c00b5b22ed1b88de5a536433e300503e archives/Poses_D3_Positions_mono_universal_S11.tgz 35 | 13a24f30eb4e7cc505cbf80410c90ffe archives/Videos_S11.tgz 36 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/custom_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | import numpy as np 9 | import copy 10 | from common.skeleton import Skeleton 11 | from common.mocap_dataset import MocapDataset 12 | from common.camera import normalize_screen_coordinates, image_coordinates 13 | from common.h36m_dataset import h36m_skeleton 14 | 15 | 16 | custom_camera_params = { 17 | 'id': None, 18 | 'res_w': None, # Pulled from metadata 19 | 'res_h': None, # Pulled from metadata 20 | 21 | # Dummy camera parameters (taken from Human3.6M), only for visualization purposes 22 | 'azimuth': 70, # Only used for visualization 23 | 'orientation': [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088], 24 | 'translation': [1841.1070556640625, 4955.28466796875, 1563.4454345703125], 25 | } 26 | 27 | class CustomDataset(MocapDataset): 28 | def __init__(self, detections_path, remove_static_joints=True): 29 | super().__init__(fps=None, skeleton=h36m_skeleton) 30 | 31 | # Load serialized dataset 32 | data = np.load(detections_path, allow_pickle=True) 33 | resolutions = data['metadata'].item()['video_metadata'] 34 | 35 | self._cameras = {} 36 | self._data = {} 37 | for video_name, res in resolutions.items(): 38 | cam = {} 39 | cam.update(custom_camera_params) 40 | cam['orientation'] = np.array(cam['orientation'], dtype='float32') 41 | cam['translation'] = np.array(cam['translation'], dtype='float32') 42 | cam['translation'] = cam['translation']/1000 # mm to meters 43 | 44 | cam['id'] = video_name 45 | cam['res_w'] = res['w'] 46 | cam['res_h'] = res['h'] 47 | 48 | self._cameras[video_name] = [cam] 49 | 50 | self._data[video_name] = { 51 | 'custom': { 52 | 'cameras': cam 53 | } 54 | } 55 | 56 | if remove_static_joints: 57 | # Bring the skeleton to 17 joints instead of the original 32 58 | self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31]) 59 | 60 | # Rewire shoulders to the correct parents 61 | self._skeleton._parents[11] = 8 62 | self._skeleton._parents[14] = 8 63 | 64 | def supports_semi_supervised(self): 65 | return False 66 | -------------------------------------------------------------------------------- /ContextPose_mpi/common/camera.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import torch 4 | 5 | def normalize_screen_coordinates(X, w, h): 6 | assert X.shape[-1] == 2 7 | return X / w * 2 - [1, h / w] 8 | 9 | 10 | def image_coordinates(X, w, h): 11 | assert X.shape[-1] == 2 12 | 13 | # Reverse camera frame normalization 14 | return (X + [1, h / w]) * w / 2 15 | 16 | def world_to_camera(X, R, t): 17 | Rt = wrap(qinverse, R) 18 | return wrap(qrot, np.tile(Rt, (*X.shape[:-1], 1)), X - t) 19 | 20 | def camera_to_world(X, R, t): 21 | return wrap(qrot, np.tile(R, (*X.shape[:-1], 1)), X) + t 22 | 23 | def wrap(func, *args, unsqueeze=False): 24 | args = list(args) 25 | for i, arg in enumerate(args): 26 | if type(arg) == np.ndarray: 27 | args[i] = torch.from_numpy(arg) 28 | if unsqueeze: 29 | args[i] = args[i].unsqueeze(0) 30 | 31 | result = func(*args) 32 | 33 | if isinstance(result, tuple): 34 | result = list(result) 35 | for i, res in enumerate(result): 36 | if type(res) == torch.Tensor: 37 | if unsqueeze: 38 | res = res.squeeze(0) 39 | result[i] = res.numpy() 40 | return tuple(result) 41 | elif type(result) == torch.Tensor: 42 | if unsqueeze: 43 | result = result.squeeze(0) 44 | return result.numpy() 45 | else: 46 | return result 47 | 48 | def qrot(q, v): 49 | assert q.shape[-1] == 4 50 | assert v.shape[-1] == 3 51 | assert q.shape[:-1] == 
v.shape[:-1] 52 | 53 | qvec = q[..., 1:] 54 | uv = torch.cross(qvec, v, dim=len(q.shape) - 1) 55 | uuv = torch.cross(qvec, uv, dim=len(q.shape) - 1) 56 | return (v + 2 * (q[..., :1] * uv + uuv)) 57 | 58 | 59 | def qinverse(q, inplace=False): 60 | if inplace: 61 | q[..., 1:] *= -1 62 | return q 63 | else: 64 | w = q[..., :1] 65 | xyz = q[..., 1:] 66 | return torch.cat((w, -xyz), dim=len(q.shape) - 1) 67 | 68 | 69 | def get_uvd2xyz(uvd, gt_3D, cam): 70 | N, T, V,_ = uvd.size() 71 | 72 | dec_out_all = uvd.view(-1, T, V, 3).clone() 73 | root = gt_3D[:, :, 0, :].unsqueeze(-2).repeat(1, 1, V, 1).clone() 74 | enc_in_all = uvd[:, :, :, :2].view(-1, T, V, 2).clone() 75 | 76 | cam_f_all = cam[..., :2].view(-1,1,1,2).repeat(1,T,V,1) 77 | cam_c_all = cam[..., 2:4].view(-1,1,1,2).repeat(1,T,V,1) 78 | 79 | z_global = dec_out_all[:, :, :, 2] 80 | z_global[:, :, 0] = root[:, :, 0, 2] 81 | z_global[:, :, 1:] = dec_out_all[:, :, 1:, 2] + root[:, :, 1:, 2] 82 | z_global = z_global.unsqueeze(-1) 83 | 84 | uv = enc_in_all - cam_c_all 85 | xy = uv * z_global.repeat(1, 1, 1, 2) / cam_f_all 86 | xyz_global = torch.cat((xy, z_global), -1) 87 | xyz_offset = (xyz_global - xyz_global[:, :, 0, :].unsqueeze(-2).repeat(1, 1, V, 1)) 88 | 89 | return xyz_offset 90 | 91 | 92 | -------------------------------------------------------------------------------- /ContextPose_mpi/common/skeleton.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | 5 | class Skeleton: 6 | def __init__(self, parents, joints_left, joints_right): 7 | assert len(joints_left) == len(joints_right) 8 | 9 | self._parents = np.array(parents) 10 | self._joints_left = joints_left 11 | self._joints_right = joints_right 12 | self._compute_metadata() 13 | 14 | def num_joints(self): 15 | return len(self._parents) 16 | 17 | def parents(self): 18 | return self._parents 19 | 20 | def has_children(self): 21 | return self._has_children 22 | 23 | def children(self): 24 | return self._children 25 | 26 | def remove_joints(self, joints_to_remove): 27 | 28 | valid_joints = [] 29 | for joint in range(len(self._parents)): 30 | if joint not in joints_to_remove: 31 | valid_joints.append(joint) 32 | 33 | for i in range(len(self._parents)): 34 | while self._parents[i] in joints_to_remove: 35 | self._parents[i] = self._parents[self._parents[i]] 36 | 37 | index_offsets = np.zeros(len(self._parents), dtype=int) 38 | new_parents = [] 39 | for i, parent in enumerate(self._parents): 40 | if i not in joints_to_remove: 41 | new_parents.append(parent - index_offsets[parent]) 42 | else: 43 | index_offsets[i:] += 1 44 | self._parents = np.array(new_parents) 45 | 46 | if self._joints_left is not None: 47 | new_joints_left = [] 48 | for joint in self._joints_left: 49 | if joint in valid_joints: 50 | new_joints_left.append(joint - index_offsets[joint]) 51 | self._joints_left = new_joints_left 52 | if self._joints_right is not None: 53 | new_joints_right = [] 54 | for joint in self._joints_right: 55 | if joint in valid_joints: 56 | new_joints_right.append(joint - index_offsets[joint]) 57 | self._joints_right = new_joints_right 58 | 59 | self._compute_metadata() 60 | 61 | return valid_joints 62 | 63 | def joints_left(self): 64 | return self._joints_left 65 | 66 | def joints_right(self): 67 | return self._joints_right 68 | 69 | def _compute_metadata(self): 70 | self._has_children = np.zeros(len(self._parents)).astype(bool) 71 | for i, parent in enumerate(self._parents): 72 | if parent != -1: 73 | self._has_children[parent] = 
True 74 | 75 | self._children = [] 76 | for i, parent in enumerate(self._parents): 77 | self._children.append([]) 78 | for i, parent in enumerate(self._parents): 79 | if parent != -1: 80 | self._children[parent].append(i) 81 | 82 | 83 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | import torch 9 | import numpy as np 10 | import hashlib 11 | 12 | def wrap(func, *args, unsqueeze=False): 13 | """ 14 | Wrap a torch function so it can be called with NumPy arrays. 15 | Input and return types are seamlessly converted. 16 | """ 17 | 18 | # Convert input types where applicable 19 | args = list(args) 20 | for i, arg in enumerate(args): 21 | if type(arg) == np.ndarray: 22 | args[i] = torch.from_numpy(arg) 23 | if unsqueeze: 24 | args[i] = args[i].unsqueeze(0) 25 | 26 | result = func(*args) 27 | 28 | # Convert output types where applicable 29 | if isinstance(result, tuple): 30 | result = list(result) 31 | for i, res in enumerate(result): 32 | if type(res) == torch.Tensor: 33 | if unsqueeze: 34 | res = res.squeeze(0) 35 | result[i] = res.numpy() 36 | return tuple(result) 37 | elif type(result) == torch.Tensor: 38 | if unsqueeze: 39 | result = result.squeeze(0) 40 | return result.numpy() 41 | else: 42 | return result 43 | 44 | def deterministic_random(min_value, max_value, data): 45 | digest = hashlib.sha256(data.encode()).digest() 46 | raw_value = int.from_bytes(digest[:4], byteorder='little', signed=False) 47 | return int(raw_value / (2**32 - 1) * (max_value - min_value)) + min_value 48 | 49 | def load_pretrained_weights(model, checkpoint): 50 | """Load pretrianed weights to model 51 | Incompatible layers (unmatched in name or size) will be ignored 52 | Args: 53 | - model (nn.Module): network model, which must not be nn.DataParallel 54 | - weight_path (str): path to pretrained weights 55 | """ 56 | import collections 57 | if 'state_dict' in checkpoint: 58 | state_dict = checkpoint['state_dict'] 59 | else: 60 | state_dict = checkpoint 61 | model_dict = model.state_dict() 62 | new_state_dict = collections.OrderedDict() 63 | matched_layers, discarded_layers = [], [] 64 | for k, v in state_dict.items(): 65 | # If the pretrained state_dict was saved as nn.DataParallel, 66 | # keys would contain "module.", which should be ignored. 67 | if k.startswith('module.'): 68 | k = k[7:] 69 | if k in model_dict and model_dict[k].size() == v.size(): 70 | new_state_dict[k] = v 71 | matched_layers.append(k) 72 | else: 73 | discarded_layers.append(k) 74 | # new_state_dict.requires_grad = False 75 | model_dict.update(new_state_dict) 76 | 77 | model.load_state_dict(model_dict) 78 | print('load_weight', len(matched_layers)) 79 | # model.state_dict(model_dict).requires_grad = False 80 | return model 81 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/camera.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
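The wrap helper in H36M-Toolbox/common/utils.py above converts NumPy inputs to torch tensors, calls the wrapped function, and converts the result back. A small usage sketch with the quaternion routines from common/quaternion.py (array shapes are illustrative):

import numpy as np
from common.utils import wrap
from common.quaternion import qrot, qinverse   # torch implementations

# Rotate a batch of 3D points with unit quaternions while staying in NumPy
q = np.random.randn(8, 4).astype('float32')
q /= np.linalg.norm(q, axis=-1, keepdims=True)     # qrot assumes unit quaternions
v = np.random.randn(8, 3).astype('float32')

v_rot = wrap(qrot, q, v)                           # NumPy in, NumPy out
v_back = wrap(qrot, wrap(qinverse, q), v_rot)      # inverse rotation recovers v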
6 | # 7 | 8 | import numpy as np 9 | import torch 10 | 11 | from common.utils import wrap 12 | from common.quaternion import qrot, qinverse 13 | 14 | def normalize_screen_coordinates(X, w, h): 15 | assert X.shape[-1] == 2 16 | 17 | # Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio 18 | return X/w*2 - [1, h/w] 19 | 20 | 21 | def image_coordinates(X, w, h): 22 | assert X.shape[-1] == 2 23 | 24 | # Reverse camera frame normalization 25 | return (X + [1, h/w])*w/2 26 | 27 | 28 | def world_to_camera(X, R, t): 29 | Rt = wrap(qinverse, R) # Invert rotation 30 | return wrap(qrot, np.tile(Rt, (*X.shape[:-1], 1)), X - t) # Rotate and translate 31 | 32 | 33 | def camera_to_world(X, R, t): 34 | return wrap(qrot, np.tile(R, (*X.shape[:-1], 1)), X) + t 35 | 36 | 37 | def project_to_2d(X, camera_params): 38 | """ 39 | Project 3D points to 2D using the Human3.6M camera projection function. 40 | This is a differentiable and batched reimplementation of the original MATLAB script. 41 | 42 | Arguments: 43 | X -- 3D points in *camera space* to transform (N, *, 3) 44 | camera_params -- intrinsic parameteres (N, 2+2+3+2=9) 45 | """ 46 | assert X.shape[-1] == 3 47 | assert len(camera_params.shape) == 2 48 | assert camera_params.shape[-1] == 9 49 | assert X.shape[0] == camera_params.shape[0] 50 | 51 | while len(camera_params.shape) < len(X.shape): 52 | camera_params = camera_params.unsqueeze(1) 53 | 54 | f = camera_params[..., :2] 55 | c = camera_params[..., 2:4] 56 | k = camera_params[..., 4:7] 57 | p = camera_params[..., 7:] 58 | 59 | XX = torch.clamp(X[..., :2] / X[..., 2:], min=-1, max=1) 60 | r2 = torch.sum(XX[..., :2]**2, dim=len(XX.shape)-1, keepdim=True) 61 | 62 | radial = 1 + torch.sum(k * torch.cat((r2, r2**2, r2**3), dim=len(r2.shape)-1), dim=len(r2.shape)-1, keepdim=True) 63 | tan = torch.sum(p*XX, dim=len(XX.shape)-1, keepdim=True) 64 | 65 | XXX = XX*(radial + tan) + p*r2 66 | 67 | return f*XXX + c 68 | 69 | def project_to_2d_linear(X, camera_params): 70 | """ 71 | Project 3D points to 2D using only linear parameters (focal length and principal point). 72 | 73 | Arguments: 74 | X -- 3D points in *camera space* to transform (N, *, 3) 75 | camera_params -- intrinsic parameteres (N, 2+2+3+2=9) 76 | """ 77 | assert X.shape[-1] == 3 78 | assert len(camera_params.shape) == 2 79 | assert camera_params.shape[-1] == 9 80 | assert X.shape[0] == camera_params.shape[0] 81 | 82 | while len(camera_params.shape) < len(X.shape): 83 | camera_params = camera_params.unsqueeze(1) 84 | 85 | f = camera_params[..., :2] 86 | c = camera_params[..., 2:4] 87 | 88 | XX = torch.clamp(X[..., :2] / X[..., 2:], min=-1, max=1) 89 | 90 | return f*XX + c -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_test_predictions_py.m: -------------------------------------------------------------------------------- 1 | 2 | test_subject_id = [1,2,3,4,5,6]; 3 | test_data_path = '../'; %Change to wherever you put this data. 
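project_to_2d in H36M-Toolbox/common/camera.py above takes camera-space points plus a 9-vector of intrinsics laid out as (fx, fy, cx, cy, k1, k2, k3, p1, p2). A minimal call sketch; the intrinsic values below are placeholders, not the real Human3.6M calibration:

import torch
from common.camera import project_to_2d

N, J = 4, 17
X = torch.randn(N, J, 3)
X[..., 2] += 5.0                      # keep the points in front of the camera

# (fx, fy, cx, cy, k1, k2, k3, p1, p2) -- one row of placeholder intrinsics per sample
cam = torch.tensor([[1145.0, 1144.0, 512.0, 515.0, -0.21, 0.24, -0.0018, -0.0009, -0.0008]])
cam = cam.repeat(N, 1)

x2d = project_to_2d(X, cam)           # -> (N, J, 2) image coordinates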
4 | data_base_path = [test_data_path filesep 'TS']; 5 | % addpath('') 6 | % addpath('../util'); 7 | 8 | [~,o1,o2,relevant_labels] = mpii_get_joints('relevant'); 9 | 10 | % net_base = 'PathToYourAwesomeMethod'; 11 | % snapshot_base = net_base; 12 | 13 | %% Load the nets, or plug in your method and do predictions 14 | net_path = {}; 15 | % Usage: net_path = [net_path; {, }]; 16 | % Example: net_path = [net_path; {'MyAwesome3DNet', 135000}]; 17 | % caffe.set_mode_gpu() 18 | % caffe.set_device(0) 19 | 20 | % for n = 1:size(net_path,1) 21 | %n = 1; 22 | %NN or whatever fancy method you are using 23 | % caffe.reset_all(); 24 | % net = caffe.Net(fullfile(net_base, net_path{n,1}, 'deploy_net.prototxt'), fullfile(snapshot_base, net_path{n,1}, sprintf('snapshot_iter_%d.caffemodel', net_path{n,2})), 'test'); 25 | % fprintf('Doing %s \n', net_path{n,1}); 26 | %% 27 | load('..\..\checkpoint\inference_data.mat') 28 | sequencewise_per_joint_error = cell(6,1); 29 | sequencewise_activity_labels = cell(6,1); 30 | for i = 1:length(test_subject_id) 31 | dat = load([data_base_path int2str(test_subject_id(i)) filesep 'annot_data.mat']); 32 | num_test_points = sum(dat.valid_frame(:)); 33 | per_joint_error = zeros(17,1,num_test_points); 34 | pje_idx = 1; 35 | sequencewise_activity_labels{i} = dat.activity_annotation(dat.valid_frame == 1); 36 | %% 37 | seq_name = ['TS',int2str(test_subject_id(i))]; 38 | pred_seq = eval(seq_name); 39 | for j = 1:length(dat.valid_frame) 40 | if(dat.valid_frame(j)) 41 | fprintf('Image %d of %d for Test ID %d\n',j, length(dat.annot2), test_subject_id(i)); 42 | error = zeros(17,1); 43 | 44 | %img = imread([data_base_path int2str(test_subject_id(i)) filesep 'imageSequence' filesep sprintf('img_%06d.jpg',j)]); 45 | %The GT has 17 joints, and the order and the annotation of the joints can be observed through the 'relevant_labels' variable 46 | P = dat.univ_annot3(:,:,:,j)-repmat(dat.univ_annot3(:,15,:,j),1,17); 47 | 48 | % 49 | pred_p = pred_seq(:,:,:,pje_idx); %Replace with the actual prediction formatted as 3x17; 50 | error_p = (pred_p - P).^2; 51 | error_p = sqrt(sum(error_p, 1)); 52 | error(:,1) = error(:,1) + error_p(:); 53 | 54 | 55 | per_joint_error(:,:,pje_idx) = error; 56 | pje_idx = pje_idx +1; 57 | end 58 | end 59 | sequencewise_per_joint_error{i} = per_joint_error; 60 | 61 | end 62 | 63 | save([test_data_path filesep 'mpii_3dhp_prediction.mat'], 'sequencewise_per_joint_error', 'sequencewise_activity_labels'); 64 | [seq_table, activity_table] = mpii_evaluate_errors(sequencewise_per_joint_error, sequencewise_activity_labels); 65 | 66 | out_file = [test_data_path filesep 'mpii_3dhp_evaluation']; 67 | writetable(cell2table(seq_table), [out_file '_sequencewise.csv']); 68 | writetable(cell2table(activity_table), [out_file '_activitywise.csv']); 69 | 70 | % end 71 | 72 | % 73 | % 74 | % 75 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/skeleton.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
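mpii_test_predictions_py.m above loads checkpoint/inference_data.mat and expects one variable per test sequence, named TS1 through TS6, each indexed as pred_seq(:,:,:,idx) to yield a 3x17 root-centred pose. A hedged export sketch from Python, assuming predictions are held as (N, 17, 3) NumPy arrays in millimetres and that scipy is available:

import numpy as np
import scipy.io as sio

def export_for_matlab_eval(preds_per_seq, out_path='checkpoint/inference_data.mat'):
    """preds_per_seq: e.g. {'TS1': (N, 17, 3) root-centred predictions in mm, ..., 'TS6': ...}"""
    mat_dict = {}
    for name, pred in preds_per_seq.items():
        # (N, 17, 3) -> (3, 17, 1, N) so that pred_seq(:,:,:,idx) in MATLAB yields a 3x17 pose
        mat_dict[name] = np.transpose(pred, (2, 1, 0))[:, :, np.newaxis, :]
    sio.savemat(out_path, mat_dict)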
6 | # 7 | 8 | import numpy as np 9 | 10 | class Skeleton: 11 | def __init__(self, parents, joints_left, joints_right): 12 | assert len(joints_left) == len(joints_right) 13 | 14 | self._parents = np.array(parents) 15 | self._joints_left = joints_left 16 | self._joints_right = joints_right 17 | self._compute_metadata() 18 | 19 | def num_joints(self): 20 | return len(self._parents) 21 | 22 | def parents(self): 23 | return self._parents 24 | 25 | def has_children(self): 26 | return self._has_children 27 | 28 | def children(self): 29 | return self._children 30 | 31 | def remove_joints(self, joints_to_remove): 32 | """ 33 | Remove the joints specified in 'joints_to_remove'. 34 | """ 35 | valid_joints = [] 36 | for joint in range(len(self._parents)): 37 | if joint not in joints_to_remove: 38 | valid_joints.append(joint) 39 | 40 | for i in range(len(self._parents)): 41 | while self._parents[i] in joints_to_remove: 42 | self._parents[i] = self._parents[self._parents[i]] 43 | 44 | index_offsets = np.zeros(len(self._parents), dtype=int) 45 | new_parents = [] 46 | for i, parent in enumerate(self._parents): 47 | if i not in joints_to_remove: 48 | new_parents.append(parent - index_offsets[parent]) 49 | else: 50 | index_offsets[i:] += 1 51 | self._parents = np.array(new_parents) 52 | 53 | 54 | if self._joints_left is not None: 55 | new_joints_left = [] 56 | for joint in self._joints_left: 57 | if joint in valid_joints: 58 | new_joints_left.append(joint - index_offsets[joint]) 59 | self._joints_left = new_joints_left 60 | if self._joints_right is not None: 61 | new_joints_right = [] 62 | for joint in self._joints_right: 63 | if joint in valid_joints: 64 | new_joints_right.append(joint - index_offsets[joint]) 65 | self._joints_right = new_joints_right 66 | 67 | self._compute_metadata() 68 | 69 | return valid_joints 70 | 71 | def joints_left(self): 72 | return self._joints_left 73 | 74 | def joints_right(self): 75 | return self._joints_right 76 | 77 | def _compute_metadata(self): 78 | self._has_children = np.zeros(len(self._parents)).astype(bool) 79 | for i, parent in enumerate(self._parents): 80 | if parent != -1: 81 | self._has_children[parent] = True 82 | 83 | self._children = [] 84 | for i, parent in enumerate(self._parents): 85 | self._children.append([]) 86 | for i, parent in enumerate(self._parents): 87 | if parent != -1: 88 | self._children[parent].append(i) -------------------------------------------------------------------------------- /ContextPose/mvn/models/networks/refineNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class Bottleneck(nn.Module): 4 | expansion = 4 5 | 6 | def __init__(self, inplanes, planes, stride=1): 7 | super(Bottleneck, self).__init__() 8 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 9 | self.bn1 = nn.BatchNorm2d(planes) 10 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 11 | padding=1, bias=False) 12 | self.bn2 = nn.BatchNorm2d(planes) 13 | self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=False) 14 | self.bn3 = nn.BatchNorm2d(planes * 2) 15 | self.relu = nn.ReLU(inplace=True) 16 | 17 | self.downsample = nn.Sequential( 18 | nn.Conv2d(inplanes, planes * 2, 19 | kernel_size=1, stride=stride, bias=False), 20 | nn.BatchNorm2d(planes * 2), 21 | ) 22 | 23 | self.stride = stride 24 | 25 | def forward(self, x): 26 | residual = x 27 | 28 | out = self.conv1(x) 29 | out = self.bn1(out) 30 | out = self.relu(out) 31 | 32 | out = self.conv2(out) 33 | out 
= self.bn2(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv3(out) 37 | out = self.bn3(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | class refineNet(nn.Module): 48 | def __init__(self, lateral_channel, out_shape, num_class): 49 | super(refineNet, self).__init__() 50 | cascade = [] 51 | num_cascade = 4 52 | for i in range(num_cascade): 53 | cascade.append(self._make_layer(lateral_channel, num_cascade-i-1, out_shape)) 54 | self.cascade = nn.ModuleList(cascade) 55 | self.final_predict = self._predict(4*lateral_channel, num_class) 56 | 57 | def _make_layer(self, input_channel, num, output_shape): 58 | layers = [] 59 | for i in range(num): 60 | layers.append(Bottleneck(input_channel, 128)) 61 | layers.append(nn.Upsample(size=output_shape, mode='bilinear', align_corners=True)) 62 | return nn.Sequential(*layers) 63 | 64 | def _predict(self, input_channel, num_class): 65 | layers = [] 66 | layers.append(Bottleneck(input_channel, 128)) 67 | layers.append(nn.Conv2d(256, num_class, 68 | kernel_size=3, stride=1, padding=1, bias=False)) 69 | layers.append(nn.BatchNorm2d(num_class)) 70 | return nn.Sequential(*layers) 71 | 72 | def forward(self, x): 73 | refine_fms = [] 74 | for i in range(4): 75 | refine_fms.append(self.cascade[i](x[i])) 76 | # for i in range(4): 77 | # print(refine_fms[i].shape) 78 | # out = torch.cat(refine_fms, dim=1) 79 | # print(out.shape) 80 | # out = self.final_predict(out) 81 | # print(out.shape) 82 | # torch.Size([256, 256, 64, 48]) 83 | # torch.Size([256, 256, 64, 48]) 84 | # torch.Size([256, 256, 64, 48]) 85 | # torch.Size([256, 256, 64, 48]) 86 | # torch.Size([256, 1024, 64, 48]) 87 | # torch.Size([256, 17, 64, 48]) 88 | return refine_fms 89 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/camera_calibration/ts5-6cameras.calib: -------------------------------------------------------------------------------- 1 | tc camera calibration v0.3 2 | camera 0 GOPR0046_intrinsics.MP4 3 | colorCorrection 4 | red 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 5 | green 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 6 | blue 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 7 | frame 0 8 | sensorSize 10.000000000 5.625000000 # in mm 9 | focalLength 8.770747185 # in mm 10 | pixelAspect 0.993236423 # y / x 11 | centerOffset -0.104908645 0.104899704 # in mm (positive values move right and down) 12 | distortionModel OpenCV 13 | distortion -0.276859611 0.131125256 -0.000360494 -0.001149441 -0.049318332 14 | origin -2104.3074 1038.6707 -4596.6367 15 | up 0.025272345 0.995038509 0.096227370 16 | right -0.939647257 -0.009210289 0.342020929 17 | 18 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_get_joints.m: -------------------------------------------------------------------------------- 1 | function [joint_idx, joint_parents_o1, joint_parents_o2, joint_names] = mpii_get_joints(joint_set_name) 2 | 3 | original_joint_idx = [10, 13, 16, 19, 22, 25, 28, 29, 31, 36, 40, 42, 43, 45, 50, 54, 56, 57, 63, 64, 69, 70, 71, 77, 78, 83, 84, 85]; % 4 | 5 | original_joint_names = {'spine3', 'spine4', 'spine2', 'spine1', 'spine', ... %5 6 | 'neck', 'head', 'head_top', 'left_shoulder', 'left_arm', 'left_forearm', ... %11 7 | 'left_hand', 'left_hand_ee', 'right_shoulder', 'right_arm', 'right_forearm', 'right_hand', ... %17 8 | 'right_hand_ee', 'left_leg_up', 'left_leg', 'left_foot', 'left_toe', 'left_ee', ... %23 9 | 'right_leg_up' , 'right_leg', 'right_foot', 'right_toe', 'right_ee'}; 10 | 11 | 12 | all_joint_names = {'spine3', 'spine4', 'spine2', 'spine', 'pelvis', ... %5 13 | 'neck', 'head', 'head_top', 'left_clavicle', 'left_shoulder', 'left_elbow', ... %11 14 | 'left_wrist', 'left_hand', 'right_clavicle', 'right_shoulder', 'right_elbow', 'right_wrist', ... %17 15 | 'right_hand', 'left_hip', 'left_knee', 'left_ankle', 'left_foot', 'left_toe', ... %23 16 | 'right_hip' , 'right_knee', 'right_ankle', 'right_foot', 'right_toe'}; 17 | 18 | 19 | %The O1 and O2 indices are relaive to the joint_idx, regardless of the joint set 20 | 21 | switch joint_set_name 22 | %For internal use only!!! 
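The .calib blocks above store intrinsics in millimetres (sensorSize, focalLength, centerOffset, plus a y/x pixelAspect), so converting to pixel units needs the image resolution, which the file does not contain. A rough single-camera parsing sketch; the resolution arguments and the conversion to (fx, fy, cx, cy) are assumptions, and multi-camera files would need per-block handling:

def calib_to_intrinsics(calib_path, img_w, img_h):
    """Read one camera block of a tc .calib file and return (fx, fy, cx, cy) in pixels."""
    fields = {}
    with open(calib_path) as f:
        for line in f:
            parts = line.split('#')[0].split()     # strip trailing "# ..." comments
            if parts:
                fields[parts[0]] = parts[1:]       # later duplicate keys overwrite earlier ones
    sensor_w, sensor_h = (float(x) for x in fields['sensorSize'])
    focal_mm = float(fields['focalLength'][0])
    pixel_aspect = float(fields['pixelAspect'][0])             # y / x
    off_x, off_y = (float(x) for x in fields['centerOffset'])  # mm, positive = right / down
    fx = focal_mm / sensor_w * img_w
    fy = fx * pixel_aspect
    cx = img_w / 2.0 + off_x / sensor_w * img_w
    cy = img_h / 2.0 + off_y / sensor_h * img_h
    return fx, fy, cx, cy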
23 | case 'original' %%These give the original indices from the dumped out mddd file, the remaining joint sets are wrt the 'all' labels 24 | joint_idx = original_joint_idx; % 25 | joint_parents_o1 = [3, 1, 4, 5, 5, 2, 6, 7, 6, 9, 10, 11, 12, 6, 14, 15, 16, 17, 5, 19, 20, 21, 22, 5, 24, 25, 26, 27 ]; 26 | joint_parents_o2 = [4, 3, 5, 5, 5, 1, 2, 6, 2, 6, 9, 10, 11, 2, 6, 14, 15, 16, 4, 5, 19, 20, 21, 4, 5, 24, 25, 26]; 27 | joint_names = original_joint_names; 28 | %Use joint sets from here 29 | case 'all' 30 | joint_idx = 1:28; %These index into the joints extracted in the original set 31 | joint_parents_o1 = [3, 1, 4, 5, 5, 2, 6, 7, 6, 9, 10, 11, 12, 6, 14, 15, 16, 17, 5, 19, 20, 21, 22, 5, 24, 25, 26, 27 ]; 32 | joint_parents_o2 = [4, 3, 5, 5, 5, 1, 2, 6, 2, 6, 9, 10, 11, 2, 6, 14, 15, 16, 4, 5, 19, 20, 21, 4, 5, 24, 25, 26]; 33 | joint_names = all_joint_names; 34 | 35 | case 'cpm' %CPM Joints in CPM Order 36 | joint_idx = [8, 6, 15, 16, 17, 10, 11, 12, 24, 25, 26, 19, 20, 21, 5]; 37 | joint_parents_o1 = [ 2, 15, 2, 3, 4, 2, 6, 7, 15, 9, 10, 15, 12, 13, 15]; 38 | joint_parents_o2 = [15, 15, 15, 2, 3, 15, 2, 6, 2, 15, 9, 2, 15, 12, 15]; 39 | joint_names = all_joint_names(joint_idx); 40 | 41 | case 'relevant' %Human3.6m joints in CPM order 42 | joint_idx = [8, 6, 15, 16, 17, 10, 11, 12, 24, 25, 26, 19, 20, 21, 5, 4, 7]; 43 | joint_parents_o1 = [ 2, 16, 2, 3, 4, 2, 6, 7, 15, 9, 10, 15, 12, 13, 15, 15, 2]; 44 | joint_parents_o2 = [ 16, 15, 16, 2, 3, 16, 2, 6, 16, 15, 9, 16, 15, 12, 15, 15, 16]; 45 | joint_names = all_joint_names(joint_idx); 46 | 47 | otherwise 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/networks/globalNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import math 4 | 5 | class globalNet(nn.Module): 6 | def __init__(self, channel_settings, output_shape, num_class): 7 | super(globalNet, self).__init__() 8 | self.channel_settings = channel_settings 9 | laterals, upsamples, predict = [], [], [] 10 | for i in range(len(channel_settings)): 11 | laterals.append(self._lateral(channel_settings[i])) 12 | predict.append(self._predict(output_shape, num_class)) 13 | if i != len(channel_settings) - 1: 14 | upsamples.append(self._upsample()) 15 | self.laterals = nn.ModuleList(laterals) 16 | self.upsamples = nn.ModuleList(upsamples) 17 | self.predict = nn.ModuleList(predict) 18 | 19 | for m in self.modules(): 20 | if isinstance(m, nn.Conv2d): 21 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 22 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 23 | if m.bias is not None: 24 | m.bias.data.zero_() 25 | elif isinstance(m, nn.BatchNorm2d): 26 | m.weight.data.fill_(1) 27 | m.bias.data.zero_() 28 | 29 | def _lateral(self, input_size): 30 | layers = [] 31 | layers.append(nn.Conv2d(input_size, 256, 32 | kernel_size=1, stride=1, bias=False)) 33 | layers.append(nn.BatchNorm2d(256)) 34 | layers.append(nn.ReLU(inplace=True)) 35 | 36 | return nn.Sequential(*layers) 37 | 38 | def _upsample(self): 39 | layers = [] 40 | layers.append(torch.nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)) 41 | layers.append(torch.nn.Conv2d(256, 256, 42 | kernel_size=1, stride=1, bias=False)) 43 | layers.append(nn.BatchNorm2d(256)) 44 | 45 | return nn.Sequential(*layers) 46 | 47 | def _predict(self, output_shape, num_class): 48 | layers = [] 49 | layers.append(nn.Conv2d(256, 256, 50 | kernel_size=1, stride=1, bias=False)) 51 | layers.append(nn.BatchNorm2d(256)) 52 | layers.append(nn.ReLU(inplace=True)) 53 | 54 | layers.append(nn.Conv2d(256, num_class, 55 | kernel_size=3, stride=1, padding=1, bias=False)) 56 | layers.append(nn.Upsample(size=output_shape, mode='bilinear', align_corners=True)) 57 | layers.append(nn.BatchNorm2d(num_class)) 58 | 59 | return nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | global_fms, global_outs = [], [] 63 | for i in range(len(self.channel_settings)): 64 | if i == 0: 65 | feature = self.laterals[i](x[i]) 66 | else: 67 | feature = self.laterals[i](x[i]) + up 68 | global_fms.append(feature) 69 | if i != len(self.channel_settings) - 1: 70 | up = self.upsamples[i](feature) 71 | feature = self.predict[i](feature) 72 | # global_outs.append(feature) 73 | 74 | # 0 torch.Size([512, 256, 8, 6]) 75 | # 0 torch.Size([512, 17, 64, 48]) 76 | # 1 torch.Size([512, 256, 16, 12]) 77 | # 1 torch.Size([512, 17, 64, 48]) 78 | # 2 torch.Size([512, 256, 32, 24]) 79 | # 2 torch.Size([512, 17, 64, 48]) 80 | # 3 torch.Size([512, 256, 64, 48]) 81 | # 3 torch.Size([512, 17, 64, 48]) 82 | 83 | return global_fms # , global_outs 84 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_evaluate_errors.m: -------------------------------------------------------------------------------- 1 | function [sequencewise_table, activitywise_table] = mpii_evaluate_errors(sequencewise_error, sequencewise_activity) 2 | 3 | joint_groups = mpii_get_pck_auc_joint_groups(); 4 | [~,~,~,joint_names] = mpii_get_joints('relevant'); 5 | all_errors = []; 6 | all_activities = []; 7 | sequencewise_pck = {}; 8 | sequencewise_auc = {}; 9 | nj = length(joint_names); 10 | sequencewise_mpjpe = cell(length(sequencewise_error)+1,nj+2); 11 | sequencewise_mpjpe(1,2:(nj+1)) = joint_names; 12 | sequencewise_mpjpe{1,(nj+2)} = 'Average'; 13 | %Generate MPJPE and PCK/AUC By sequence first 14 | %error_dat = {}; 15 | %delete('error_dat'); 16 | for i = 1:length(sequencewise_error) 17 | if(isempty(all_errors)) 18 | all_errors = sequencewise_error{i}(:,1,:); 19 | else 20 | all_errors = cat(3,all_errors, sequencewise_error{i}(:,1,:)); 21 | end 22 | all_activities = [all_activities; sequencewise_activity{i}(:)]; 23 | 24 | error_dat(i) = mpii_3D_error(['TestSeq' int2str(i)], sequencewise_error{i}(:,1,:)); 25 | sequencewise_mpjpe{i+1,1}= ['TestSeq' int2str(i)]; 26 | mpjpe = mean(sequencewise_error{i}(:,1,:),3); 27 | sequencewise_mpjpe(i+1,2:(nj+1)) = num2cell(mpjpe'); 28 | sequencewise_mpjpe{i+1,(nj+2)} = mean(mpjpe(:)); 29 | end 30 | [pck, auc] = mpii_compute_3d_pck(error_dat, joint_groups, []); 31 | 
sequencewise_pck = [sequencewise_pck; pck]; 32 | sequencewise_pck{1,1} = 'PCK'; 33 | sequencewise_auc = [sequencewise_auc; auc]; 34 | sequencewise_auc{1,1} = 'AUC'; 35 | 36 | activitywise_pck = {}; 37 | activitywise_auc = {}; 38 | activitywise_mpjpe = cell(7+2,nj+2); 39 | activitywise_mpjpe(1,2:(nj+1)) = joint_names; 40 | activitywise_mpjpe{1,(nj+2)} = 'Average'; 41 | %Generate MPJPE and PCK/AUC By activity 42 | %error_dat = {}; 43 | clear('error_dat'); 44 | for i = 1:7 45 | error_dat(i) = mpii_3D_error(mpii_get_activity_name(i), all_errors(:,:,all_activities == i)); 46 | activitywise_mpjpe{i+1,1} = mpii_get_activity_name(i); 47 | mpjpe = mean(all_errors(:,:,all_activities == i),3); 48 | activitywise_mpjpe(i+1,2:(nj+1)) = num2cell(mpjpe'); 49 | activitywise_mpjpe{i+1,(nj+2)} = mean(mpjpe(:)); 50 | end 51 | overall_mpjpe = mean(all_errors,3); 52 | activitywise_mpjpe{end,1} = 'All'; 53 | activitywise_mpjpe(end,2:(nj+1)) = num2cell(overall_mpjpe'); 54 | activitywise_mpjpe{end,(nj+2)} = mean(overall_mpjpe(:)); 55 | [pck, auc] = mpii_compute_3d_pck(error_dat, joint_groups, []); 56 | activitywise_pck = [activitywise_pck; pck]; 57 | activitywise_pck{1,1} = 'PCK'; 58 | activitywise_auc = [activitywise_auc; auc]; 59 | activitywise_auc{1,1} = 'AUC'; 60 | clear('error_dat'); 61 | error_dat(1) = mpii_3D_error('All', all_errors); 62 | [pck, auc] = mpii_compute_3d_pck(error_dat, joint_groups, []); 63 | activitywise_pck = [activitywise_pck; pck(2:end,:)]; 64 | activitywise_auc = [activitywise_auc; auc(2:end,:)]; 65 | 66 | sequencewise_table = sequencewise_mpjpe; 67 | sequencewise_table(size(sequencewise_table,1)+1:size(sequencewise_table,1)+size(sequencewise_pck,1),1:size(sequencewise_pck,2)) = sequencewise_pck; 68 | sequencewise_table(size(sequencewise_table,1)+1:size(sequencewise_table,1)+size(sequencewise_auc,1),1:size(sequencewise_auc,2)) = sequencewise_auc; 69 | activitywise_table = activitywise_mpjpe; 70 | activitywise_table(size(activitywise_table,1)+1:size(activitywise_table,1)+size(activitywise_pck,1),1:size(activitywise_pck,2)) = activitywise_pck; 71 | activitywise_table(size(activitywise_table,1)+1:size(activitywise_table,1)+size(activitywise_auc,1),1:size(activitywise_auc,2)) = activitywise_auc; 72 | 73 | 74 | 75 | end -------------------------------------------------------------------------------- /H36M-Toolbox/download_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from subprocess import call 4 | from os import path, makedirs 5 | import hashlib 6 | from tqdm import tqdm 7 | import configparser 8 | import requests 9 | 10 | 11 | BASE_URL = 'http://vision.imar.ro/human3.6m/filebrowser.php' 12 | 13 | subjects = [ 14 | ('S1', 1), 15 | ('S5', 6), 16 | ('S6', 7), 17 | ('S7', 2), 18 | ('S8', 3), 19 | ('S9', 4), 20 | ('S11', 5), 21 | ] 22 | 23 | 24 | def md5(filename): 25 | hash_md5 = hashlib.md5() 26 | with open(filename, 'rb') as f: 27 | for chunk in iter(lambda: f.read(4096), b''): 28 | hash_md5.update(chunk) 29 | return hash_md5.hexdigest() 30 | 31 | 32 | def download_file(url, dest_file, phpsessid): 33 | call(['axel', 34 | '-a', 35 | '-n', '24', 36 | '-H', 'COOKIE: PHPSESSID=' + phpsessid, 37 | '-o', dest_file, 38 | url]) 39 | 40 | 41 | def get_phpsessid(): 42 | config = configparser.ConfigParser() 43 | config.read('config.ini') 44 | try: 45 | phpsessid = config['General']['PHPSESSID'] 46 | except (KeyError, configparser.NoSectionError): 47 | print('Could not read PHPSESSID from `config.ini`.') 48 | phpsessid = 
input('Enter PHPSESSID: ') 49 | return phpsessid 50 | 51 | 52 | def verify_phpsessid(phpsessid): 53 | requests.packages.urllib3.disable_warnings() 54 | test_url = 'http://vision.imar.ro/human3.6m/filebrowser.php' 55 | resp = requests.get(test_url, verify=False, cookies=dict(PHPSESSID=phpsessid)) 56 | fail_message = 'Failed to verify your PHPSESSID. Please ensure that you ' \ 57 | 'are currently logged in at http://vision.imar.ro/human3.6m/ ' \ 58 | 'and that you have copied the PHPSESSID cookie correctly.' 59 | assert resp.url == test_url, fail_message 60 | 61 | 62 | def download_all(phpsessid): 63 | checksums = {} 64 | with open('checksums.txt', 'r') as f: 65 | for line in f.read().splitlines(keepends=False): 66 | v, k = line.split(' ') 67 | checksums[k] = v 68 | 69 | files = [] 70 | for subject_id, id in subjects: 71 | files += [ 72 | ('Poses_D2_Positions_{}.tgz'.format(subject_id), 73 | 'download=1&filepath=Poses/D2_Positions&filename=SubjectSpecific_{}.tgz'.format(id)), 74 | ('Poses_D3_Positions_{}.tgz'.format(subject_id), 75 | 'download=1&filepath=Poses/D3_Positions&filename=SubjectSpecific_{}.tgz'.format(id)), 76 | ('Poses_D3_Positions_mono_{}.tgz'.format(subject_id), 77 | 'download=1&filepath=Poses/D3_Positions_mono&filename=SubjectSpecific_{}.tgz'.format(id)), 78 | ('Poses_D3_Positions_mono_universal_{}.tgz'.format(subject_id), 79 | 'download=1&filepath=Poses/D3_Positions_mono_universal&filename=SubjectSpecific_{}.tgz'.format(id)), 80 | ('Videos_{}.tgz'.format(subject_id), 81 | 'download=1&filepath=Videos&filename=SubjectSpecific_{}.tgz'.format(id)), 82 | ] 83 | 84 | out_dir = 'archives' 85 | makedirs(out_dir, exist_ok=True) 86 | 87 | for filename, query in tqdm(files, ascii=True): 88 | out_file = path.join(out_dir, filename) 89 | 90 | if path.isfile(out_file): 91 | checksum = md5(out_file) 92 | if checksums.get(out_file, None) == checksum: 93 | continue 94 | 95 | download_file(BASE_URL + '?' + query, out_file, phpsessid) 96 | 97 | 98 | if __name__ == '__main__': 99 | phpsessid = get_phpsessid() 100 | verify_phpsessid(phpsessid) 101 | download_all(phpsessid) 102 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/get_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to fetch and unzip the available data. 3 | # It uses config as the configuration file (duh!) 4 | echo "Reading configuration from ./config....." >&2 5 | source ./conf.ig 6 | if [[ $ready_to_download -eq 0 ]]; then 7 | echo "Please read the documentation and edit the config file accordingly." >&2 8 | exit 1 9 | fi 10 | if [ ! -d "$destination" ]; then 11 | mkdir "$destination" 12 | fi 13 | seq_sets=('imageSequence') 14 | if [[ $download_masks -eq 1 ]]; then 15 | seq_sets=('imageSequence' 'FGmasks' 'ChairMasks') 16 | fi 17 | source_path="http://gvv.mpi-inf.mpg.de/3dhp-dataset" 18 | echo "Download destination set to $destination " >&2 19 | 20 | for subject in ${subjects[@]}; do 21 | if [ ! -d "$destination/S$subject" ]; then 22 | mkdir "$destination/S$subject" 23 | fi 24 | for seq in 1 2; do 25 | if [ ! -d "$destination/S$subject/Seq$seq" ]; then 26 | mkdir "$destination/S$subject/Seq$seq" 27 | fi 28 | echo "Downloading Subject $subject, Sequence $seq ... 
" >&2 29 | wget "$source_path/S$subject/Seq$seq/annot.mat" 30 | mv "./annot.mat" "$destination/S$subject/Seq$seq/annot.mat" 31 | wget "$source_path/S$subject/Seq$seq/camera.calibration" 32 | mv "./camera.calibration" "$destination/S$subject/Seq$seq/camera.calibration" 33 | 34 | #Download the videos first, and then unzip them 35 | for im in "${seq_sets[@]}"; do 36 | echo "... $im ... " >&2 37 | if [ ! -d "$destination/S$subject/Seq$seq/$im" ]; then 38 | mkdir "$destination/S$subject/Seq$seq/$im" 39 | fi 40 | #One could check here if the downloaded videos are available unzipped, but whatever, download if 41 | #zip is missing 42 | if [ ! -f "$destination/S$subject/Seq$seq/$im/vnect_cameras.zip" ]; then 43 | wget "$source_path/S$subject/Seq$seq/$im/vnect_cameras.zip" 44 | mv "./vnect_cameras.zip" "$destination/S$subject/Seq$seq/$im/vnect_cameras.zip" 45 | fi 46 | if [ $download_extra_wall_cameras -ne 0 ]; then 47 | if [ ! -f "$destination/S$subject/Seq$seq/$im/other_angled_cameras.zip" ]; then 48 | wget "$source_path/S$subject/Seq$seq/$im/other_angled_cameras.zip" 49 | mv "./other_angled_cameras.zip" "$destination/S$subject/Seq$seq/$im/other_angled_cameras.zip" 50 | fi 51 | fi 52 | if [ $download_extra_ceiling_cameras -ne 0 ]; then 53 | if [ ! -f "$destination/S$subject/Seq$seq/$im/ceiling_cameras.zip" ]; then 54 | wget "$source_path/S$subject/Seq$seq/$im/ceiling_cameras.zip" 55 | mv "./ceiling_cameras.zip" "$destination/S$subject/Seq$seq/$im/ceiling_cameras.zip" 56 | fi 57 | fi 58 | done 59 | #Unzip the videos now 60 | for im in "${seq_sets[@]}"; do 61 | echo "... $im ... " >&2 62 | if [ ! -d "$destination/S$subject/Seq$seq/$im" ]; then 63 | mkdir "$destination/S$subject/Seq$seq/$im" 64 | fi 65 | if [ -f "$destination/S$subject/Seq$seq/$im/vnect_cameras.zip" ]; then 66 | unzip -j "$destination/S$subject/Seq$seq/$im/vnect_cameras.zip" -d "$destination/S$subject/Seq$seq/$im/" 67 | rm "$destination/S$subject/Seq$seq/$im/vnect_cameras.zip" 68 | fi 69 | if [ -f "$destination/S$subject/Seq$seq/$im/other_angled_cameras.zip" ]; then 70 | unzip -j "$destination/S$subject/Seq$seq/$im/other_angled_cameras.zip" -d "$destination/S$subject/Seq$seq/$im/" 71 | rm "$destination/S$subject/Seq$seq/$im/other_angled_cameras.zip" 72 | fi 73 | if [ -f "$destination/S$subject/Seq$seq/$im/ceiling_cameras.zip" ]; then 74 | unzip -j "$destination/S$subject/Seq$seq/$im/ceiling_cameras.zip" -d "$destination/S$subject/Seq$seq/$im/" 75 | rm "$destination/S$subject/Seq$seq/$im/ceiling_cameras.zip" 76 | fi 77 | done 78 | 79 | done #Seq 80 | done #Subject 81 | -------------------------------------------------------------------------------- /ContextPose_mpi/dataset/mpi_inf_3dhp/util/mpii_get_sequence_info.m: -------------------------------------------------------------------------------- 1 | function [bg_augmentable, ub_augmentable, lb_augmentable, chair_augmentable, fps, num_frames] = mpii_get_sequence_info(subject_id, sequence) 2 | ub_augmentable = false; 3 | lb_augmentable = false; 4 | bg_augmentable = false; 5 | chair_augmentable = false; 6 | fps = 25; 7 | switch subject_id 8 | case 1 9 | switch sequence 10 | case 1 11 | bg_augmentable = true; 12 | chair_augmentable = true; 13 | num_frames = 6416; 14 | case 2 15 | ub_augmentable = true; %The LB masks are bad, so skip putting textures there and in the BG 16 | chair_augmentable = true; 17 | num_frames = 12430; 18 | fps = 50; 19 | otherwise 20 | end 21 | case 2 22 | switch sequence 23 | case 1 24 | bg_augmentable = true; 25 | chair_augmentable = true; 26 | num_frames = 6502; 
27 | case 2 28 | bg_augmentable = true; 29 | chair_augmentable = true; 30 | ub_augmentable = true; 31 | lb_augmentable = true; 32 | num_frames = 6081; 33 | end 34 | case 3 35 | switch sequence 36 | fps = 50; 37 | case 1 38 | bg_augmentable = true; 39 | chair_augmentable = true; 40 | num_frames = 12488; 41 | case 2 42 | bg_augmentable = true; 43 | chair_augmentable = true; 44 | ub_augmentable = true; 45 | lb_augmentable = true; 46 | num_frames = 12283; 47 | end 48 | case 4 49 | switch sequence 50 | case 1 51 | bg_augmentable = true; 52 | chair_augmentable = true; 53 | num_frames = 6171; 54 | case 2 55 | chair_augmentable = true; %The LB masks are bad, so skip putting textures there and in the BG 56 | ub_augmentable = true; 57 | num_frames = 6675; 58 | end 59 | case 5 60 | switch sequence 61 | fps = 50; 62 | case 1 63 | bg_augmentable = true; 64 | chair_augmentable = true; 65 | num_frames = 12820; 66 | case 2 67 | chair_augmentable = true; 68 | ub_augmentable = true; 69 | bg_augmentable = true; 70 | lb_augmentable = true; 71 | num_frames = 12312; 72 | otherwise 73 | end 74 | case 6 75 | switch sequence 76 | case 1 77 | bg_augmentable = true; 78 | chair_augmentable = true; 79 | num_frames = 6188; 80 | case 2 81 | ub_augmentable = true; 82 | lb_augmentable = true; 83 | bg_augmentable = true; 84 | chair_augmentable = true; 85 | num_frames = 6145; 86 | otherwise 87 | end 88 | case 7 89 | switch sequence 90 | case 1 91 | bg_augmentable = true; 92 | chair_augmentable = true; 93 | ub_augmentable = true; 94 | lb_augmentable = true; 95 | num_frames = 6239; 96 | case 2 97 | bg_augmentable = true; 98 | chair_augmentable = true; 99 | num_frames = 6320; 100 | end 101 | case 8 102 | switch sequence 103 | case 1 104 | bg_augmentable = true; 105 | chair_augmentable = true; 106 | ub_augmentable = true; 107 | lb_augmentable = true; 108 | num_frames = 6468; 109 | case 2 110 | bg_augmentable = true; 111 | chair_augmentable = true; 112 | num_frames = 6054; 113 | end 114 | end 115 | end 116 | -------------------------------------------------------------------------------- /ContextPose_mpi/3dhp_test/test_util/mpii_compute_3d_pck.m: -------------------------------------------------------------------------------- 1 | function [pck_table, auc_table] = mpii_compute_3d_pck(error_data, joint_groups, output_base_path) 2 | 3 | %Input 4 | %error_data is a struct array of type mpii_3d_error 5 | %The struct zcarries information about the name of the method as well as an 6 | %nj x 1 x nf matrix with the joint errors. 7 | %joint_groups is an ng x 2 cell, where ng is the number of groups. It 8 | %carries the name of the group as well as the indices of the joints that 9 | %belong to the group. 10 | 11 | %If the error_data array has multiple inputs, there are additional 12 | %comparative AUC plots output per joint in addition to the individual ones. 
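mpii_compute_3d_pck.m sweeps error thresholds from 0 to 150 mm in 5 mm steps, takes PCK at the 150 mm threshold, and defines AUC as the mean of the PCK curve over those thresholds. The same two metrics in plain NumPy, for a single joint group (the function name and the (joints, frames) error layout are illustrative):

import numpy as np

def pck_and_auc(errors, pck_thresh=150.0, step=5.0):
    """errors: (num_joints, num_frames) joint position errors in mm."""
    thresholds = np.arange(0.0, pck_thresh + step, step)       # 0, 5, ..., 150, as in the MATLAB code
    pck_curve = np.array([(errors < t).mean() for t in thresholds])
    pck = 100.0 * (errors < pck_thresh).mean()                 # PCK @ 150 mm, in percent
    auc = 100.0 * pck_curve.mean()                             # mean of the PCK curve over all thresholds
    return pck, auc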
13 | ng = size(joint_groups,1); 14 | 15 | 16 | pck_curve_array = cell(length(error_data), ng+1); %Contains the PCK results per joint group, per error_data cell 17 | pck_array = cell(length(error_data), ng+1); %Contains the AUC results per joint group 18 | auc_array = cell(length(error_data), ng+1); %Contains the AUC results per joint group 19 | %thresh = 0:5:200; 20 | thresh = 0:5:150; 21 | pck_thresh = 150; 22 | 23 | 24 | for i = 1:length(error_data) 25 | joint_count = 0; 26 | nf = size(error_data(i).error,3); 27 | for j = 1:ng 28 | for ti =1:length(thresh) 29 | t = thresh(ti); 30 | pck_curve_array{i,j} = [pck_curve_array{i,j}, sum(sum(error_data(i).error(joint_groups{j,2},1,:) < t, 3),1) / (length(joint_groups{j,2}) *nf)]; 31 | end 32 | 33 | joint_count = joint_count + length(joint_groups{j,2}); 34 | if(isempty(pck_curve_array{i,ng+1})) 35 | pck_curve_array{i,ng+1} = pck_curve_array{i,j} * length(joint_groups{j,2}); 36 | else 37 | pck_curve_array{i,ng+1} = pck_curve_array{i,ng+1} + pck_curve_array{i,j} * length(joint_groups{j,2}); 38 | end 39 | auc_array{i,j} = 100* sum(pck_curve_array{i,j}(:))/ length(thresh); 40 | pck_array{i,j} = 100* sum(sum(error_data(i).error(joint_groups{j,2},1,:) < pck_thresh, 3),1) / (length(joint_groups{j,2}) *nf); 41 | if(isempty(pck_array{i,ng+1})) 42 | pck_array{i,ng+1} = pck_array{i,j} * length(joint_groups{j,2}); 43 | else 44 | pck_array{i,ng+1} = pck_array{i,ng+1} + pck_array{i,j} * length(joint_groups{j,2}); 45 | end 46 | end 47 | pck_array{i,ng+1} = pck_array{i,ng+1} / joint_count; 48 | pck_curve_array{i,ng+1} = pck_curve_array{i,ng+1} / joint_count; 49 | auc_array{i,ng+1} = 100* sum(pck_curve_array{i,ng+1}(:))/ length(thresh); 50 | end 51 | 52 | pck_table = cell(length(error_data)+1, ng+2); 53 | pck_table{1,ng+2} = 'Total'; 54 | for i = 1:length(error_data) 55 | pck_table{1+i,1} = error_data(i).method; 56 | end 57 | for i = 1:ng 58 | pck_table{1,i+1} = joint_groups{i,1}; 59 | end 60 | auc_table = pck_table; 61 | auc_table(2:end,2:end) = auc_array; 62 | pck_table(2:end,2:end) = pck_array; 63 | 64 | 65 | if(~isempty(output_base_path)) 66 | %Generate and save plots to output_path 67 | %First generate individual plots from each row of the pck_curve_array 68 | colormap default; 69 | 70 | for i = 1:length(error_data) 71 | all_plot = []; 72 | for j = 1:ng+1 73 | figure(1); 74 | cla; 75 | plot(thresh,pck_curve_array{i,j},'LineWidth',2); 76 | all_plot = [all_plot; pck_curve_array{i,j}]; 77 | axis([0 150 0 1]); 78 | title([pck_table{1,j+1} ' PCK150mm']); 79 | output_dir = [output_base_path filesep error_data(i).method]; 80 | if(exist(output_dir,'dir') ~= 7) 81 | mkdir(output_dir); 82 | end 83 | saveas(gcf,[output_dir filesep pck_table{1,j+1}], 'fig'); 84 | saveas(gcf,[output_dir filesep pck_table{1,j+1}], 'svg'); 85 | saveas(gcf,[output_dir filesep pck_table{1,j+1}], 'png'); 86 | 87 | end 88 | figure(2); 89 | cla; 90 | plot(thresh,all_plot,'LineWidth',2); 91 | axis([0 150 0 1]); 92 | hold off; 93 | legend(pck_table(1,2:end)); 94 | saveas(gcf,[output_dir filesep 'All'], 'fig'); 95 | saveas(gcf,[output_dir filesep 'All'], 'svg'); 96 | saveas(gcf,[output_dir filesep 'All'], 'png'); 97 | end 98 | end 99 | 100 | end 101 | %Then group the plots by methods 102 | 103 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/config/default.py: -------------------------------------------------------------------------------- 1 | 2 | # ------------------------------------------------------------------------------ 3 | # 
Copyright (c) Microsoft 4 | # Licensed under the MIT License. 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | 14 | from yacs.config import CfgNode as CN 15 | 16 | 17 | _C = CN() 18 | 19 | _C.OUTPUT_DIR = '' 20 | _C.LOG_DIR = '' 21 | _C.DATA_DIR = '' 22 | _C.GPUS = (0,) 23 | _C.WORKERS = 4 24 | _C.PRINT_FREQ = 20 25 | _C.AUTO_RESUME = False 26 | _C.PIN_MEMORY = True 27 | _C.RANK = 0 28 | 29 | # Cudnn related params 30 | _C.CUDNN = CN() 31 | _C.CUDNN.BENCHMARK = True 32 | _C.CUDNN.DETERMINISTIC = False 33 | _C.CUDNN.ENABLED = True 34 | 35 | # common params for NETWORK 36 | _C.MODEL = CN() 37 | _C.MODEL.NAME = 'pose_hrnet' 38 | _C.MODEL.INIT_WEIGHTS = True 39 | _C.MODEL.PRETRAINED = '' 40 | _C.MODEL.NUM_JOINTS = 17 41 | _C.MODEL.TAG_PER_JOINT = True 42 | _C.MODEL.TARGET_TYPE = 'gaussian' 43 | _C.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 44 | _C.MODEL.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 45 | _C.MODEL.SIGMA = 2 46 | _C.MODEL.EXTRA = CN(new_allowed=True) 47 | 48 | _C.LOSS = CN() 49 | _C.LOSS.USE_OHKM = False 50 | _C.LOSS.TOPK = 8 51 | _C.LOSS.USE_TARGET_WEIGHT = True 52 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 53 | 54 | # DATASET related params 55 | _C.DATASET = CN() 56 | _C.DATASET.ROOT = '' 57 | _C.DATASET.DATASET = 'mpii' 58 | _C.DATASET.TRAIN_SET = 'train' 59 | _C.DATASET.TEST_SET = 'valid' 60 | _C.DATASET.DATA_FORMAT = 'jpg' 61 | _C.DATASET.HYBRID_JOINTS_TYPE = '' 62 | _C.DATASET.SELECT_DATA = False 63 | 64 | # training data augmentation 65 | _C.DATASET.FLIP = True 66 | _C.DATASET.SCALE_FACTOR = 0.25 67 | _C.DATASET.ROT_FACTOR = 30 68 | _C.DATASET.PROB_HALF_BODY = 0.0 69 | _C.DATASET.NUM_JOINTS_HALF_BODY = 8 70 | _C.DATASET.COLOR_RGB = False 71 | 72 | # train 73 | _C.TRAIN = CN() 74 | 75 | _C.TRAIN.LR_FACTOR = 0.1 76 | _C.TRAIN.LR_STEP = [90, 110] 77 | _C.TRAIN.LR = 0.001 78 | 79 | _C.TRAIN.OPTIMIZER = 'adam' 80 | _C.TRAIN.MOMENTUM = 0.9 81 | _C.TRAIN.WD = 0.0001 82 | _C.TRAIN.NESTEROV = False 83 | _C.TRAIN.GAMMA1 = 0.99 84 | _C.TRAIN.GAMMA2 = 0.0 85 | 86 | _C.TRAIN.BEGIN_EPOCH = 0 87 | _C.TRAIN.END_EPOCH = 140 88 | 89 | _C.TRAIN.RESUME = False 90 | _C.TRAIN.CHECKPOINT = '' 91 | 92 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 93 | _C.TRAIN.SHUFFLE = True 94 | 95 | # testing 96 | _C.TEST = CN() 97 | 98 | # size of images for each device 99 | _C.TEST.BATCH_SIZE_PER_GPU = 32 100 | # Test Model Epoch 101 | _C.TEST.FLIP_TEST = False 102 | _C.TEST.POST_PROCESS = False 103 | _C.TEST.SHIFT_HEATMAP = False 104 | 105 | _C.TEST.USE_GT_BBOX = False 106 | 107 | # nms 108 | _C.TEST.IMAGE_THRE = 0.1 109 | _C.TEST.NMS_THRE = 0.6 110 | _C.TEST.SOFT_NMS = False 111 | _C.TEST.OKS_THRE = 0.5 112 | _C.TEST.IN_VIS_THRE = 0.0 113 | _C.TEST.COCO_BBOX_FILE = '' 114 | _C.TEST.BBOX_THRE = 1.0 115 | _C.TEST.MODEL_FILE = '' 116 | 117 | # debug 118 | _C.DEBUG = CN() 119 | _C.DEBUG.DEBUG = False 120 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 121 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 122 | _C.DEBUG.SAVE_HEATMAPS_GT = False 123 | _C.DEBUG.SAVE_HEATMAPS_PRED = False 124 | 125 | 126 | def update_config(cfg, args): 127 | cfg.defrost() 128 | cfg.merge_from_file(args.cfg) 129 | cfg.merge_from_list(args.opts) 130 | 131 | if args.modelDir: 132 | cfg.OUTPUT_DIR = args.modelDir 133 | 134 | if args.logDir: 135 | cfg.LOG_DIR = args.logDir 136 | 137 | if 
args.dataDir: 138 | cfg.DATA_DIR = args.dataDir 139 | 140 | cfg.DATASET.ROOT = os.path.join( 141 | cfg.DATA_DIR, cfg.DATASET.ROOT 142 | ) 143 | 144 | cfg.MODEL.PRETRAINED = os.path.join( 145 | cfg.DATA_DIR, cfg.MODEL.PRETRAINED 146 | ) 147 | 148 | if cfg.TEST.MODEL_FILE: 149 | cfg.TEST.MODEL_FILE = os.path.join( 150 | cfg.DATA_DIR, cfg.TEST.MODEL_FILE 151 | ) 152 | 153 | cfg.freeze() 154 | 155 | 156 | if __name__ == '__main__': 157 | import sys 158 | with open(sys.argv[1], 'w') as f: 159 | print(_C, file=f) 160 | 161 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/humaneva_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | import numpy as np 9 | import copy 10 | from common.skeleton import Skeleton 11 | from common.mocap_dataset import MocapDataset 12 | from common.camera import normalize_screen_coordinates, image_coordinates 13 | 14 | humaneva_skeleton = Skeleton(parents=[-1, 0, 1, 2, 3, 1, 5, 6, 0, 8, 9, 0, 11, 12, 1], 15 | joints_left=[2, 3, 4, 8, 9, 10], 16 | joints_right=[5, 6, 7, 11, 12, 13]) 17 | 18 | humaneva_cameras_intrinsic_params = [ 19 | { 20 | 'id': 'C1', 21 | 'res_w': 640, 22 | 'res_h': 480, 23 | 'azimuth': 0, # Only used for visualization 24 | }, 25 | { 26 | 'id': 'C2', 27 | 'res_w': 640, 28 | 'res_h': 480, 29 | 'azimuth': -90, # Only used for visualization 30 | }, 31 | { 32 | 'id': 'C3', 33 | 'res_w': 640, 34 | 'res_h': 480, 35 | 'azimuth': 90, # Only used for visualization 36 | }, 37 | ] 38 | 39 | humaneva_cameras_extrinsic_params = { 40 | 'S1': [ 41 | { 42 | 'orientation': [0.424207, -0.4983646, -0.5802981, 0.4847012], 43 | 'translation': [4062.227, 663.2477, 1528.397], 44 | }, 45 | { 46 | 'orientation': [0.6503354, -0.7481602, -0.0919284, 0.0941766], 47 | 'translation': [844.8131, -3805.2092, 1504.9929], 48 | }, 49 | { 50 | 'orientation': [0.0664734, -0.0690535, 0.7416416, -0.6639132], 51 | 'translation': [-797.67377, 3916.3174, 1433.6602], 52 | }, 53 | ], 54 | 'S2': [ 55 | { 56 | 'orientation': [ 0.4214752, -0.4961493, -0.5838273, 0.4851187 ], 57 | 'translation': [ 4112.9121, 626.4929, 1545.2988], 58 | }, 59 | { 60 | 'orientation': [ 0.6501393, -0.7476588, -0.0954617, 0.0959808 ], 61 | 'translation': [ 923.5740, -3877.9243, 1504.5518], 62 | }, 63 | { 64 | 'orientation': [ 0.0699353, -0.0712403, 0.7421637, -0.662742 ], 65 | 'translation': [ -781.4915, 3838.8853, 1444.9929], 66 | }, 67 | ], 68 | 'S3': [ 69 | { 70 | 'orientation': [ 0.424207, -0.4983646, -0.5802981, 0.4847012 ], 71 | 'translation': [ 4062.2271, 663.2477, 1528.3970], 72 | }, 73 | { 74 | 'orientation': [ 0.6503354, -0.7481602, -0.0919284, 0.0941766 ], 75 | 'translation': [ 844.8131, -3805.2092, 1504.9929], 76 | }, 77 | { 78 | 'orientation': [ 0.0664734, -0.0690535, 0.7416416, -0.6639132 ], 79 | 'translation': [ -797.6738, 3916.3174, 1433.6602], 80 | }, 81 | ], 82 | 'S4': [ 83 | {}, 84 | {}, 85 | {}, 86 | ], 87 | 88 | } 89 | 90 | class HumanEvaDataset(MocapDataset): 91 | def __init__(self, path): 92 | super().__init__(fps=60, skeleton=humaneva_skeleton) 93 | 94 | self._cameras = copy.deepcopy(humaneva_cameras_extrinsic_params) 95 | for cameras in self._cameras.values(): 96 | for i, cam in enumerate(cameras): 97 | cam.update(humaneva_cameras_intrinsic_params[i]) 98 | for k, v in cam.items(): 
99 | if k not in ['id', 'res_w', 'res_h']: 100 | cam[k] = np.array(v, dtype='float32') 101 | if 'translation' in cam: 102 | cam['translation'] = cam['translation']/1000 # mm to meters 103 | 104 | for subject in list(self._cameras.keys()): 105 | data = self._cameras[subject] 106 | del self._cameras[subject] 107 | for prefix in ['Train/', 'Validate/', 'Unlabeled/Train/', 'Unlabeled/Validate/', 'Unlabeled/']: 108 | self._cameras[prefix + subject] = data 109 | 110 | # Load serialized dataset 111 | data = np.load(path, allow_pickle=True)['positions_3d'].item() 112 | 113 | self._data = {} 114 | for subject, actions in data.items(): 115 | self._data[subject] = {} 116 | for action_name, positions in actions.items(): 117 | self._data[subject][action_name] = { 118 | 'positions': positions, 119 | 'cameras': self._cameras[subject], 120 | } 121 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/model_stmo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from model.block.vanilla_transformer_encoder import Transformer 4 | from model.block.strided_transformer_encoder import Transformer as Transformer_reduce 5 | 6 | class Linear(nn.Module): 7 | def __init__(self, linear_size, p_dropout=0.25): 8 | super(Linear, self).__init__() 9 | self.l_size = linear_size 10 | 11 | self.relu = nn.LeakyReLU(0.2, inplace=True) 12 | self.dropout = nn.Dropout(p_dropout) 13 | 14 | #self.w1 = nn.Linear(self.l_size, self.l_size) 15 | self.w1 = nn.Conv1d(self.l_size, self.l_size, kernel_size=1) 16 | self.batch_norm1 = nn.BatchNorm1d(self.l_size) 17 | 18 | #self.w2 = nn.Linear(self.l_size, self.l_size) 19 | self.w2 = nn.Conv1d(self.l_size, self.l_size, kernel_size=1) 20 | self.batch_norm2 = nn.BatchNorm1d(self.l_size) 21 | 22 | def forward(self, x): 23 | y = self.w1(x) 24 | y = self.batch_norm1(y) 25 | y = self.relu(y) 26 | y = self.dropout(y) 27 | 28 | y = self.w2(y) 29 | y = self.batch_norm2(y) 30 | y = self.relu(y) 31 | y = self.dropout(y) 32 | 33 | out = x + y 34 | 35 | return out 36 | 37 | class FCBlock(nn.Module): 38 | 39 | def __init__(self, channel_in, channel_out, linear_size, block_num): 40 | super(FCBlock, self).__init__() 41 | 42 | self.linear_size = linear_size 43 | self.block_num = block_num 44 | self.layers = [] 45 | self.channel_in = channel_in 46 | self.stage_num = 3 47 | self.p_dropout = 0.1 48 | #self.fc_1 = nn.Linear(self.channel_in, self.linear_size) 49 | self.fc_1 = nn.Conv1d(self.channel_in, self.linear_size, kernel_size=1) 50 | self.bn_1 = nn.BatchNorm1d(self.linear_size) 51 | for i in range(block_num): 52 | self.layers.append(Linear(self.linear_size, self.p_dropout)) 53 | #self.fc_2 = nn.Linear(self.linear_size, channel_out) 54 | self.fc_2 = nn.Conv1d(self.linear_size, channel_out, kernel_size=1) 55 | 56 | self.layers = nn.ModuleList(self.layers) 57 | self.relu = nn.LeakyReLU(0.2, inplace=True) 58 | self.dropout = nn.Dropout(self.p_dropout) 59 | 60 | def forward(self, x): 61 | 62 | x = self.fc_1(x) 63 | x = self.bn_1(x) 64 | x = self.relu(x) 65 | x = self.dropout(x) 66 | for i in range(self.block_num): 67 | x = self.layers[i](x) 68 | x = self.fc_2(x) 69 | 70 | return x 71 | 72 | class PoseTransformer(nn.Module): 73 | def __init__(self, args): 74 | super().__init__() 75 | 76 | layers, channel, d_hid, length = 3, 256, 512, 9#args.frames 77 | stride_num = { 78 | '9': [1, 3, 3], 79 | '27': [3, 3, 3], 80 | '351': [3, 9, 13], 81 | '81': [3, 3, 3, 3], 82 | '243': [3, 3, 3, 3, 3], 
83 | }
84 | stride_num = stride_num[str(length)]
85 | self.num_joints_in, self.num_joints_out = 17, 17
86 | 
87 | self.encoder = FCBlock(2*self.num_joints_in, channel, 2*channel, 1)
88 | 
89 | self.Transformer = Transformer(layers, channel, d_hid, length=length)
90 | self.Transformer_reduce = Transformer_reduce(len(stride_num), channel, d_hid, \
91 | length=length, stride_num=stride_num)
92 | 
93 | self.fcn = nn.Sequential(
94 | nn.BatchNorm1d(channel, momentum=0.1),
95 | nn.Conv1d(channel, 3*self.num_joints_out, kernel_size=1)
96 | )
97 | 
98 | self.fcn_1 = nn.Sequential(
99 | nn.BatchNorm1d(channel, momentum=0.1),
100 | nn.Conv1d(channel, 3*self.num_joints_out, kernel_size=1)
101 | )
102 | 
103 | def forward(self, x):
104 | x = x[:, :, :, :, 0].permute(0, 2, 3, 1).contiguous()
105 | x_shape = x.shape
106 | 
107 | x = x.view(x.shape[0], x.shape[1], -1)
108 | x = x.permute(0, 2, 1).contiguous()
109 | 
110 | x = self.encoder(x)
111 | 
112 | x = x.permute(0, 2, 1).contiguous()
113 | x = self.Transformer(x)
114 | 
115 | x_VTE = x
116 | x_VTE = x_VTE.permute(0, 2, 1).contiguous()
117 | x_VTE = self.fcn_1(x_VTE)
118 | 
119 | x_VTE = x_VTE.view(x_shape[0], self.num_joints_out, -1, x_VTE.shape[2])
120 | x_VTE = x_VTE.permute(0, 2, 3, 1).contiguous().unsqueeze(dim=-1)
121 | 
122 | x = self.Transformer_reduce(x)
123 | x = x.permute(0, 2, 1).contiguous()
124 | x = self.fcn(x)
125 | 
126 | x = x.view(x_shape[0], self.num_joints_out, -1, x.shape[2])
127 | x = x.permute(0, 2, 3, 1).contiguous().unsqueeze(dim=-1)
128 | 
129 | return x, x_VTE
130 | 
131 | 
132 | 
133 | 
134 | 
--------------------------------------------------------------------------------
/ContextPose_mpi/dataset/mpi_inf_3dhp/README.txt:
--------------------------------------------------------------------------------
1 | ####################
2 | MPI-INF-3DHP Dataset
3 | ####################
4 | 
5 | Terms of use:
6 | The provided dataset is intended for research purposes only and any use of it for non-scientific and/or commercial means is not allowed. This includes publishing any scientific
7 | results obtained with our data in non-scientific literature, such as tabloid press. We ask the user to respect our actors and not to use the data for any distasteful manipulations.
8 | If you use our training or test data, you are required to cite the origin:
9 | [1] VNect: Real-time 3D Human Pose Estimation With A Single RGB Camera (ACM Trans. on Graphics, SIGGRAPH 2017)
10 | Mehta, D.; Sridhar, S.; Sotnychenko, O.; Rhodin, H.; Shafiei, M.; Seidel, H.; Xu, W.; Casas, D.; Theobalt, C.
11 | [2] Monocular 3D Human Pose Estimation In The Wild Using Improved CNN Supervision (3DV 2017)
12 | Mehta, D.; Rhodin, H.; Casas, D.; Fua, P.; Sotnychenko, O.; Xu, W.; Theobalt, C.
13 | 
14 | Refer to the license (license.txt) distributed with the data.
15 | 
16 | ########################
17 | Downloading the Dataset
18 | ########################
19 | Use the script get_dataset.sh to download the training set and get_testset.sh for the test set. You will need to read and review the configuration in conf.ig before you can proceed with downloading the dataset.
20 | 
21 | ####################
22 | Training Set Details
23 | ####################
24 | The dataset comprises 8 subjects, covering the following 8 activities with 2 sequences per subject.
25 | Sequence 1: 26 | A1: Walking/Standing 27 | Walking, jogging, waiting in a queue, pointing at things, having an animated conversation, smoking while standing or walking, phone call etc 28 | A2: Exercise 29 | Lunges, pushups, bridge, stretch legs, other forms of slow exercise 30 | A3: Sitting(1) 31 | Eating, working at a computer, picking something off the floor, lie back on the chair, cross feet, sit with hands behind head etc 32 | A4: Crouch/Reach 33 | Crouch and pretend lift something, tie shoe laces, photography while crouching, crouch and interact with objects etc 34 | 35 | Sequence 2: 36 | A5: On the Floor 37 | Cycling, crunches and other complicated poses while lying on the ground. 38 | A6: Sports 39 | Boxing, tennis, golf, soccer and other forms of fast motion. Slightly awkward because the green screen covering the floor didn't have traction on the floor. 40 | A7: Sitting(2) 41 | Move the chair around while seated, wave someone over, cheer for sports team, animated conversation with someone, cross legs etc 42 | A8: Miscellaneous 43 | Dance, jump, walk hand in hand with another (pretend) person, etc 44 | with 2 sets of clothing each. There is at least 1 clothing set per subject that is unique to that subject. 45 | 46 | Each sequence is roughly 4 minutes, with each activity taking roughly 1 minute. 47 | 48 | Each subject wears a different set of clothing in the two sequences. At least 1 set of clothing per subject is unique to that subject. 49 | 50 | The dataset was recorded in a green screen studio with 14 cameras. The dataset has segmentation masks available for the background, for the chair, and for upper body and lower body clothing. 51 | Use mpii_get_sequence_info to get information about which masks are available for each subject-sequence combination. The same function also provides information about the frame rate of the videos in the sequence, as well as the number of frames available per video. 52 | 53 | The dataset is organized in the following hierarchy. 54 | SX: Where X is the subject ID (1 to 8) 55 | SeqY: Where Y is the sequence number (1 or 2) 56 | ChairMasks: Masks for the chair. This mask is encoded in the Red channel. 57 | FGmaks: Masks for the green screen, and the lower body and the upper body when available. See mpii_get_sequence_info for available augmentation opportunities. The green screen mask is in the Red channel, and when available, the green channel carries the upper body mask and the blue channel carries the lower body mask. It may be helpful to apply some gaussian smoothing to the masks when using them. 58 | imageSequence: The RGB frames. 59 | annot.mat: Body joint annotations in each camera's coordinate system. There are 2D pose annotations, 3D pose annotations and normalized 3D pose annotations (universal) available for each camera. For information about the joint order, joint labels and the joint subsets used in various projects, refer to mpii_get_joint_set. The file also contains the camera correspondence for each annotation cell (cameras, indexed with 0). For the camera subsets used in various projects, refer to mpii_get_camera_set. The 3D annotations (annot3) when reprojected into the image match the 2D annotations (annot2), however the same is not true of the normalized 3D annotations (univ_annot3). The 2D annotations (annot2) for each frame are arranged in a single row as x1,y1,x2,y2..,xj,yj, while the 3D annotations (annot3,univ_annot3) are arranged as x1,y1,z1,x2,y2,z2..,xj,yj,zj.. 
The file also contains the frame number correspondence for each row of annotations (frames). Though rare, it is possible that some sequences have a few frames missing at the end but annotations still available. Thus it is advisable to get the frame count F from mpii_get_sequence_info and read only the first F rows of annotations. 60 | camera.calibration: Camera calibration parameters 61 | 62 | 63 | The image frames of the dataset come in the form of video sequences, which are further grouped by common camera sets for ease of distribution. Before using the data, it is recommended to convert the videos back to image sequences using ffmpeg (ffmpeg -i "/video_X.avi" -qscale:v 1 "/img_X_%%06d.jpg") to ensure valid correspondence between the annotations and the frames. 64 | -------------------------------------------------------------------------------- /ContextPose_mpi/common/opt.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import math 4 | import time 5 | import torch 6 | 7 | class opts(): 8 | def __init__(self): 9 | self.parser = argparse.ArgumentParser() 10 | 11 | def init(self): 12 | self.parser.add_argument('--backbone', default='hrnet_48', type=str) 13 | self.parser.add_argument('--layers', default=3, type=int) 14 | self.parser.add_argument('--channel', default=256, type=int) 15 | self.parser.add_argument('--d_hid', default=512, type=int) 16 | self.parser.add_argument('--dataset', type=str, default='h36m') 17 | self.parser.add_argument('-k', '--keypoints', default='cpn_ft_h36m_dbb', type=str) 18 | self.parser.add_argument('--data_augmentation', type=bool, default=True) 19 | self.parser.add_argument('--reverse_augmentation', type=bool, default=False) 20 | self.parser.add_argument('--test_augmentation', type=bool, default=True) 21 | self.parser.add_argument('--crop_uv', type=int, default=0) 22 | self.parser.add_argument('--root_path', type=str, default='dataset/') 23 | self.parser.add_argument('-a', '--actions', default='*', type=str) 24 | self.parser.add_argument('--downsample', default=1, type=int) 25 | self.parser.add_argument('--subset', default=1, type=float) 26 | self.parser.add_argument('-s', '--stride', default=1, type=int) 27 | self.parser.add_argument('--gpu', default='0', type=str, help='') 28 | self.parser.add_argument('--train', type=int, default=0) 29 | self.parser.add_argument('--test', type=int, default=1) 30 | self.parser.add_argument('--nepoch', type=int, default=80) 31 | self.parser.add_argument('-b','--batchSize', type=int, default=160) 32 | self.parser.add_argument('--lr', type=float, default=1e-3) 33 | self.parser.add_argument('--lr_refine', type=float, default=1e-5) 34 | self.parser.add_argument('--lr_decay_large', type=float, default=0.5) 35 | self.parser.add_argument('--large_decay_epoch', type=int, default=80) 36 | self.parser.add_argument('--workers', type=int, default=14) 37 | self.parser.add_argument('-lrd', '--lr_decay', default=0.95, type=float) 38 | self.parser.add_argument('-f','--frames', type=int, default=243) 39 | self.parser.add_argument('--pad', type=int, default=121) 40 | self.parser.add_argument('--refine', action='store_true') 41 | self.parser.add_argument('--reload', type=int, default=0) 42 | self.parser.add_argument('--refine_reload', type=int, default=0) 43 | self.parser.add_argument('-c','--checkpoint', type=str, default='model') 44 | self.parser.add_argument('--previous_dir', type=str, default='') 45 | self.parser.add_argument('--n_joints', type=int, default=17) 46 | 
self.parser.add_argument('--out_joints', type=int, default=17) 47 | self.parser.add_argument('--out_all', type=int, default=1) 48 | self.parser.add_argument('--in_channels', type=int, default=2) 49 | self.parser.add_argument('--out_channels', type=int, default=3) 50 | self.parser.add_argument('-previous_best_threshold', type=float, default= math.inf) 51 | self.parser.add_argument('-previous_name', type=str, default='') 52 | self.parser.add_argument('--previous_refine_name', type=str, default='') 53 | self.parser.add_argument('--manualSeed', type=int, default=1) 54 | 55 | self.parser.add_argument('--MAE', action='store_true') 56 | self.parser.add_argument('-tmr','--temporal_mask_rate', type=float, default=0) 57 | self.parser.add_argument('-smn', '--spatial_mask_num', type=int, default=0) 58 | self.parser.add_argument('-tds', '--t_downsample', type=int, default=1) 59 | 60 | self.parser.add_argument('--MAE_reload', type=int, default=0) 61 | self.parser.add_argument('-r', '--resume', action='store_true') 62 | 63 | 64 | 65 | def parse(self): 66 | self.init() 67 | self.opt = self.parser.parse_args() 68 | 69 | self.opt.pad = (self.opt.frames-1) // 2 70 | 71 | stride_num = { 72 | '9': [1, 3, 3], 73 | '27': [3, 3, 3], 74 | '351': [3, 9, 13], 75 | '81': [3, 3, 3, 3], 76 | '243': [3, 3, 3, 3, 3], 77 | } 78 | 79 | if str(self.opt.frames) in stride_num: 80 | self.opt.stride_num = stride_num[str(self.opt.frames)] 81 | else: 82 | self.opt.stride_num = None 83 | # print('no stride_num') 84 | # exit() 85 | 86 | self.opt.subjects_train = 'S1,S5,S6,S7,S8' 87 | self.opt.subjects_test = 'S9,S11' 88 | #self.opt.subjects_test = 'S11' 89 | 90 | #if self.opt.train: 91 | logtime = time.strftime('%m%d_%H%M_%S_') 92 | 93 | ckp_suffix = '' 94 | if self.opt.refine: 95 | ckp_suffix='_refine' 96 | elif self.opt.MAE: 97 | ckp_suffix = '_pretrain' 98 | else: 99 | ckp_suffix = '_STMO' 100 | self.opt.checkpoint = 'checkpoint/'+self.opt.checkpoint + '_%d'%(self.opt.pad*2+1) + \ 101 | '%s'%ckp_suffix 102 | 103 | if not os.path.exists(self.opt.checkpoint): 104 | os.makedirs(self.opt.checkpoint) 105 | 106 | if self.opt.train: 107 | args = dict((name, getattr(self.opt, name)) for name in dir(self.opt) 108 | if not name.startswith('_')) 109 | 110 | file_name = os.path.join(self.opt.checkpoint, 'opt.txt') 111 | with open(file_name, 'wt') as opt_file: 112 | opt_file.write('==> Args:\n') 113 | for k, v in sorted(args.items()): 114 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 115 | opt_file.write('==> Args:\n') 116 | 117 | return self.opt 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/model_stmo_pretrain.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from model.block.vanilla_transformer_encoder_pretrain import Transformer, Transformer_dec 4 | from model.block.strided_transformer_encoder import Transformer as Transformer_reduce 5 | import numpy as np 6 | 7 | class LayerNorm(nn.Module): 8 | def __init__(self, features, eps=1e-6): 9 | super(LayerNorm, self).__init__() 10 | self.a_2 = nn.Parameter(torch.ones(features)) 11 | self.b_2 = nn.Parameter(torch.zeros(features)) 12 | self.eps = eps 13 | 14 | def forward(self, x): 15 | mean = x.mean(-1, keepdim=True) 16 | std = x.std(-1, keepdim=True) 17 | return self.a_2 * (x - mean) / (std + self.eps) + self.b_2 18 | 19 | class Linear(nn.Module): 20 | def __init__(self, linear_size, p_dropout=0.25): 21 | 
super(Linear, self).__init__() 22 | self.l_size = linear_size 23 | 24 | self.relu = nn.LeakyReLU(0.2, inplace=True) 25 | self.dropout = nn.Dropout(p_dropout) 26 | 27 | #self.w1 = nn.Linear(self.l_size, self.l_size) 28 | self.w1 = nn.Conv1d(self.l_size, self.l_size, kernel_size=1) 29 | self.batch_norm1 = nn.BatchNorm1d(self.l_size) 30 | 31 | #self.w2 = nn.Linear(self.l_size, self.l_size) 32 | self.w2 = nn.Conv1d(self.l_size, self.l_size, kernel_size=1) 33 | self.batch_norm2 = nn.BatchNorm1d(self.l_size) 34 | 35 | def forward(self, x): 36 | y = self.w1(x) 37 | y = self.batch_norm1(y) 38 | y = self.relu(y) 39 | y = self.dropout(y) 40 | 41 | y = self.w2(y) 42 | y = self.batch_norm2(y) 43 | y = self.relu(y) 44 | y = self.dropout(y) 45 | 46 | out = x + y 47 | 48 | return out 49 | 50 | class FCBlock(nn.Module): 51 | 52 | def __init__(self, channel_in, channel_out, linear_size, block_num): 53 | super(FCBlock, self).__init__() 54 | 55 | self.linear_size = linear_size 56 | self.block_num = block_num 57 | self.layers = [] 58 | self.channel_in = channel_in 59 | self.stage_num = 3 60 | self.p_dropout = 0.1 61 | #self.fc_1 = nn.Linear(self.channel_in, self.linear_size) 62 | self.fc_1 = nn.Conv1d(self.channel_in, self.linear_size, kernel_size=1) 63 | self.bn_1 = nn.BatchNorm1d(self.linear_size) 64 | for i in range(block_num): 65 | self.layers.append(Linear(self.linear_size, self.p_dropout)) 66 | #self.fc_2 = nn.Linear(self.linear_size, channel_out) 67 | self.fc_2 = nn.Conv1d(self.linear_size, channel_out, kernel_size=1) 68 | 69 | self.layers = nn.ModuleList(self.layers) 70 | self.relu = nn.LeakyReLU(0.2, inplace=True) 71 | self.dropout = nn.Dropout(self.p_dropout) 72 | 73 | def forward(self, x): 74 | 75 | x = self.fc_1(x) 76 | x = self.bn_1(x) 77 | x = self.relu(x) 78 | x = self.dropout(x) 79 | for i in range(self.block_num): 80 | x = self.layers[i](x) 81 | x = self.fc_2(x) 82 | 83 | return x 84 | 85 | class PoseTransformer(nn.Module): 86 | def __init__(self, args): 87 | super().__init__() 88 | 89 | layers, channel, d_hid, length = 3, 256, 512, args.number_of_frames 90 | stride_num = { 91 | '9': [1, 3, 3], 92 | '27': [3, 3, 3], 93 | '351': [3, 9, 13], 94 | '81': [3, 3, 3, 3], 95 | '243': [3, 3, 3, 3, 3], 96 | } 97 | stride_num = stride_num[str(length)] 98 | self.spatial_mask_num = 2 99 | self.num_joints_in, self.num_joints_out = 17, 17 100 | 101 | self.length = length 102 | dec_dim_shrink = 2 103 | 104 | self.encoder = FCBlock(2*self.num_joints_in, channel, 2*channel, 1) 105 | 106 | self.Transformer = Transformer(layers, channel, d_hid, length=length) 107 | self.Transformer_dec = Transformer_dec(layers-1, channel//dec_dim_shrink, d_hid//dec_dim_shrink, length=length) 108 | 109 | self.encoder_to_decoder = nn.Linear(channel, channel//dec_dim_shrink, bias=False) 110 | self.encoder_LN = LayerNorm(channel) 111 | 112 | self.fcn_dec = nn.Sequential( 113 | nn.BatchNorm1d(channel//dec_dim_shrink, momentum=0.1), 114 | nn.Conv1d(channel//dec_dim_shrink, 2*self.num_joints_out, kernel_size=1) 115 | ) 116 | 117 | # self.fcn_1 = nn.Sequential( 118 | # nn.BatchNorm1d(channel, momentum=0.1), 119 | # nn.Conv1d(channel, 3*self.num_joints_out, kernel_size=1) 120 | # ) 121 | 122 | self.dec_pos_embedding = nn.Parameter(torch.randn(1, length, channel//dec_dim_shrink)) 123 | self.mask_token = nn.Parameter(torch.randn(1, 1, channel//dec_dim_shrink)) 124 | 125 | self.spatial_mask_token = nn.Parameter(torch.randn(1, 1, 2)) 126 | 127 | def forward(self, x_in, mask, spatial_mask): 128 | x_in = x_in[:, :, :, :, 0].permute(0, 2, 3, 
1).contiguous() 129 | b,f,_,_ = x_in.shape 130 | 131 | # spatial mask out 132 | x = x_in.clone() 133 | 134 | x[:,spatial_mask] = self.spatial_mask_token.expand(b,self.spatial_mask_num*f,2) 135 | 136 | 137 | x = x.view(b, f, -1) 138 | 139 | x = x.permute(0, 2, 1).contiguous() 140 | 141 | x = self.encoder(x) 142 | 143 | x = x.permute(0, 2, 1).contiguous() 144 | feas = self.Transformer(x, mask_MAE=mask) 145 | 146 | feas = self.encoder_LN(feas) 147 | feas = self.encoder_to_decoder(feas) 148 | 149 | B, N, C = feas.shape 150 | 151 | # we don't unshuffle the correct visible token order, 152 | # but shuffle the pos embedding accorddingly. 153 | expand_pos_embed = self.dec_pos_embedding.expand(B, -1, -1).clone() 154 | pos_emd_vis = expand_pos_embed[:, ~mask].reshape(B, -1, C) 155 | pos_emd_mask = expand_pos_embed[:, mask].reshape(B, -1, C) 156 | x_full = torch.cat([feas + pos_emd_vis, self.mask_token + pos_emd_mask], dim=1) 157 | 158 | x_out = self.Transformer_dec(x_full, pos_emd_mask.shape[1]) 159 | 160 | x_out = x_out.permute(0, 2, 1).contiguous() 161 | x_out = self.fcn_dec(x_out) 162 | 163 | x_out = x_out.view(b, self.num_joints_out, 2, -1) 164 | x_out = x_out.permute(0, 2, 3, 1).contiguous().unsqueeze(dim=-1) 165 | 166 | return x_out 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /H36M-Toolbox/common/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | import torch 9 | import torch.nn as nn 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | 13 | def mpjpe(predicted, target, weights=None, gamma=0, return_weights=False): 14 | """ 15 | Mean per-joint position error (i.e. mean Euclidean distance), 16 | often referred to as "Protocol #1" in many papers. 
17 | """ 18 | assert predicted.shape == target.shape 19 | norm = torch.norm(predicted - target, dim=len(target.shape)-1)#.mean(axis=2).squeeze(-1) 20 | if weights is not None: 21 | norm = (weights ** gamma) * norm 22 | # norm = (weights.view(-1, 1, 1) ** gamma) * norm 23 | 24 | if return_weights: 25 | return torch.mean(norm), norm 26 | else: 27 | return torch.mean(norm) #, norm 28 | # return torch.mean(torch.norm(predicted - target, dim=len(target.shape)-1)) 29 | 30 | def pck(pred, gt): 31 | error = np.linalg.norm(pred - gt, ord=2, axis=-1) 32 | pck = (error < 0.15).astype(np.float32).mean() * 100 33 | return pck 34 | 35 | def auc(pred, gt): 36 | error = np.linalg.norm(pred - gt, ord=2, axis=-1) 37 | 38 | thresholds = np.linspace(0., 0.15, 31) 39 | pck_values = np.zeros(len(thresholds)) 40 | for i in range(len(thresholds)): 41 | pck_values[i] = (error < thresholds[i]).astype(np.float32).mean() 42 | 43 | auc = pck_values.mean() * 100 44 | return auc 45 | 46 | class kl_loss(nn.Module): 47 | def __init__(self, num_bins): 48 | super(kl_loss, self).__init__() 49 | self.LogSoftmax = nn.LogSoftmax(dim=-1) #[B,LOGITS] 50 | self.criterion_ = nn.KLDivLoss(reduction='mean') 51 | self.num_bins = num_bins 52 | 53 | def criterion(self, dec_outs, labels): 54 | scores = self.LogSoftmax(dec_outs) 55 | loss = self.criterion_(scores, labels) 56 | return loss 57 | 58 | def forward(self, predicted, target, weights=None, gamma=0): 59 | output_x, output_y, output_z = predicted 60 | target_x = target[:,:,:,:self.num_bins[0]] 61 | target_y = target[:,:,:,self.num_bins[0]:self.num_bins[0]+self.num_bins[1]] 62 | target_z = target[:,:,:,-self.num_bins[2]:] 63 | num_joints = output_x.size(2) 64 | 65 | loss = 0 66 | for idx in range(num_joints): 67 | loss += self.criterion(output_x[:,:,idx],target_x[:,:,idx]) 68 | loss += self.criterion(output_y[:,:,idx],target_y[:,:,idx]) 69 | loss += self.criterion(output_z[:,:,idx],target_z[:,:,idx]) 70 | return loss / num_joints 71 | 72 | # def kl_loss(predicted, target, weights=None, gamma=0): 73 | # LogSoftmax = nn.LogSoftmax(dim=-1) 74 | # loss = nn.KLDivLoss(reduction="mean") 75 | # return loss(LogSoftmax(predicted), target) 76 | 77 | def mse(predicted, target, weights=None, gamma=0): 78 | loss = nn.MSELoss() 79 | return loss(predicted, target) 80 | 81 | def cross_entropy(predicted, target, weights=None, gamma=0, return_weights=False): 82 | loss = nn.CrossEntropyLoss() 83 | return loss(predicted.permute(0, 4, 1, 2, 3), target) 84 | 85 | def weighted_mpjpe(predicted, target, w): 86 | """ 87 | Weighted mean per-joint position error (i.e. mean Euclidean distance) 88 | """ 89 | assert predicted.shape == target.shape 90 | assert w.shape[0] == predicted.shape[0] 91 | return torch.mean(w * torch.norm(predicted - target, dim=len(target.shape)-1)) 92 | 93 | def p_mpjpe(predicted, target): 94 | """ 95 | Pose error: MPJPE after rigid alignment (scale, rotation, and translation), 96 | often referred to as "Protocol #2" in many papers. 
97 | """ 98 | assert predicted.shape == target.shape 99 | 100 | muX = np.mean(target, axis=1, keepdims=True) 101 | muY = np.mean(predicted, axis=1, keepdims=True) 102 | 103 | X0 = target - muX 104 | Y0 = predicted - muY 105 | 106 | normX = np.sqrt(np.sum(X0**2, axis=(1, 2), keepdims=True)) 107 | normY = np.sqrt(np.sum(Y0**2, axis=(1, 2), keepdims=True)) 108 | 109 | X0 /= normX 110 | Y0 /= normY 111 | 112 | H = np.matmul(X0.transpose(0, 2, 1), Y0) 113 | U, s, Vt = np.linalg.svd(H) 114 | V = Vt.transpose(0, 2, 1) 115 | R = np.matmul(V, U.transpose(0, 2, 1)) 116 | 117 | # Avoid improper rotations (reflections), i.e. rotations with det(R) = -1 118 | sign_detR = np.sign(np.expand_dims(np.linalg.det(R), axis=1)) 119 | V[:, :, -1] *= sign_detR 120 | s[:, -1] *= sign_detR.flatten() 121 | R = np.matmul(V, U.transpose(0, 2, 1)) # Rotation 122 | 123 | tr = np.expand_dims(np.sum(s, axis=1, keepdims=True), axis=2) 124 | 125 | a = tr * normX / normY # Scale 126 | t = muX - a*np.matmul(muY, R) # Translation 127 | 128 | # Perform rigid transformation on the input 129 | predicted_aligned = a*np.matmul(predicted, R) + t 130 | 131 | # Return MPJPE 132 | return np.mean(np.linalg.norm(predicted_aligned - target, axis=len(target.shape)-1)) 133 | 134 | def n_mpjpe(predicted, target): 135 | """ 136 | Normalized MPJPE (scale only), adapted from: 137 | https://github.com/hrhodin/UnsupervisedGeometryAwareRepresentationLearning/blob/master/losses/poses.py 138 | """ 139 | assert predicted.shape == target.shape 140 | 141 | norm_predicted = torch.mean(torch.sum(predicted**2, dim=3, keepdim=True), dim=2, keepdim=True) 142 | norm_target = torch.mean(torch.sum(target*predicted, dim=3, keepdim=True), dim=2, keepdim=True) 143 | scale = norm_target / norm_predicted 144 | return mpjpe(scale * predicted, target)#[0] 145 | 146 | def weighted_bonelen_loss(predict_3d_length, gt_3d_length): 147 | loss_length = 0.001 * torch.pow(predict_3d_length - gt_3d_length, 2).mean() 148 | return loss_length 149 | 150 | def weighted_boneratio_loss(predict_3d_length, gt_3d_length): 151 | loss_length = 0.1 * torch.pow((predict_3d_length - gt_3d_length)/gt_3d_length, 2).mean() 152 | return loss_length 153 | 154 | def mean_velocity_error(predicted, target): 155 | """ 156 | Mean per-joint velocity error (i.e. 
mean Euclidean distance of the 1st derivative) 157 | """ 158 | assert predicted.shape == target.shape 159 | 160 | velocity_predicted = np.diff(predicted, axis=0) 161 | velocity_target = np.diff(target, axis=0) 162 | 163 | return np.mean(np.linalg.norm(velocity_predicted - velocity_target, axis=len(target.shape)-1)) -------------------------------------------------------------------------------- /ContextPose/mvn/models/cpn/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import time 4 | import matplotlib.pyplot as plt 5 | 6 | import torch 7 | import torch.nn.parallel 8 | import torch.backends.cudnn as cudnn 9 | import torch.optim 10 | import torchvision.datasets as datasets 11 | 12 | from config import cfg 13 | from utils.logger import Logger 14 | from utils.evaluation import AverageMeter 15 | from utils.misc import save_model, adjust_learning_rate 16 | from utils.osutils import mkdir_p, isfile, isdir, join 17 | from networks import network 18 | from dataloader.mscocoMulti import MscocoMulti 19 | 20 | 21 | def main(args): 22 | # create checkpoint dir 23 | if not isdir(args.checkpoint): 24 | mkdir_p(args.checkpoint) 25 | 26 | # create model 27 | model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained = True) 28 | model = torch.nn.DataParallel(model).cuda() 29 | 30 | # define loss function (criterion) and optimizer 31 | criterion1 = torch.nn.MSELoss().cuda() # for Global loss 32 | criterion2 = torch.nn.MSELoss(reduce=False).cuda() # for refine loss 33 | optimizer = torch.optim.Adam(model.parameters(), 34 | lr = cfg.lr, 35 | weight_decay=cfg.weight_decay) 36 | 37 | if args.resume: 38 | if isfile(args.resume): 39 | print("=> loading checkpoint '{}'".format(args.resume)) 40 | checkpoint = torch.load(args.resume) 41 | pretrained_dict = checkpoint['state_dict'] 42 | model.load_state_dict(pretrained_dict) 43 | args.start_epoch = checkpoint['epoch'] 44 | optimizer.load_state_dict(checkpoint['optimizer']) 45 | print("=> loaded checkpoint '{}' (epoch {})" 46 | .format(args.resume, checkpoint['epoch'])) 47 | logger = Logger(join(args.checkpoint, 'log.txt'), resume=True) 48 | else: 49 | print("=> no checkpoint found at '{}'".format(args.resume)) 50 | else: 51 | logger = Logger(join(args.checkpoint, 'log.txt')) 52 | logger.set_names(['Epoch', 'LR', 'Train Loss']) 53 | 54 | cudnn.benchmark = True 55 | print(' Total params: %.2fMB' % (sum(p.numel() for p in model.parameters())/(1024*1024)*4)) 56 | 57 | train_loader = torch.utils.data.DataLoader( 58 | MscocoMulti(cfg), 59 | batch_size=cfg.batch_size*args.num_gpus, shuffle=True, 60 | num_workers=args.workers, pin_memory=True) 61 | 62 | for epoch in range(args.start_epoch, args.epochs): 63 | lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma) 64 | print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr)) 65 | 66 | # train for one epoch 67 | train_loss = train(train_loader, model, [criterion1, criterion2], optimizer) 68 | print('train_loss: ',train_loss) 69 | 70 | # append logger file 71 | logger.append([epoch + 1, lr, train_loss]) 72 | 73 | save_model({ 74 | 'epoch': epoch + 1, 75 | 'state_dict': model.state_dict(), 76 | 'optimizer' : optimizer.state_dict(), 77 | }, checkpoint=args.checkpoint) 78 | 79 | logger.close() 80 | 81 | 82 | 83 | def train(train_loader, model, criterions, optimizer): 84 | # prepare for refine loss 85 | def ohkm(loss, top_k): 86 | ohkm_loss = 0. 
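        # Online hard keypoint mining (OHKM): for each sample, keep only the top_k largest
        # per-keypoint refine losses, average those, then average over the batch.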
87 | for i in range(loss.size()[0]): 88 | sub_loss = loss[i] 89 | topk_val, topk_idx = torch.topk(sub_loss, k=top_k, dim=0, sorted=False) 90 | tmp_loss = torch.gather(sub_loss, 0, topk_idx) 91 | ohkm_loss += torch.sum(tmp_loss) / top_k 92 | ohkm_loss /= loss.size()[0] 93 | return ohkm_loss 94 | criterion1, criterion2 = criterions 95 | 96 | batch_time = AverageMeter() 97 | data_time = AverageMeter() 98 | losses = AverageMeter() 99 | 100 | # switch to train mode 101 | model.train() 102 | 103 | for i, (inputs, targets, valid, meta) in enumerate(train_loader): 104 | input_var = torch.autograd.Variable(inputs.cuda()) 105 | 106 | target15, target11, target9, target7 = targets 107 | refine_target_var = torch.autograd.Variable(target7.cuda(async=True)) 108 | valid_var = torch.autograd.Variable(valid.cuda(async=True)) 109 | 110 | # compute output 111 | global_outputs, refine_output = model(input_var) 112 | score_map = refine_output.data.cpu() 113 | 114 | loss = 0. 115 | global_loss_record = 0. 116 | refine_loss_record = 0. 117 | # comput global loss and refine loss 118 | for global_output, label in zip(global_outputs, targets): 119 | num_points = global_output.size()[1] 120 | global_label = label * (valid > 1.1).type(torch.FloatTensor).view(-1, num_points, 1, 1) 121 | global_loss = criterion1(global_output, torch.autograd.Variable(global_label.cuda(async=True))) / 2.0 122 | loss += global_loss 123 | global_loss_record += global_loss.data.item() 124 | refine_loss = criterion2(refine_output, refine_target_var) 125 | refine_loss = refine_loss.mean(dim=3).mean(dim=2) 126 | refine_loss *= (valid_var > 0.1).type(torch.cuda.FloatTensor) 127 | refine_loss = ohkm(refine_loss, 8) 128 | loss += refine_loss 129 | refine_loss_record = refine_loss.data.item() 130 | 131 | # record loss 132 | losses.update(loss.data.item(), inputs.size(0)) 133 | 134 | # compute gradient and do Optimization step 135 | optimizer.zero_grad() 136 | loss.backward() 137 | optimizer.step() 138 | 139 | if(i%100==0 and i!=0): 140 | print('iteration {} | loss: {}, global loss: {}, refine loss: {}, avg loss: {}' 141 | .format(i, loss.data.item(), global_loss_record, 142 | refine_loss_record, losses.avg)) 143 | 144 | return losses.avg 145 | 146 | 147 | 148 | if __name__ == '__main__': 149 | parser = argparse.ArgumentParser(description='PyTorch CPN Training') 150 | parser.add_argument('-j', '--workers', default=12, type=int, metavar='N', 151 | help='number of data loading workers (default: 12)') 152 | parser.add_argument('-g', '--num_gpus', default=1, type=int, metavar='N', 153 | help='number of GPU to use (default: 1)') 154 | parser.add_argument('--epochs', default=32, type=int, metavar='N', 155 | help='number of total epochs to run (default: 32)') 156 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 157 | help='manual epoch number (useful on restarts)') 158 | parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH', 159 | help='path to save checkpoint (default: checkpoint)') 160 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 161 | help='path to latest checkpoint') 162 | 163 | 164 | main(parser.parse_args()) 165 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/cpn/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import torch 5 | import torch.nn.parallel 6 | import torch.optim 7 | import cv2 8 | import json 9 | import numpy 
as np 10 | 11 | from test_config import cfg 12 | from pycocotools.coco import COCO 13 | from pycocotools.cocoeval import COCOeval 14 | from utils.osutils import mkdir_p, isdir 15 | from utils.imutils import im_to_numpy, im_to_torch 16 | from networks import network 17 | from dataloader.mscocoMulti import MscocoMulti 18 | from tqdm import tqdm 19 | 20 | def main(args): 21 | # create model 22 | model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained = False) 23 | model = torch.nn.DataParallel(model).cuda() 24 | 25 | test_loader = torch.utils.data.DataLoader( 26 | MscocoMulti(cfg, train=False), 27 | batch_size=args.batch*args.num_gpus, shuffle=False, 28 | num_workers=args.workers, pin_memory=True) 29 | 30 | # load trainning weights 31 | checkpoint_file = os.path.join(args.checkpoint, args.test+'.pth.tar') 32 | checkpoint = torch.load(checkpoint_file) 33 | model.load_state_dict(checkpoint['state_dict']) 34 | print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch'])) 35 | 36 | # change to evaluation mode 37 | model.eval() 38 | 39 | print('testing...') 40 | full_result = [] 41 | for i, (inputs, meta) in tqdm(enumerate(test_loader)): 42 | with torch.no_grad(): 43 | input_var = torch.autograd.Variable(inputs.cuda()) 44 | if args.flip == True: 45 | flip_inputs = inputs.clone() 46 | for i, finp in enumerate(flip_inputs): 47 | finp = im_to_numpy(finp) 48 | finp = cv2.flip(finp, 1) 49 | flip_inputs[i] = im_to_torch(finp) 50 | flip_input_var = torch.autograd.Variable(flip_inputs.cuda()) 51 | 52 | # compute output 53 | global_outputs, refine_output = model(input_var) 54 | score_map = refine_output.data.cpu() 55 | score_map = score_map.numpy() 56 | 57 | if args.flip == True: 58 | flip_global_outputs, flip_output = model(flip_input_var) 59 | flip_score_map = flip_output.data.cpu() 60 | flip_score_map = flip_score_map.numpy() 61 | 62 | for i, fscore in enumerate(flip_score_map): 63 | fscore = fscore.transpose((1,2,0)) 64 | fscore = cv2.flip(fscore, 1) 65 | fscore = list(fscore.transpose((2,0,1))) 66 | for (q, w) in cfg.symmetry: 67 | fscore[q], fscore[w] = fscore[w], fscore[q] 68 | fscore = np.array(fscore) 69 | score_map[i] += fscore 70 | score_map[i] /= 2 71 | 72 | ids = meta['imgID'].numpy() 73 | det_scores = meta['det_scores'] 74 | for b in range(inputs.size(0)): 75 | details = meta['augmentation_details'] 76 | single_result_dict = {} 77 | single_result = [] 78 | 79 | single_map = score_map[b] 80 | r0 = single_map.copy() 81 | r0 /= 255 82 | r0 += 0.5 83 | v_score = np.zeros(17) 84 | for p in range(17): 85 | single_map[p] /= np.amax(single_map[p]) 86 | border = 10 87 | dr = np.zeros((cfg.output_shape[0] + 2*border, cfg.output_shape[1]+2*border)) 88 | dr[border:-border, border:-border] = single_map[p].copy() 89 | dr = cv2.GaussianBlur(dr, (21, 21), 0) 90 | lb = dr.argmax() 91 | y, x = np.unravel_index(lb, dr.shape) 92 | dr[y, x] = 0 93 | lb = dr.argmax() 94 | py, px = np.unravel_index(lb, dr.shape) 95 | y -= border 96 | x -= border 97 | py -= border + y 98 | px -= border + x 99 | ln = (px ** 2 + py ** 2) ** 0.5 100 | delta = 0.25 101 | if ln > 1e-3: 102 | x += delta * px / ln 103 | y += delta * py / ln 104 | x = max(0, min(x, cfg.output_shape[1] - 1)) 105 | y = max(0, min(y, cfg.output_shape[0] - 1)) 106 | resy = float((4 * y + 2) / cfg.data_shape[0] * (details[b][3] - details[b][1]) + details[b][1]) 107 | resx = float((4 * x + 2) / cfg.data_shape[1] * (details[b][2] - details[b][0]) + details[b][0]) 108 | v_score[p] = float(r0[p, 
int(round(y)+1e-10), int(round(x)+1e-10)]) 109 | single_result.append(resx) 110 | single_result.append(resy) 111 | single_result.append(1) 112 | if len(single_result) != 0: 113 | single_result_dict['image_id'] = int(ids[b]) 114 | single_result_dict['category_id'] = 1 115 | single_result_dict['keypoints'] = single_result 116 | single_result_dict['score'] = float(det_scores[b])*v_score.mean() 117 | full_result.append(single_result_dict) 118 | 119 | result_path = args.result 120 | if not isdir(result_path): 121 | mkdir_p(result_path) 122 | result_file = os.path.join(result_path, 'result.json') 123 | with open(result_file,'w') as wf: 124 | json.dump(full_result, wf) 125 | 126 | # evaluate on COCO 127 | eval_gt = COCO(cfg.ori_gt_path) 128 | eval_dt = eval_gt.loadRes(result_file) 129 | cocoEval = COCOeval(eval_gt, eval_dt, iouType='keypoints') 130 | cocoEval.evaluate() 131 | cocoEval.accumulate() 132 | cocoEval.summarize() 133 | 134 | if __name__ == '__main__': 135 | parser = argparse.ArgumentParser(description='PyTorch CPN Test') 136 | parser.add_argument('-j', '--workers', default=12, type=int, metavar='N', 137 | help='number of data loading workers (default: 12)') 138 | parser.add_argument('-g', '--num_gpus', default=1, type=int, metavar='N', 139 | help='number of GPU to use (default: 1)') 140 | parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH', 141 | help='path to load checkpoint (default: checkpoint)') 142 | parser.add_argument('-f', '--flip', default=True, type=bool, 143 | help='flip input image during test (default: True)') 144 | parser.add_argument('-b', '--batch', default=128, type=int, 145 | help='test batch size (default: 128)') 146 | parser.add_argument('-t', '--test', default='CPN256x192', type=str, 147 | help='using which checkpoint to be tested (default: CPN256x192') 148 | parser.add_argument('-r', '--result', default='result', type=str, 149 | help='path to save save result (default: result)') 150 | main(parser.parse_args()) -------------------------------------------------------------------------------- /H36M-Toolbox/transform.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import cv2 3 | import numpy as np 4 | import os.path as osp 5 | import h5py 6 | 7 | 8 | def _infer_box(pose3d, camera, rootIdx): 9 | root_joint = pose3d[rootIdx, :] 10 | tl_joint = root_joint.copy() 11 | tl_joint[0] -= 1000.0 12 | tl_joint[1] -= 900.0 13 | br_joint = root_joint.copy() 14 | br_joint[0] += 1000.0 15 | br_joint[1] += 1100.0 16 | tl_joint = np.reshape(tl_joint, (1, 3)) 17 | br_joint = np.reshape(br_joint, (1, 3)) 18 | 19 | tl2d = _weak_project(tl_joint, camera['fx'], camera['fy'], camera['cx'], 20 | camera['cy']).flatten() 21 | 22 | br2d = _weak_project(br_joint, camera['fx'], camera['fy'], camera['cx'], 23 | camera['cy']).flatten() 24 | return np.array([tl2d[0], tl2d[1], br2d[0], br2d[1]]) 25 | 26 | 27 | def _weak_project(pose3d, fx, fy, cx, cy): 28 | pose2d = pose3d[:, :2] / pose3d[:, 2:3] 29 | pose2d[:, 0] *= fx 30 | pose2d[:, 1] *= fy 31 | pose2d[:, 0] += cx 32 | pose2d[:, 1] += cy 33 | return pose2d 34 | 35 | 36 | def get_3rd_point(a, b): 37 | direct = a - b 38 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 39 | 40 | 41 | def get_dir(src_point, rot_rad): 42 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 43 | 44 | src_result = [0, 0] 45 | src_result[0] = src_point[0] * cs - src_point[1] * sn 46 | src_result[1] = src_point[0] * sn + src_point[1] * cs 47 | 48 | return src_result 49 | 
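# A minimal usage sketch for the helpers below (field names such as sample['image'],
# sample['center'], sample['scale'] and sample['joints_2d'] are assumptions modeled on the
# commented-out demo at the bottom of this file, not a guaranteed pickle layout):
#
#   sample = pickle.load(open('./h36m_train_hr.pkl', 'rb'))[0]
#   img = cv2.imread(osp.join('images', sample['image']))
#   trans = get_affine_transform(sample['center'], sample['scale'], 0, [192, 256])
#   crop = cv2.warpAffine(img, trans, (192, 256), flags=cv2.INTER_LINEAR)   # 192x256 person crop
#   joints_crop = np.array([affine_transform(pt, trans) for pt in sample['joints_2d']])
#
# get_affine_transform builds the 2x3 matrix that maps the box given by center/scale
# (scale is in units of 200 px) onto output_size; pass inv=1 for the inverse mapping.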
50 | 
51 | def get_affine_transform(
52 | center, scale, rot, output_size,
53 | shift=np.array([0, 0], dtype=np.float32), inv=0
54 | ):
55 | center = np.array(center)
56 | scale = np.array(scale)
57 | 
58 | scale_tmp = scale * 200.0
59 | src_w = scale_tmp[0]
60 | dst_w = output_size[0]
61 | dst_h = output_size[1]
62 | 
63 | # rot_rad = np.pi * rot / 180
64 | 
65 | # src_dir = get_dir([0, (src_w-1) * -0.5], rot_rad)
66 | src_dir = np.array([0, (src_w-1) * -0.5], np.float32)
67 | dst_dir = np.array([0, (dst_w-1) * -0.5], np.float32)
68 | src = np.zeros((3, 2), dtype=np.float32)
69 | dst = np.zeros((3, 2), dtype=np.float32)
70 | src[0, :] = center + scale_tmp * shift
71 | src[1, :] = center + src_dir + scale_tmp * shift
72 | dst[0, :] = [(dst_w-1) * 0.5, (dst_h-1) * 0.5]
73 | dst[1, :] = np.array([(dst_w-1) * 0.5, (dst_h-1) * 0.5]) + dst_dir
74 | 
75 | src[2:, :] = get_3rd_point(src[0, :], src[1, :])
76 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
77 | 
78 | if inv:
79 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
80 | else:
81 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
82 | 
83 | return trans
84 | 
85 | 
86 | def affine_transform(pt, t):
87 | new_pt = np.array([pt[0], pt[1], 1.]).T
88 | new_pt = np.dot(t, new_pt)
89 | return new_pt[:2]
90 | 
91 | 
92 | def normalize_screen_coordinates(X, w, h):
93 | assert X.shape[-1] == 2
94 | 
95 | # Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio
96 | return X/w*2 - [1, h/w]
97 | 
98 | # train_data = pickle.load(file=open('./h36m_train_hr.pkl', 'rb'))
99 | # img = train_data[0]
100 | # path = osp.join('images', img['image'])
101 | # c = img['center'] # [x, y], measured from the top-left corner of the image
102 | # s = img['scale']
103 | # print(s)
104 | # pose2d = img['joints_2d_gt_crop']
105 | # print(pose2d)
106 | # print(c, s)
107 | # pose2d_1 = np.zeros_like(pose2d)
108 | # pose2d_1_inv = np.zeros_like(pose2d)
109 | # pose2d_2 = np.zeros_like(pose2d)
110 | # pose2d_2_inv = np.zeros_like(pose2d)
111 | # box = img['box']
112 | # left_top = np.array(pose2d[5])
113 | 
114 | # data_numpy = cv2.imread(
115 | # path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
116 | # )
117 | 
118 | # h, w, _ = data_numpy.shape
119 | 
120 | # trans = get_affine_transform(c, s, 0, [192, 256])
121 | # trans_inv = get_affine_transform(c, s, 0, [192, 256], inv=1)
122 | # left_top = affine_transform(left_top, trans)
123 | # input = cv2.warpAffine(
124 | # data_numpy,
125 | # trans,
126 | # ([192, 256]),
127 | # flags=cv2.INTER_LINEAR)
128 | 
129 | # for i in range(17):
130 | # pose2d_1[i] = affine_transform(pose2d[i], trans)
131 | # pose2d_2[i] = pose2d[i] / np.array([192, 256]) * np.array([288, 384])
132 | # cv2.circle(input, (int(pose2d[i,0]), int(pose2d[i,1])), 3, (0, 255, 0), -1)
133 | # print(pose2d_1)
134 | # print(pose2d_1/[4,4])
135 | # retval = cv2.imwrite("./demo/demo_00.jpg", input)
136 | 
137 | # trans = get_affine_transform(c, s, 0, [288, 384])
138 | # input = cv2.warpAffine(
139 | # data_numpy,
140 | # trans,
141 | # ([288, 384]),
142 | # flags=cv2.INTER_LINEAR)
143 | 
144 | # for i in range(17):
145 | # pose2d_1[i] = affine_transform(pose2d[i], trans)
146 | # print(pose2d_2[i])
147 | # cv2.circle(input, (int(pose2d_2[i,0]), int(pose2d_2[i,1])), 3, (0, 255, 0), -1)
148 | # print(pose2d_1)
149 | # print(pose2d_1/[4,4])
150 | # retval = cv2.imwrite("./demo/demo_01.jpg", input)
151 | 
152 | # trans = get_affine_transform(c, s, 0, [192//4, 256//4])
153 | # input = cv2.warpAffine(
154 | # data_numpy,
155 | # trans,
156 | # ([192//4, 256//4]),
157 | # flags=cv2.INTER_LINEAR)
158 | # for i in range(17):
159 | # pose2d_2[i] = affine_transform(pose2d[i], trans)
160 | # cv2.circle(input, (int(pose2d_1[i,0]), int(pose2d_1[i,1])), 3, (255, 0, 0), -1)
161 | # print(pose2d_2)
162 | # print(pose2d_2-pose2d_1/[4,4])
163 | # retval = cv2.imwrite("./demo/demo_01.jpg", input)
164 | 
165 | 
166 | # cv2.circle(input, (int(left_top[0]), int(left_top[1])), 10, (255, 0, 0), -1)
167 | # retval = cv2.imwrite("./demo/demo_00.jpg", input)
168 | 
169 | # input = input[:, ::-1]
170 | # left_top = affine_transform(left_top, trans)
171 | # cv2.circle(input, (int(w-left_top[0]-1),int(left_top[1])), 10, (255, 0, 0), -1)
172 | # retval = cv2.imwrite(".demo/demo_new.jpg", input) # "/demo.jpg" would be saved to the repo root directory
173 | 
174 | 
175 | 
176 | # joints_left = [4, 5, 6, 11, 12, 13]
177 | # joints_right = [1, 2, 3, 14, 15, 16]
178 | 
179 | # save_dir = osp.join('h36m_256x192', 'S1')
180 | # action = 'act_{:02d}_subact_{:02d}'.format(2, 1)
181 | # file = h5py.File(osp.join(save_dir, action+".h5"), "r")
182 | 
183 | # image = file['data'][:][55, 0, 0]
184 | # pose2d = file['pose2d'][:][55, 0, 0]
185 | # # image_flip = file['data'][:][0, 0, 1]
186 | # image_flip = np.array(image[:, ::-1], copy=True)
187 | # # pose2d_flip = file['pose2d'][:][-1, 0, 1]
188 | # pose2d_flip = np.array(pose2d, copy=True)
189 | # pose2d_flip[:, 0] = image.shape[1] - pose2d_flip[:, 0] - 1
190 | # pose2d_flip[joints_left + joints_right] = pose2d_flip[joints_right + joints_left]
191 | # print(image.shape, pose2d.shape)
192 | # # print((image!=image_flip[:,::-1]).sum())
193 | 
194 | # for i in range(17):
195 | # cv2.circle(image, (int(pose2d[i, 0]), int(pose2d[i, 1])), 2, (255, 0, 0), -1)
196 | # cv2.circle(image_flip, (int(pose2d_flip[i, 0]), int(pose2d_flip[i, 1])), 2, (0, 0, 255), -1)
197 | 
198 | # img = cv2.resize(image, (192//4,256//4))
199 | # retval = cv2.imwrite("./demo/demo_2023_small.jpg", img)
200 | # retval = cv2.imwrite("./demo/demo_2023.jpg", image)
201 | # retval = cv2.imwrite("./demo/demo_2023_flip.jpg", image_flip)
202 | 
203 | 
204 | 
205 | 
206 | 
207 | 
208 | 
209 | 
210 | 
--------------------------------------------------------------------------------
/ContextPose_mpi/common/cfg.py:
--------------------------------------------------------------------------------
1 | import yaml
2 | from easydict import EasyDict as edict
3 | import numpy as np
4 | import os
5 | 
6 | config = edict()
7 | 
8 | config.title = "human36m_vol_softmax_single"
9 | config.kind = "human36m"
10 | config.azureroot = ""
11 | config.logdir = "logs"
12 | config.batch_output = False
13 | config.vis_freq = 1000
14 | config.vis_n_elements = 10
15 | 
16 | 
17 | # model definition
18 | config.model = edict()
19 | config.model.name = "vol"
20 | config.model.kind = "mpii"
21 | config.model.image_shape = [384, 384]
22 | config.model.heatmap_shape = [96, 96]
23 | config.model.heatmap_softmax = True
24 | config.model.heatmap_multiplier = 100.0
25 | config.model.init_weights = True
26 | config.model.checkpoint = None
27 | 
28 | config.model.backbone = edict()
29 | config.model.backbone.name = "resnet152"
30 | config.model.backbone.style = "simple"
31 | config.model.backbone.num_final_layer_channel = 17
32 | config.model.backbone.num_joints = 17
33 | config.model.backbone.num_layers = 152
34 | config.model.backbone.init_weights = True
35 | config.model.backbone.fix_weights = True
36 | # config.model.backbone.checkpoint = "dataset/pretrained/pose_hrnet_w32_256x192.pth"
37 | config.model.backbone.checkpoint = 
"dataset/pretrained/pose_hrnet_w48_256x192.pth" 38 | # config.model.backbone.checkpoint = "dataset/pretrained/CPN50_256x192.pth.tar" 39 | 40 | config.model.backbone.NUM_JOINTS = 17 41 | config.model.backbone.PRETRAINED_LAYERS = ['*'] 42 | config.model.backbone.STEM_INPLANES = 64 43 | config.model.backbone.FINAL_CONV_KERNEL = 1 44 | 45 | config.model.backbone.STAGE2 = edict() 46 | config.model.backbone.STAGE2.NUM_MODULES = 1 47 | config.model.backbone.STAGE2.NUM_BRANCHES = 2 48 | config.model.backbone.STAGE2.NUM_BLOCKS = [4, 4] 49 | config.model.backbone.STAGE2.NUM_CHANNELS = [48, 96] 50 | config.model.backbone.STAGE2.BLOCK = 'BASIC' 51 | config.model.backbone.STAGE2.FUSE_METHOD = 'SUM' 52 | 53 | config.model.backbone.STAGE3 = edict() 54 | config.model.backbone.STAGE3.NUM_MODULES = 4 55 | config.model.backbone.STAGE3.NUM_BRANCHES = 3 56 | config.model.backbone.STAGE3.NUM_BLOCKS = [4, 4, 4] 57 | config.model.backbone.STAGE3.NUM_CHANNELS = [48, 96, 192] 58 | config.model.backbone.STAGE3.BLOCK = 'BASIC' 59 | config.model.backbone.STAGE3.FUSE_METHOD = 'SUM' 60 | 61 | config.model.backbone.STAGE4 = edict() 62 | config.model.backbone.STAGE4.NUM_MODULES = 3 63 | config.model.backbone.STAGE4.NUM_BRANCHES = 4 64 | config.model.backbone.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 65 | config.model.backbone.STAGE4.NUM_CHANNELS = [48, 96, 192, 384] 66 | config.model.backbone.STAGE4.BLOCK = 'BASIC' 67 | config.model.backbone.STAGE4.FUSE_METHOD = 'SUM' 68 | 69 | config.model.volume_net = edict() 70 | config.model.volume_net.volume_aggregation_method = "softmax" 71 | config.model.volume_net.use_gt_pelvis = False 72 | config.model.volume_net.cuboid_size = 2500.0 73 | config.model.volume_net.volume_size = 64 74 | config.model.volume_net.volume_multiplier = 1.0 75 | config.model.volume_net.volume_softmax = True 76 | config.model.volume_net.use_feature_v2v = True 77 | config.model.volume_net.att_channels = 51 78 | config.model.volume_net.temperature = 1500 79 | 80 | config.model.poseformer = edict() 81 | config.model.poseformer.base_dim = 48 82 | config.model.poseformer.embed_dim_ratio = 96 83 | config.model.poseformer.depth = 4 84 | config.model.poseformer.levels = 4 85 | 86 | # loss related params 87 | config.loss = edict() 88 | config.loss.criterion = "MAE" 89 | config.loss.mse_smooth_threshold = 0 90 | config.loss.grad_clip = 0 91 | config.loss.scale_keypoints_3d = 0.1 92 | config.loss.use_volumetric_ce_loss = True 93 | config.loss.volumetric_ce_loss_weight = 0.01 94 | config.loss.use_global_attention_loss = True 95 | config.loss.global_attention_loss_weight = 1000000 96 | 97 | # dataset related params 98 | config.dataset = edict() 99 | config.dataset.kind = "human36m" 100 | config.dataset.data_format = '' 101 | config.dataset.transfer_cmu_to_human36m = False 102 | config.dataset.root = "../H36M-Toolbox/images/" 103 | config.dataset.extra_root = "data/human36m/extra" 104 | config.dataset.train_labels_path = "data/human36m/extra/human36m-multiview-labels-GTbboxes.npy" 105 | config.dataset.val_labels_path = "data/human36m/extra/human36m-multiview-labels-GTbboxes.npy" 106 | config.dataset.train_dataset = "multiview_human36m" 107 | config.dataset.val_dataset = "human36m" 108 | 109 | # train related params 110 | config.train = edict() 111 | config.train.n_objects_per_epoch = 15000 112 | config.train.n_epochs = 9999 113 | config.train.n_iters_per_epoch = 5000 114 | config.train.batch_size = 3 115 | config.train.optimizer = 'Adam' 116 | config.train.backbone_lr = 0.0001 117 | config.train.backbone_lr_step = [1000] 118 | 
config.train.backbone_lr_factor = 0.1 119 | config.train.process_features_lr = 0.001 120 | config.train.volume_net_lr = 0.001 121 | config.train.volume_net_lr_decay = 0.99 122 | config.train.volume_net_lr_step = [1000] 123 | config.train.volume_net_lr_factor = 0.5 124 | config.train.with_damaged_actions = True 125 | config.train.undistort_images = True 126 | config.train.scale_bbox = 1.0 127 | config.train.ignore_cameras = [] 128 | config.train.crop = True 129 | config.train.erase = False 130 | config.train.shuffle = True 131 | config.train.randomize_n_views = True 132 | config.train.min_n_views = 1 133 | config.train.max_n_views = 1 134 | config.train.num_workers = 8 135 | config.train.limb_length_path = "data/human36m/extra/mean_and_std_limb_length.h5" 136 | config.train.pred_results_path = "data/pretrained/human36m/human36m_alg_10-04-2019/checkpoints/0060/results/train.pkl" 137 | 138 | # val related params 139 | config.val = edict() 140 | config.val.flip_test = True 141 | config.val.batch_size = 6 142 | config.val.with_damaged_actions = True 143 | config.val.undistort_images = True 144 | config.val.scale_bbox = 1.0 145 | config.val.ignore_cameras = [] 146 | config.val.crop = True 147 | config.val.erase = False 148 | config.val.shuffle = False 149 | config.val.randomize_n_views = True 150 | config.val.min_n_views = 1 151 | config.val.max_n_views = 1 152 | config.val.num_workers = 10 153 | config.val.retain_every_n_frames_in_test = 1 154 | config.val.limb_length_path = "data/human36m/extra/mean_and_std_limb_length.h5" 155 | config.val.pred_results_path = "data/pretrained/human36m/human36m_alg_10-04-2019/checkpoints/0060/results/val.pkl" 156 | 157 | def update_dict(v, cfg): 158 | for kk, vv in v.items(): 159 | if kk in cfg: 160 | if isinstance(vv, dict): 161 | update_dict(vv, cfg[kk]) 162 | else: 163 | cfg[kk] = vv 164 | else: 165 | raise ValueError("{} not exist in cfg.py".format(kk)) 166 | 167 | 168 | def update_config(path): 169 | exp_config = None 170 | with open(path) as fin: 171 | exp_config = edict(yaml.safe_load(fin)) 172 | update_dict(exp_config, config) 173 | 174 | 175 | def handle_azureroot(config_dict, azureroot): 176 | for key in config_dict.keys(): 177 | if isinstance(config_dict[key], str): 178 | if config_dict[key].startswith('data/'): 179 | config_dict[key] = os.path.join(azureroot, config_dict[key]) 180 | elif isinstance(config_dict[key], dict): 181 | handle_azureroot(config_dict[key], azureroot) 182 | 183 | 184 | def update_dir(azureroot, logdir): 185 | config.azureroot = azureroot 186 | config.logdir = os.path.join(config.azureroot, logdir) 187 | if config.model.checkpoint != None and not config.model.checkpoint.startswith('data/'): 188 | config.model.checkpoint = os.path.join(config.azureroot, config.model.checkpoint) 189 | handle_azureroot(config, config.azureroot) 190 | 191 | -------------------------------------------------------------------------------- /ContextPose/mvn/models/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | from torch import nn 5 | 6 | 7 | def UNCERTAINTY(sigma_list, keypoints_pred, keypoints_gt): 8 | loss = 0.0 9 | diff = keypoints_pred - keypoints_gt 10 | for sigma in sigma_list: 11 | loss_term = torch.mean(torch.norm(diff / (sigma + 1e-6), dim=len(keypoints_gt.shape)-1)) + 0.01 * torch.mean(torch.log(sigma + 1e-6)) 12 | loss += loss_term 13 | return loss 14 | 15 | 16 | class MPJPE(nn.Module): 17 | def __init__(self): 18 | super().__init__() 19 | 20 | def 
forward(self, keypoints_pred, keypoints_gt): 21 | assert keypoints_pred.shape == keypoints_gt.shape 22 | return torch.mean(torch.norm(keypoints_pred - keypoints_gt, dim=len(keypoints_gt.shape)-1)) 23 | 24 | 25 | class P_MPJPE(nn.Module): 26 | def __init__(self): 27 | super().__init__() 28 | 29 | def forward(self, keypoints_pred, keypoints_gt): 30 | """ 31 | Pose error: MPJPE after rigid alignment (scale, rotation, and translation), 32 | often referred to as "Protocol #2" in many papers. 33 | """ 34 | assert keypoints_pred.shape == keypoints_gt.shape 35 | 36 | muX = np.mean(keypoints_gt, axis=1, keepdims=True) 37 | muY = np.mean(keypoints_pred, axis=1, keepdims=True) 38 | 39 | X0 = keypoints_gt - muX 40 | Y0 = keypoints_pred - muY 41 | 42 | normX = np.sqrt(np.sum(X0**2, axis=(1, 2), keepdims=True)) 43 | normY = np.sqrt(np.sum(Y0**2, axis=(1, 2), keepdims=True)) 44 | 45 | X0 /= normX 46 | Y0 /= normY 47 | 48 | H = np.matmul(X0.transpose(0, 2, 1), Y0) 49 | U, s, Vt = np.linalg.svd(H) 50 | V = Vt.transpose(0, 2, 1) 51 | R = np.matmul(V, U.transpose(0, 2, 1)) 52 | 53 | # Avoid improper rotations (reflections), i.e. rotations with det(R) = -1 54 | sign_detR = np.sign(np.expand_dims(np.linalg.det(R), axis=1)) 55 | V[:, :, -1] *= sign_detR 56 | s[:, -1] *= sign_detR.flatten() 57 | R = np.matmul(V, U.transpose(0, 2, 1)) # Rotation 58 | 59 | tr = np.expand_dims(np.sum(s, axis=1, keepdims=True), axis=2) 60 | 61 | a = tr * normX / normY # Scale 62 | t = muX - a*np.matmul(muY, R) # Translation 63 | 64 | # Perform rigid transformation on the input 65 | keypoints_pred_aligned = a*np.matmul(keypoints_pred, R) + t 66 | 67 | # Return MPJPE 68 | return np.mean(np.linalg.norm(keypoints_pred_aligned - keypoints_gt, axis=len(keypoints_gt.shape)-1)) 69 | 70 | 71 | class N_MPJPE(nn.Module): 72 | def __init__(self): 73 | super().__init__() 74 | 75 | def forward(self, keypoints_pred, keypoints_gt): 76 | """ 77 | Normalized MPJPE (scale only), adapted from: 78 | https://github.com/hrhodin/UnsupervisedGeometryAwareRepresentationLearning/blob/master/losses/poses.py 79 | """ 80 | assert keypoints_pred.shape == keypoints_gt.shape 81 | 82 | norm_keypoints_pred = torch.mean(torch.sum(keypoints_pred**2, dim=3, keepdim=True), dim=2, keepdim=True) 83 | norm_keypoints_gt = torch.mean(torch.sum(keypoints_gt*keypoints_pred, dim=3, keepdim=True), dim=2, keepdim=True) 84 | scale = norm_keypoints_gt / norm_keypoints_pred 85 | return MPJPE()(scale * keypoints_pred, keypoints_gt)#[0] 86 | 87 | class MPJVE(nn.Module): 88 | def __init__(self): 89 | super().__init__() 90 | 91 | def forward(self, keypoints_pred, keypoints_gt): 92 | # def mean_velocity_error(predicted, target): 93 | """ 94 | Mean per-joint velocity error (i.e. 
mean Euclidean distance of the 1st derivative) 95 | """ 96 | assert keypoints_pred.shape == keypoints_gt.shape 97 | 98 | velocity_predicted = np.diff(keypoints_pred, axis=0) 99 | velocity_target = np.diff(keypoints_gt, axis=0) 100 | 101 | return np.mean(np.linalg.norm(velocity_predicted - velocity_target, axis=len(keypoints_gt.shape)-1)) 102 | 103 | 104 | class KeypointsMSELoss(nn.Module): 105 | def __init__(self): 106 | super().__init__() 107 | 108 | def forward(self, keypoints_pred, keypoints_gt, keypoints_binary_validity): 109 | dimension = keypoints_pred.shape[-1] 110 | loss = torch.sum((keypoints_gt - keypoints_pred) ** 2 * keypoints_binary_validity) 111 | loss = loss / (dimension * max(1, torch.sum(keypoints_binary_validity).item())) 112 | return loss 113 | 114 | 115 | class KeypointsMSESmoothLoss(nn.Module): 116 | def __init__(self, threshold=400): 117 | super().__init__() 118 | 119 | self.threshold = threshold 120 | 121 | def forward(self, keypoints_pred, keypoints_gt, keypoints_binary_validity): 122 | dimension = keypoints_pred.shape[-1] 123 | diff = (keypoints_gt - keypoints_pred) ** 2 * keypoints_binary_validity 124 | diff[diff > self.threshold] = torch.pow(diff[diff > self.threshold], 0.1) * (self.threshold ** 0.9) 125 | loss = torch.sum(diff) / (dimension * max(1, torch.sum(keypoints_binary_validity).item())) 126 | return loss 127 | 128 | 129 | class KeypointsMAELoss(nn.Module): 130 | def __init__(self): 131 | super().__init__() 132 | 133 | def forward(self, keypoints_pred, keypoints_gt, keypoints_binary_validity): 134 | dimension = keypoints_pred.shape[-1] 135 | loss = torch.sum(torch.abs(keypoints_gt - keypoints_pred) * keypoints_binary_validity) 136 | loss = loss / (dimension * max(1, torch.sum(keypoints_binary_validity).item())) 137 | return loss 138 | 139 | 140 | class KeypointsL2Loss(nn.Module): 141 | def __init__(self): 142 | super().__init__() 143 | 144 | def forward(self, keypoints_pred, keypoints_gt, keypoints_binary_validity): 145 | loss = torch.sum(torch.sqrt(torch.sum((keypoints_gt - keypoints_pred) ** 2 * keypoints_binary_validity, dim=2))) 146 | loss = loss / max(1, torch.sum(keypoints_binary_validity).item()) 147 | return loss 148 | 149 | 150 | class VolumetricCELoss(nn.Module): 151 | def __init__(self): 152 | super().__init__() 153 | 154 | def forward(self, coord_volumes_batch, volumes_batch_pred, keypoints_gt, keypoints_binary_validity): 155 | loss = 0.0 156 | n_losses = 0 157 | 158 | batch_size = volumes_batch_pred.shape[0] 159 | for batch_i in range(batch_size): 160 | coord_volume = coord_volumes_batch[batch_i] 161 | keypoints_gt_i = keypoints_gt[batch_i] 162 | 163 | coord_volume_unsq = coord_volume.unsqueeze(0) 164 | keypoints_gt_i_unsq = keypoints_gt_i.unsqueeze(1).unsqueeze(1).unsqueeze(1) 165 | 166 | dists = torch.sqrt(((coord_volume_unsq - keypoints_gt_i_unsq) ** 2).sum(-1)) 167 | dists = dists.view(dists.shape[0], -1) 168 | 169 | min_indexes = torch.argmin(dists, dim=-1).detach().cpu().numpy() 170 | min_indexes = np.stack(np.unravel_index(min_indexes, volumes_batch_pred.shape[-3:]), axis=1) 171 | 172 | for joint_i, index in enumerate(min_indexes): 173 | validity = keypoints_binary_validity[batch_i, joint_i] 174 | loss += validity[0] * (-torch.log(volumes_batch_pred[batch_i, joint_i, index[0], index[1], index[2]] + 1e-6)) 175 | n_losses += 1 176 | 177 | 178 | return loss / n_losses 179 | 180 | 181 | class LimbLengthError(nn.Module): 182 | """ Limb Length Loss: to let the """ 183 | def __init__(self): 184 | super(LimbLengthError, self).__init__() 185 | 
self.CONNECTIVITY_DICT = [(0, 1), (1, 2), (2, 6), (5, 4), (4, 3), (3, 6), (6, 7), (7, 8), (8, 16), (9, 16), (8, 12), (11, 12), (10, 11), (8, 13), (13, 14), (14, 15)] 186 | 187 | def forward(self, keypoints_3d_pred, keypoints_3d_gt): 188 | # (b, 17, 3) 189 | 190 | error = 0 191 | for (joint0, joint1) in self.CONNECTIVITY_DICT: 192 | limb_pred = keypoints_3d_pred[:, joint0] - keypoints_3d_pred[:, joint1] 193 | limb_gt = keypoints_3d_gt[:, joint0] - keypoints_3d_gt[:, joint1] 194 | if isinstance(limb_pred, np.ndarray): 195 | limb_pred = torch.from_numpy(limb_pred) 196 | limb_gt = torch.from_numpy(limb_gt) 197 | limb_length_pred = torch.norm(limb_pred, dim = 1) 198 | limb_length_gt = torch.norm(limb_gt, dim = 1) 199 | error += torch.abs(limb_length_pred - limb_length_gt).mean().cpu() 200 | 201 | return float(error)/len(self.CONNECTIVITY_DICT) 202 | -------------------------------------------------------------------------------- /ContextPose_mpi/common/load_data_3dhp_mae.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.utils.data as data 3 | import numpy as np 4 | 5 | from common.utils import deterministic_random 6 | from common.camera import world_to_camera, normalize_screen_coordinates 7 | from common.generator_3dhp import ChunkedGenerator 8 | 9 | class Fusion(data.Dataset): 10 | def __init__(self, opt, root_path, train=True, MAE=False): 11 | self.data_type = opt.dataset 12 | self.train = train 13 | self.keypoints_name = opt.keypoints 14 | self.root_path = root_path 15 | 16 | self.train_list = opt.subjects_train.split(',') 17 | self.test_list = opt.subjects_test.split(',') 18 | self.action_filter = None if opt.actions == '*' else opt.actions.split(',') 19 | self.downsample = opt.downsample 20 | self.subset = opt.subset 21 | self.stride = opt.stride 22 | self.crop_uv = opt.crop_uv 23 | self.test_aug = opt.test_augmentation 24 | self.pad = opt.pad 25 | self.MAE=MAE 26 | if self.train: 27 | self.poses_train, self.poses_train_2d, self.poses_train_2d_crop = self.prepare_data(opt.root_path, train=True) 28 | self.generator = ChunkedGenerator(opt.batchSize // opt.stride, None, self.poses_train, 29 | self.poses_train_2d, self.poses_train_2d_crop, None, chunk_length=self.stride, pad=self.pad, 30 | augment=False, reverse_aug=opt.reverse_augmentation, 31 | kps_left=self.kps_left, kps_right=self.kps_right, 32 | joints_left=self.joints_left, 33 | joints_right=self.joints_right, out_all=opt.out_all, MAE=MAE, train = True) 34 | print('INFO: Training on {} frames'.format(self.generator.num_frames())) 35 | else: 36 | self.poses_test, self.poses_test_2d, self.poses_test_2d_crop, self.valid_frame = self.prepare_data(opt.root_path, train=False) 37 | # self.cameras_test, self.poses_test, self.poses_test_2d = self.fetch(dataset, self.test_list, 38 | # subset=self.subset) 39 | self.generator = ChunkedGenerator(opt.batchSize // opt.stride, None, self.poses_test, 40 | self.poses_test_2d, self.poses_test_2d_crop, self.valid_frame, 41 | pad=self.pad, augment=False, kps_left=self.kps_left, 42 | kps_right=self.kps_right, joints_left=self.joints_left, 43 | joints_right=self.joints_right, MAE=MAE, train = False) 44 | self.key_index = self.generator.saved_index 45 | print('INFO: Testing on {} frames'.format(self.generator.num_frames())) 46 | 47 | def prepare_data(self, path, train=True): 48 | out_poses_3d = {} 49 | out_poses_2d = {} 50 | out_poses_2d_crop = {} 51 | valid_frame = {} 52 | 53 | self.kps_left, self.kps_right = [5, 6, 7, 11, 12, 13], [2, 3, 4, 8, 9, 10] 54 
| self.joints_left, self.joints_right = [5, 6, 7, 11, 12, 13], [2, 3, 4, 8, 9, 10] 55 | 56 | if train == True: 57 | data = np.load("dataset/data_train_3dhp.npz",allow_pickle=True)['data'].item() 58 | for seq in data.keys(): 59 | for cam in data[seq][0].keys(): 60 | anim = data[seq][0][cam] 61 | 62 | subject_name, seq_name = seq.split(" ") 63 | 64 | data_3d = anim['data_3d'] 65 | data_3d[:, :14] -= data_3d[:, 14:15] 66 | data_3d[:, 15:] -= data_3d[:, 14:15] 67 | out_poses_3d[(subject_name, seq_name, cam)] = data_3d 68 | 69 | data_2d = anim['data_2d'] 70 | data_2d_crop = anim['data_2d_crop'] 71 | 72 | data_2d[..., :2] = normalize_screen_coordinates(data_2d[..., :2], w=2048, h=2048) 73 | out_poses_2d[(subject_name, seq_name, cam)] = data_2d 74 | out_poses_2d_crop[(subject_name, seq_name, cam)] = data_2d_crop 75 | 76 | return out_poses_3d, out_poses_2d, out_poses_2d_crop 77 | else: 78 | data = np.load("dataset/data_test_3dhp.npz", allow_pickle=True)['data'].item() 79 | for seq in data.keys(): 80 | 81 | anim = data[seq] 82 | 83 | valid_frame[seq] = anim["valid"] 84 | 85 | data_3d = anim['data_3d'] 86 | data_3d[:, :14] -= data_3d[:, 14:15] 87 | data_3d[:, 15:] -= data_3d[:, 14:15] 88 | out_poses_3d[seq] = data_3d 89 | 90 | data_2d = anim['data_2d'] 91 | data_2d_crop = anim['data_2d_crop'] 92 | 93 | if seq == "TS5" or seq == "TS6": 94 | width = 1920 95 | height = 1080 96 | else: 97 | width = 2048 98 | height = 2048 99 | data_2d[..., :2] = normalize_screen_coordinates(data_2d[..., :2], w=width, h=height) 100 | out_poses_2d[seq] = data_2d 101 | out_poses_2d_crop[seq] = data_2d_crop 102 | 103 | return out_poses_3d, out_poses_2d, out_poses_2d_crop, valid_frame 104 | 105 | def __len__(self): 106 | return len(self.generator.pairs) 107 | #return 200 108 | 109 | def __getitem__(self, index): 110 | seq_name, start_3d, end_3d, flip, reverse = self.generator.pairs[index] 111 | # ['S7' 'Seq2' '6'] 10782 10783 False False 112 | 113 | if self.MAE: 114 | pass 115 | # cam, input_2D, seq, subject, cam_ind = self.generator.get_batch(seq_name, start_3d, end_3d, flip, 116 | # reverse) 117 | # if self.train == False and self.test_aug: 118 | # _, input_2D_aug, _, _,_ = self.generator.get_batch(seq_name, start_3d, end_3d, flip=True, reverse=reverse) 119 | # input_2D = np.concatenate((np.expand_dims(input_2D,axis=0),np.expand_dims(input_2D_aug,axis=0)),0) 120 | else: 121 | cam, gt_3D, input_2D, input_2D_crop, img, seq, subject, cam_ind = self.generator.get_batch(seq_name, start_3d, end_3d, flip, reverse) 122 | 123 | # if self.train == False and self.test_aug: 124 | # _, _, input_2D_aug, _, _, _, _, _ = self.generator.get_batch(seq_name, start_3d, end_3d, flip=True, reverse=reverse) 125 | # _, _, _, input_2D_crop_aug, _, _, _, _ = self.generator.get_batch(seq_name, start_3d, end_3d, flip=True, reverse=reverse) 126 | # _, _, _, _, img_aug, _, _, _ = self.generator.get_batch(seq_name, start_3d, end_3d, flip=True, reverse=reverse) 127 | # input_2D = np.concatenate((np.expand_dims(input_2D,axis=0),np.expand_dims(input_2D_aug,axis=0)),0) 128 | # input_2D_crop = np.concatenate((np.expand_dims(input_2D_crop,axis=0),np.expand_dims(input_2D_crop_aug,axis=0)),0) 129 | # img = np.concatenate((np.expand_dims(img,axis=0),np.expand_dims(img,axis=0)),0) 130 | 131 | bb_box = np.array([0, 0, 1, 1]) 132 | 133 | scale = float(1.0) 134 | 135 | if self.MAE: 136 | if self.train == True: 137 | return cam, input_2D_update, seq, subject, scale, bb_box, cam_ind 138 | else: 139 | return cam, input_2D_update, seq, scale, bb_box 140 | else: 141 | if 
self.train == True: 142 | return cam, gt_3D, input_2D, input_2D_crop, img, seq, subject, scale, bb_box, cam_ind 143 | else: 144 | return cam, gt_3D, input_2D, input_2D_crop, img, seq, scale, bb_box 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /ContextPose/mvn/utils/cfg.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from easydict import EasyDict as edict 3 | import os 4 | 5 | config = edict() 6 | 7 | config.title = "human36m_vol_softmax_single" 8 | config.kind = "human36m" 9 | config.azureroot = "" 10 | config.logdir = "logs" 11 | config.batch_output = False 12 | config.vis_freq = 1000 13 | config.vis_n_elements = 10 14 | config.id = 600 15 | config.frame = 1 16 | 17 | # model definition 18 | config.model = edict() 19 | config.model.image_shape = [192, 256] 20 | config.model.init_weights = True 21 | config.model.checkpoint = None 22 | 23 | config.model.backbone = edict() 24 | config.model.backbone.type = 'hrnet_32' 25 | config.model.backbone.num_final_layer_channel = 17 26 | config.model.backbone.num_joints = 17 27 | config.model.backbone.num_layers = 152 28 | config.model.backbone.init_weights = True 29 | config.model.backbone.fix_weights = False 30 | config.model.backbone.checkpoint = "data/pretrained/human36m/pose_hrnet_w32_256x192.pth" 31 | 32 | # pose_hrnet related params 33 | # config.model.backbone = edict() 34 | config.model.backbone.NUM_JOINTS = 17 35 | config.model.backbone.PRETRAINED_LAYERS = ['*'] 36 | config.model.backbone.STEM_INPLANES = 64 37 | config.model.backbone.FINAL_CONV_KERNEL = 1 38 | 39 | config.model.backbone.STAGE2 = edict() 40 | config.model.backbone.STAGE2.NUM_MODULES = 1 41 | config.model.backbone.STAGE2.NUM_BRANCHES = 2 42 | config.model.backbone.STAGE2.NUM_BLOCKS = [4, 4] 43 | config.model.backbone.STAGE2.NUM_CHANNELS = [32, 64] 44 | # config.model.backbone.STAGE2.NUM_CHANNELS = [48, 96] 45 | config.model.backbone.STAGE2.BLOCK = 'BASIC' 46 | config.model.backbone.STAGE2.FUSE_METHOD = 'SUM' 47 | 48 | config.model.backbone.STAGE3 = edict() 49 | # config.model.backbone.STAGE3.NUM_MODULES = 1 50 | config.model.backbone.STAGE3.NUM_MODULES = 4 51 | config.model.backbone.STAGE3.NUM_BRANCHES = 3 52 | config.model.backbone.STAGE3.NUM_BLOCKS = [4, 4, 4] 53 | config.model.backbone.STAGE3.NUM_CHANNELS = [32, 64, 128] 54 | # config.model.backbone.STAGE3.NUM_CHANNELS = [48, 96, 192] 55 | config.model.backbone.STAGE3.BLOCK = 'BASIC' 56 | config.model.backbone.STAGE3.FUSE_METHOD = 'SUM' 57 | 58 | config.model.backbone.STAGE4 = edict() 59 | # config.model.backbone.STAGE4.NUM_MODULES = 1 60 | config.model.backbone.STAGE4.NUM_MODULES = 3 61 | config.model.backbone.STAGE4.NUM_BRANCHES = 4 62 | config.model.backbone.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 63 | config.model.backbone.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 64 | # config.model.backbone.STAGE4.NUM_CHANNELS = [48, 96, 192, 384] 65 | config.model.backbone.STAGE4.BLOCK = 'BASIC' 66 | config.model.backbone.STAGE4.FUSE_METHOD = 'SUM' 67 | 68 | # pose_resnet related params 69 | config.model.backbone.NUM_LAYERS = 50 70 | config.model.backbone.DECONV_WITH_BIAS = False 71 | config.model.backbone.NUM_DECONV_LAYERS = 3 72 | config.model.backbone.NUM_DECONV_FILTERS = [256, 256, 256] 73 | config.model.backbone.NUM_DECONV_KERNELS = [4, 4, 4] 74 | config.model.backbone.FINAL_CONV_KERNEL = 1 75 | config.model.backbone.PRETRAINED_LAYERS = ['*'] 76 | 77 | config.model.volume_net = edict() 78 | 
config.model.volume_net.volume_aggregation_method = "softmax" 79 | config.model.volume_net.use_gt_pelvis = False 80 | config.model.volume_net.cuboid_size = 2500.0 81 | config.model.volume_net.volume_size = 64 82 | config.model.volume_net.volume_multiplier = 1.0 83 | config.model.volume_net.volume_softmax = True 84 | config.model.volume_net.use_feature_v2v = True 85 | config.model.volume_net.att_channels = 51 86 | config.model.volume_net.temperature = 1500 87 | 88 | config.model.poseformer = edict() 89 | config.model.poseformer.base_dim = 32 90 | config.model.poseformer.embed_dim_ratio = 128 91 | config.model.poseformer.depth = 4 92 | config.model.poseformer.levels = 4 93 | 94 | # loss related params 95 | config.loss = edict() 96 | config.loss.criterion = "MAE" 97 | config.loss.mse_smooth_threshold = 0 98 | config.loss.grad_clip = 0 99 | config.loss.scale_keypoints_3d = 0.1 100 | config.loss.use_volumetric_ce_loss = True 101 | config.loss.volumetric_ce_loss_weight = 0.01 102 | config.loss.use_global_attention_loss = True 103 | config.loss.global_attention_loss_weight = 1000000 104 | 105 | # dataset related params 106 | config.dataset = edict() 107 | config.dataset.kind = "human36m" 108 | config.dataset.data_format = '' 109 | config.dataset.transfer_cmu_to_human36m = False 110 | config.dataset.root = "../H36M-Toolbox/images/" 111 | config.dataset.extra_root = "data/human36m/extra" 112 | config.dataset.train_labels_path = "data/human36m/extra/human36m-multiview-labels-GTbboxes.npy" 113 | config.dataset.val_labels_path = "data/human36m/extra/human36m-multiview-labels-GTbboxes.npy" 114 | config.dataset.train_dataset = "multiview_human36m" 115 | config.dataset.val_dataset = "human36m" 116 | 117 | # train related params 118 | config.train = edict() 119 | config.train.n_objects_per_epoch = 15000 120 | config.train.n_epochs = 9999 121 | config.train.n_iters_per_epoch = 5000 122 | config.train.batch_size = 3 123 | config.train.optimizer = 'Adam' 124 | config.train.backbone_lr = 0.0001 125 | config.train.backbone_lr_step = [1000] 126 | config.train.backbone_lr_factor = 0.1 127 | config.train.process_features_lr = 0.001 128 | config.train.volume_net_lr = 0.001 129 | config.train.volume_net_lr_decay = 0.99 130 | config.train.volume_net_lr_step = [1000] 131 | config.train.volume_net_lr_factor = 0.5 132 | config.train.with_damaged_actions = True 133 | config.train.undistort_images = True 134 | config.train.scale_bbox = 1.0 135 | config.train.ignore_cameras = [] 136 | config.train.crop = True 137 | config.train.erase = False 138 | config.train.shuffle = True 139 | config.train.randomize_n_views = True 140 | config.train.min_n_views = 1 141 | config.train.max_n_views = 1 142 | config.train.num_workers = 8 143 | config.train.limb_length_path = "data/human36m/extra/mean_and_std_limb_length.h5" 144 | config.train.pred_results_path = "data/pretrained/human36m/human36m_alg_10-04-2019/checkpoints/0060/results/train.pkl" 145 | 146 | # val related params 147 | config.val = edict() 148 | config.val.flip_test = True 149 | config.val.batch_size = 6 150 | config.val.with_damaged_actions = True 151 | config.val.undistort_images = True 152 | config.val.scale_bbox = 1.0 153 | config.val.ignore_cameras = [] 154 | config.val.crop = True 155 | config.val.erase = False 156 | config.val.shuffle = False 157 | config.val.randomize_n_views = True 158 | config.val.min_n_views = 1 159 | config.val.max_n_views = 1 160 | config.val.num_workers = 10 161 | config.val.retain_every_n_frames_in_test = 1 162 | config.val.limb_length_path 
= "data/human36m/extra/mean_and_std_limb_length.h5" 163 | config.val.pred_results_path = "data/pretrained/human36m/human36m_alg_10-04-2019/checkpoints/0060/results/val.pkl" 164 | 165 | 166 | def update_dict(v, cfg): 167 | for kk, vv in v.items(): 168 | if kk in cfg: 169 | if isinstance(vv, dict): 170 | update_dict(vv, cfg[kk]) 171 | else: 172 | cfg[kk] = vv 173 | else: 174 | raise ValueError("{} not exist in cfg.py".format(kk)) 175 | 176 | 177 | def update_config(path): 178 | exp_config = None 179 | with open(path) as fin: 180 | exp_config = edict(yaml.safe_load(fin)) 181 | update_dict(exp_config, config) 182 | 183 | 184 | def handle_azureroot(config_dict, azureroot): 185 | for key in config_dict.keys(): 186 | if isinstance(config_dict[key], str): 187 | if config_dict[key].startswith('data/'): 188 | config_dict[key] = os.path.join(azureroot, config_dict[key]) 189 | elif isinstance(config_dict[key], dict): 190 | handle_azureroot(config_dict[key], azureroot) 191 | 192 | 193 | def update_dir(azureroot, logdir): 194 | config.azureroot = azureroot 195 | config.logdir = os.path.join(config.azureroot, logdir) 196 | if config.model.checkpoint != None and not config.model.checkpoint.startswith('data/'): 197 | config.model.checkpoint = os.path.join(config.azureroot, config.model.checkpoint) 198 | handle_azureroot(config, config.azureroot) 199 | 200 | -------------------------------------------------------------------------------- /ContextPose/mvn/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from mvn.utils.img import image_batch_to_torch 5 | 6 | import os 7 | import zipfile 8 | import cv2 9 | import random 10 | 11 | 12 | joints_left = [4, 5, 6, 11, 12, 13] 13 | joints_right = [1, 2, 3, 14, 15, 16] 14 | 15 | class data_prefetcher(): 16 | def __init__(self, loader, device, is_train, flip_test, backbone): 17 | self.loader = iter(loader) 18 | self.stream = torch.cuda.Stream() 19 | self.device = device 20 | self.is_train = is_train 21 | self.flip_test = flip_test 22 | self.backbone = backbone 23 | 24 | if backbone in ['hrnet_32', 'hrnet_48']: 25 | self.mean = torch.tensor([0.485, 0.456, 0.406]).cuda().to(device) 26 | self.std = torch.tensor([0.229, 0.224, 0.225]).cuda().to(device) 27 | elif backbone == 'cpn': 28 | self.mean = torch.tensor([122.7717, 115.9465, 102.9801]).cuda().to(device).view(1, 1, 1, 3) 29 | self.mean /= 255. 
30 | 31 | self.preload() 32 | 33 | def preload(self): 34 | try: 35 | self.next_batch = next(self.loader) 36 | except StopIteration: 37 | self.next_batch = None 38 | return 39 | with torch.cuda.stream(self.stream): 40 | for i in range(len(self.next_batch)): 41 | self.next_batch[i] = self.next_batch[i].cuda(non_blocking=True).to(self.device) 42 | 43 | images_batch, keypoints_3d_gt, keypoints_2d_batch_cpn, keypoints_2d_batch_cpn_crop = self.next_batch 44 | 45 | images_batch = torch.flip(images_batch, [-1]) 46 | 47 | if self.backbone in ['hrnet_32', 'hrnet_48']: 48 | images_batch = (images_batch / 255.0 - self.mean) / self.std 49 | elif self.backbone == 'cpn': 50 | images_batch = images_batch / 255.0 - self.mean # for CPN 51 | 52 | keypoints_3d_gt[:, :, 1:] -= keypoints_3d_gt[:, :, :1] 53 | keypoints_3d_gt[:, :, 0] = 0 54 | 55 | if random.random() <= 0.5 and self.is_train: 56 | images_batch = torch.flip(images_batch, [-2]) 57 | 58 | keypoints_2d_batch_cpn[..., 0] *= -1 59 | keypoints_2d_batch_cpn[..., joints_left + joints_right, :] = keypoints_2d_batch_cpn[..., joints_right + joints_left, :] 60 | 61 | keypoints_2d_batch_cpn_crop[:, :, 0] = 192 - keypoints_2d_batch_cpn_crop[:, :, 0] - 1 62 | keypoints_2d_batch_cpn_crop[:, joints_left + joints_right] = keypoints_2d_batch_cpn_crop[:, joints_right + joints_left] 63 | 64 | keypoints_3d_gt[:, :, :, 0] *= -1 65 | keypoints_3d_gt[:, :, joints_left + joints_right] = keypoints_3d_gt[:, :, joints_right + joints_left] 66 | 67 | if (not self.is_train) and self.flip_test: 68 | images_batch = torch.stack([images_batch, torch.flip(images_batch,[2])], dim=1) 69 | 70 | keypoints_2d_batch_cpn_flip = keypoints_2d_batch_cpn.clone() 71 | keypoints_2d_batch_cpn_flip[..., 0] *= -1 72 | keypoints_2d_batch_cpn_flip[..., joints_left + joints_right, :] = keypoints_2d_batch_cpn_flip[..., joints_right + joints_left, :] 73 | keypoints_2d_batch_cpn = torch.stack([keypoints_2d_batch_cpn, keypoints_2d_batch_cpn_flip], dim=1) 74 | 75 | keypoints_2d_batch_cpn_crop_flip = keypoints_2d_batch_cpn_crop.clone() 76 | keypoints_2d_batch_cpn_crop_flip[:, :, 0] = 192 - keypoints_2d_batch_cpn_crop_flip[:, :, 0] - 1 77 | keypoints_2d_batch_cpn_crop_flip[:, joints_left + joints_right] = keypoints_2d_batch_cpn_crop_flip[:, joints_right + joints_left] 78 | keypoints_2d_batch_cpn_crop = torch.stack([keypoints_2d_batch_cpn_crop, keypoints_2d_batch_cpn_crop_flip], dim=1) 79 | 80 | del keypoints_2d_batch_cpn_flip, keypoints_2d_batch_cpn_crop_flip 81 | 82 | self.next_batch = [images_batch.float(), keypoints_3d_gt.float(), keypoints_2d_batch_cpn.float(), keypoints_2d_batch_cpn_crop.float()] 83 | 84 | 85 | def next(self): 86 | torch.cuda.current_stream().wait_stream(self.stream) 87 | batch = self.next_batch 88 | self.preload() 89 | return batch 90 | 91 | 92 | def make_collate_fn(randomize_n_views=True, min_n_views=10, max_n_views=31): 93 | 94 | def collate_fn(items): 95 | items = list(filter(lambda x: x is not None, items)) 96 | if len(items) == 0: 97 | print("All items in batch are None") 98 | return None 99 | 100 | batch = dict() 101 | total_n_views = min(len(item['images']) for item in items) 102 | 103 | indexes = np.arange(total_n_views) 104 | if randomize_n_views: 105 | n_views = np.random.randint(min_n_views, min(total_n_views, max_n_views) + 1) 106 | indexes = np.random.choice(np.arange(total_n_views), size=n_views, replace=False) 107 | else: 108 | indexes = np.arange(total_n_views) 109 | 110 | batch['images'] = np.stack([np.stack([item['images'][i] for item in items], axis=0) for i in 
indexes], axis=0).swapaxes(0, 1) 111 | # batch['detections'] = np.array([[item['detections'][i] for item in items] for i in indexes]).swapaxes(0, 1) 112 | # batch['cameras'] = [[item['cameras'][i] for item in items] for i in indexes] 113 | 114 | batch['keypoints_3d'] = [item['keypoints_3d'] for item in items] 115 | batch['keypoints_2d_cpn'] = [item['keypoints_2d_cpn'] for item in items] 116 | batch['keypoints_2d_cpn_crop'] = [item['keypoints_2d_cpn_crop'] for item in items] 117 | # batch['cuboids'] = [item['cuboids'] for item in items] 118 | batch['indexes'] = [item['indexes'] for item in items] 119 | batch['subject'] = [item['subject'] for item in items] 120 | 121 | try: 122 | batch['pred_keypoints_3d'] = np.array([item['pred_keypoints_3d'] for item in items]) 123 | except: 124 | pass 125 | 126 | return batch 127 | 128 | return collate_fn 129 | 130 | 131 | def worker_init_fn(worker_id): 132 | np.random.seed(np.random.get_state()[1][0] + worker_id) 133 | 134 | 135 | def prepare_batch(batch, device, config): 136 | # images 137 | images_batch = [] 138 | for image_batch in batch['images']: 139 | image_batch = image_batch_to_torch(image_batch) 140 | image_batch = image_batch.to(device) 141 | images_batch.append(image_batch) 142 | 143 | images_batch = torch.stack(images_batch, dim=0) 144 | 145 | # 3D keypoints 146 | keypoints_3d_batch_gt = torch.from_numpy(np.stack(batch['keypoints_3d'], axis=0)[:, :, :3]).float().to(device) # (b, n_joints, 3) 147 | 148 | # 2D keypoints 149 | keypoints_2d_batch_cpn = torch.from_numpy(np.stack(batch['keypoints_2d_cpn'], axis=0)[:, :, :2]).float().to(device) # (b, n_joints, 3) 150 | keypoints_2d_batch_cpn_crop = torch.from_numpy(np.stack(batch['keypoints_2d_cpn_crop'], axis=0)[:, :, :2]).float().to(device) # (b, n_joints, 3) 151 | 152 | return images_batch, keypoints_3d_batch_gt, keypoints_2d_batch_cpn, keypoints_2d_batch_cpn_crop 153 | 154 | _im_zfile = [] 155 | 156 | 157 | def zipreader_imread(filename, flags=cv2.IMREAD_COLOR): 158 | global _im_zfile 159 | path = filename 160 | pos_at = path.index('@') 161 | if pos_at == -1: 162 | print("character '@' is not found from the given path '%s'" % (path)) 163 | assert 0 164 | path_zip = path[0:pos_at] 165 | if not os.path.isfile(path_zip): 166 | print("zip file '%s' is not found" % (path_zip)) 167 | assert 0 168 | for i in range(len(_im_zfile)): 169 | if _im_zfile[i]['path'] == path_zip: 170 | path_img = os.path.join(_im_zfile[i]['zipfile'].namelist()[0], path[pos_at+2:]) 171 | data = _im_zfile[i]['zipfile'].read(path_img) 172 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 173 | 174 | _im_zfile.append({ 175 | 'path': path_zip, 176 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 177 | }) 178 | path_img = os.path.join(_im_zfile[-1]['zipfile'].namelist()[0], path[pos_at+2:]) 179 | data = _im_zfile[-1]['zipfile'].read(path_img) 180 | 181 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) -------------------------------------------------------------------------------- /H36M-Toolbox/common/arguments.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | import argparse 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Training script') 12 | 13 | # General arguments 14 | parser.add_argument('-d', '--dataset', default='h36m', type=str, metavar='NAME', help='target dataset') # h36m or humaneva 15 | parser.add_argument('-k', '--keypoints', default='cpn_ft_h36m_dbb', type=str, metavar='NAME', help='2D detections to use') 16 | parser.add_argument('-str', '--subjects-train', default='S1,S5,S6,S7,S8', type=str, metavar='LIST', 17 | help='training subjects separated by comma') 18 | parser.add_argument('-ste', '--subjects-test', default='S9,S11', type=str, metavar='LIST', help='test subjects separated by comma') 19 | parser.add_argument('-sun', '--subjects-unlabeled', default='', type=str, metavar='LIST', 20 | help='unlabeled subjects separated by comma for self-supervision') 21 | parser.add_argument('-a', '--actions', default='*', type=str, metavar='LIST', 22 | help='actions to train/test on, separated by comma, or * for all') 23 | parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH', 24 | help='checkpoint directory') 25 | parser.add_argument('--checkpoint-frequency', default=40, type=int, metavar='N', 26 | help='create a checkpoint every N epochs') 27 | parser.add_argument('-r', '--resume', default='', type=str, metavar='FILENAME', 28 | help='checkpoint to resume (file name)') 29 | parser.add_argument('--evaluate', default='', type=str, metavar='FILENAME', help='checkpoint to evaluate (file name)') 30 | parser.add_argument('--render', action='store_true', help='visualize a particular video') 31 | parser.add_argument('--by-subject', action='store_true', help='break down error by subject (on evaluation)') 32 | parser.add_argument('--export-training-curves', action='store_true', help='save training curves as .png images') 33 | parser.add_argument('-g', '--gpu', type=list, help='set gpu number') 34 | parser.add_argument('--local_rank', type=int, default=0, help='node rank for distributed training') 35 | parser.add_argument('--from-scratch', type=int, default=0, help='choose to train from scratch or not') 36 | parser.add_argument('--center-pose', type=int, default=0, help='choose fine-tuning task as 3d pose estimation') 37 | 38 | # Model arguments 39 | parser.add_argument('-s', '--stride', default=1, type=int, metavar='N', help='chunk size to use during training') 40 | parser.add_argument('-e', '--epochs', default=200, type=int, metavar='N', help='number of training epochs') 41 | parser.add_argument('-b', '--batch-size', default=1024, type=int, metavar='N', help='batch size in terms of predicted frames') 42 | parser.add_argument('-drop', '--dropout', default=0., type=float, metavar='P', help='dropout probability') 43 | parser.add_argument('-lr', '--learning-rate', default=0.0001, type=float, metavar='LR', help='initial learning rate') 44 | parser.add_argument('-lrd', '--lr-decay', default=0.99, type=float, metavar='LR', help='learning rate decay per epoch') 45 | parser.add_argument('-no-da', '--no-data-augmentation', dest='data_augmentation', action='store_false', 46 | help='disable train-time flipping') 47 | # parser.add_argument('-no-tta', '--no-test-time-augmentation', dest='test_time_augmentation', action='store_false', 48 | # help='disable test-time flipping') 49 | # parser.add_argument('-arc', '--architecture', default='3,3,3', type=str, metavar='LAYERS', help='filter widths separated by comma') 50 | parser.add_argument('-frame', '--number-of-frames', default='81', 
type=int, metavar='N', 51 | help='how many frames used as input') 52 | parser.add_argument('-frame-kept', '--number-of-kept-frames', default='27', type=int, metavar='N', 53 | help='how many frames are kept') 54 | parser.add_argument('-coeff-kept', '--number-of-kept-coeffs', type=int, metavar='N', help='how many coefficients are kept') 55 | # parser.add_argument('--causal', action='store_true', help='use causal convolutions for real-time processing') 56 | # parser.add_argument('-ch', '--channels', default=1024, type=int, metavar='N', help='number of channels in convolution layers') 57 | parser.add_argument('--depth', default=4, type=int, metavar='N', help='number of transformer blocks') 58 | parser.add_argument('--embed-dim-ratio', default=32, type=int, metavar='N', help='dimension of embedding ratio') 59 | parser.add_argument('-kd', type=int, default=0, help='choose to use knowledge distillation or not') 60 | parser.add_argument('-alpha', type=float, default=1.0, help='the weight for distillation loss') 61 | parser.add_argument('-std', type=float, default=0.0, help='the standard deviation for gaussian noise') 62 | 63 | # Experimental 64 | parser.add_argument('--subset', default=1, type=float, metavar='FRACTION', help='reduce dataset size by fraction') 65 | parser.add_argument('--downsample', default=1, type=int, metavar='FACTOR', help='downsample frame rate by factor (semi-supervised)') 66 | parser.add_argument('--warmup', default=1, type=int, metavar='N', help='warm-up epochs for semi-supervision') 67 | parser.add_argument('--no-eval', action='store_true', help='disable epoch evaluation while training (small speed-up)') 68 | parser.add_argument('--dense', action='store_true', help='use dense convolutions instead of dilated convolutions') 69 | parser.add_argument('--disable-optimizations', action='store_true', help='disable optimized model for single-frame predictions') 70 | parser.add_argument('--linear-projection', action='store_true', help='use only linear coefficients for semi-supervised projection') 71 | parser.add_argument('--no-bone-length', action='store_false', dest='bone_length_term', 72 | help='disable bone length term in semi-supervised settings') 73 | parser.add_argument('--no-proj', action='store_true', help='disable projection for semi-supervised setting') 74 | 75 | # Visualization 76 | parser.add_argument('--viz-subject', type=str, metavar='STR', help='subject to render') 77 | parser.add_argument('--viz-action', type=str, metavar='STR', help='action to render') 78 | parser.add_argument('--viz-camera', type=int, default=0, metavar='N', help='camera to render') 79 | parser.add_argument('--viz-video', type=str, metavar='PATH', help='path to input video') 80 | parser.add_argument('--viz-skip', type=int, default=0, metavar='N', help='skip first N frames of input video') 81 | parser.add_argument('--viz-output', type=str, metavar='PATH', help='output file name (.gif or .mp4)') 82 | parser.add_argument('--viz-export', type=str, metavar='PATH', help='output file name for coordinates') 83 | parser.add_argument('--viz-bitrate', type=int, default=3000, metavar='N', help='bitrate for mp4 videos') 84 | parser.add_argument('--viz-no-ground-truth', action='store_true', help='do not show ground-truth poses') 85 | parser.add_argument('--viz-limit', type=int, default=-1, metavar='N', help='only render first N frames') 86 | parser.add_argument('--viz-downsample', type=int, default=1, metavar='N', help='downsample FPS by a factor N') 87 | parser.add_argument('--viz-size', type=int, default=5, 
metavar='N', help='image size') 88 | 89 | parser.set_defaults(bone_length_term=True) 90 | parser.set_defaults(data_augmentation=True) 91 | parser.set_defaults(test_time_augmentation=True) 92 | # parser.set_defaults(test_time_augmentation=False) 93 | 94 | args = parser.parse_args() 95 | # Check invalid configuration 96 | if args.resume and args.evaluate: 97 | print('Invalid flags: --resume and --evaluate cannot be set at the same time') 98 | exit() 99 | 100 | if args.export_training_curves and args.no_eval: 101 | print('Invalid flags: --export-training-curves and --no-eval cannot be set at the same time') 102 | exit() 103 | 104 | return args -------------------------------------------------------------------------------- /ContextPose_mpi/common/generator_tds.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class ChunkedGenerator: 5 | def __init__(self, batch_size, cameras, poses_3d, poses_2d, 6 | chunk_length=1, pad=0, causal_shift=0, 7 | shuffle=False, random_seed=1234, 8 | augment=False, reverse_aug= False,kps_left=None, kps_right=None, joints_left=None, joints_right=None, 9 | endless=False, out_all = False, MAE=False, tds=1): 10 | assert poses_3d is None or len(poses_3d) == len(poses_2d), (len(poses_3d), len(poses_2d)) 11 | assert cameras is None or len(cameras) == len(poses_2d) 12 | 13 | pairs = [] 14 | self.saved_index = {} 15 | start_index = 0 16 | 17 | for key in poses_2d.keys(): 18 | assert poses_3d is None or poses_3d[key].shape[0] == poses_3d[key].shape[0] 19 | n_chunks = (poses_2d[key].shape[0] + chunk_length - 1) // chunk_length 20 | offset = (n_chunks * chunk_length - poses_2d[key].shape[0]) // 2 21 | bounds = np.arange(n_chunks + 1) * chunk_length - offset 22 | augment_vector = np.full(len(bounds - 1), False, dtype=bool) 23 | reverse_augment_vector = np.full(len(bounds - 1), False, dtype=bool) 24 | keys = np.tile(np.array(key).reshape([1,3]),(len(bounds - 1),1)) 25 | pairs += list(zip(keys, bounds[:-1], bounds[1:], augment_vector,reverse_augment_vector)) 26 | if reverse_aug: 27 | pairs += list(zip(keys, bounds[:-1], bounds[1:], augment_vector, ~reverse_augment_vector)) 28 | if augment: 29 | if reverse_aug: 30 | pairs += list(zip(keys, bounds[:-1], bounds[1:], ~augment_vector,~reverse_augment_vector)) 31 | else: 32 | pairs += list(zip(keys, bounds[:-1], bounds[1:], ~augment_vector, reverse_augment_vector)) 33 | 34 | end_index = start_index + poses_3d[key].shape[0] 35 | self.saved_index[key] = [start_index,end_index] 36 | start_index = start_index + poses_3d[key].shape[0] 37 | 38 | 39 | if cameras is not None: 40 | self.batch_cam = np.empty((batch_size, cameras[key].shape[-1])) 41 | 42 | if poses_3d is not None: 43 | self.batch_3d = np.empty((batch_size, chunk_length, poses_3d[key].shape[-2], poses_3d[key].shape[-1])) 44 | self.batch_2d = np.empty((batch_size, chunk_length + 2 * pad, poses_2d[key].shape[-2], poses_2d[key].shape[-1])) 45 | 46 | self.num_batches = (len(pairs) + batch_size - 1) // batch_size 47 | self.batch_size = batch_size 48 | self.random = np.random.RandomState(random_seed) 49 | self.pairs = pairs 50 | self.shuffle = shuffle 51 | self.pad = pad 52 | self.causal_shift = causal_shift 53 | self.endless = endless 54 | self.state = None 55 | 56 | self.cameras = cameras 57 | if cameras is not None: 58 | self.cameras = cameras 59 | self.poses_3d = poses_3d 60 | self.poses_2d = poses_2d 61 | 62 | self.augment = augment 63 | self.kps_left = kps_left 64 | self.kps_right = kps_right 65 | 
self.joints_left = joints_left 66 | self.joints_right = joints_right 67 | self.out_all = out_all 68 | self.MAE = MAE 69 | self.tds = tds 70 | 71 | def num_frames(self): 72 | return self.num_batches * self.batch_size 73 | 74 | def random_state(self): 75 | return self.random 76 | 77 | def set_random_state(self, random): 78 | self.random = random 79 | 80 | def augment_enabled(self): 81 | return self.augment 82 | 83 | def next_pairs(self): 84 | if self.state is None: 85 | if self.shuffle: 86 | pairs = self.random.permutation(self.pairs) 87 | else: 88 | pairs = self.pairs 89 | return 0, pairs 90 | else: 91 | return self.state 92 | 93 | def get_batch(self, seq_i, start_3d, end_3d, flip, reverse): 94 | subject,action,cam_index = seq_i 95 | seq_name = (subject,action,int(cam_index)) 96 | start_2d = start_3d - self.pad * self.tds - self.causal_shift 97 | end_2d = end_3d + self.pad * self.tds - self.causal_shift 98 | 99 | seq_2d = self.poses_2d[seq_name].copy() 100 | low_2d = max(start_2d, 0) 101 | high_2d = min(end_2d, seq_2d.shape[0]) 102 | pad_left_2d = low_2d - start_2d 103 | pad_right_2d = end_2d - high_2d 104 | if pad_left_2d != 0: 105 | data_pad = np.repeat(seq_2d[0:1],pad_left_2d,axis=0) 106 | new_data = np.concatenate((data_pad, seq_2d[low_2d:high_2d]), axis=0) 107 | self.batch_2d = new_data[::self.tds] 108 | #self.batch_2d = np.pad(seq_2d[low_2d:high_2d], ((pad_left_2d, pad_right_2d), (0, 0), (0, 0)), 'edge') 109 | 110 | elif pad_right_2d != 0: 111 | data_pad = np.repeat(seq_2d[seq_2d.shape[0]-1:seq_2d.shape[0]], pad_right_2d, axis=0) 112 | new_data = np.concatenate((seq_2d[low_2d:high_2d], data_pad), axis=0) 113 | self.batch_2d = new_data[::self.tds] 114 | #self.batch_2d = np.pad(seq_2d[low_2d:high_2d], ((pad_left_2d, pad_right_2d), (0, 0), (0, 0)), 'edge') 115 | else: 116 | self.batch_2d = seq_2d[low_2d:high_2d:self.tds] 117 | 118 | if flip: 119 | self.batch_2d[ :, :, 0] *= -1 120 | self.batch_2d[ :, self.kps_left + self.kps_right] = self.batch_2d[ :, 121 | self.kps_right + self.kps_left] 122 | if reverse: 123 | self.batch_2d = self.batch_2d[::-1].copy() 124 | 125 | if not self.MAE: 126 | if self.poses_3d is not None: 127 | seq_3d = self.poses_3d[seq_name].copy() 128 | if self.out_all: 129 | low_3d = low_2d 130 | high_3d = high_2d 131 | pad_left_3d = pad_left_2d 132 | pad_right_3d = pad_right_2d 133 | else: 134 | low_3d = max(start_3d, 0) 135 | high_3d = min(end_3d, seq_3d.shape[0]) 136 | pad_left_3d = low_3d - start_3d 137 | pad_right_3d = end_3d - high_3d 138 | 139 | if pad_left_3d != 0: 140 | data_pad = np.repeat(seq_3d[0:1], pad_left_3d, axis=0) 141 | new_data = np.concatenate((data_pad, seq_3d[low_3d:high_3d]), axis=0) 142 | self.batch_3d = new_data[::self.tds] 143 | elif pad_right_3d != 0: 144 | data_pad = np.repeat(seq_3d[seq_3d.shape[0] - 1:seq_3d.shape[0]], pad_right_3d, axis=0) 145 | new_data = np.concatenate((seq_3d[low_3d:high_3d], data_pad), axis=0) 146 | self.batch_3d = new_data[::self.tds] 147 | # self.batch_3d = np.pad(seq_3d[low_3d:high_3d], 148 | # ((pad_left_3d, pad_right_3d), (0, 0), (0, 0)), 'edge') 149 | else: 150 | self.batch_3d = seq_3d[low_3d:high_3d:self.tds] 151 | 152 | if flip: 153 | self.batch_3d[ :, :, 0] *= -1 154 | self.batch_3d[ :, self.joints_left + self.joints_right] = \ 155 | self.batch_3d[ :, self.joints_right + self.joints_left] 156 | if reverse: 157 | self.batch_3d = self.batch_3d[::-1].copy() 158 | 159 | if self.cameras is not None: 160 | self.batch_cam = self.cameras[seq_name].copy() 161 | if flip: 162 | self.batch_cam[ 2] *= -1 163 | 
self.batch_cam[ 7] *= -1 164 | 165 | if self.MAE: 166 | return self.batch_cam, self.batch_2d.copy(), action, subject, int(cam_index) 167 | if self.poses_3d is None and self.cameras is None: 168 | return None, None, self.batch_2d.copy(), action, subject, int(cam_index) 169 | elif self.poses_3d is not None and self.cameras is None: 170 | return np.zeros(9), self.batch_3d.copy(), self.batch_2d.copy(),action, subject, int(cam_index) 171 | elif self.poses_3d is None: 172 | return self.batch_cam, None, self.batch_2d.copy(),action, subject, int(cam_index) 173 | else: 174 | return self.batch_cam, self.batch_3d.copy(), self.batch_2d.copy(),action, subject, int(cam_index) 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | --------------------------------------------------------------------------------
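A minimal usage sketch for the ChunkedGenerator defined in ContextPose_mpi/common/generator_tds.py, showing how a chunk request is edge-padded by pad * tds frames on each side and then subsampled with stride tds. The toy array shapes, the (subject, sequence, camera) key, and the pad/tds values below are illustrative assumptions, not values prescribed by the repository:

import numpy as np
from common.generator_tds import ChunkedGenerator  # assumes running from ContextPose_mpi/

# Toy data: one sequence of 100 frames with 17 joints (2D and 3D poses).
poses_2d = {('S1', 'Seq1', 0): np.random.randn(100, 17, 2)}
poses_3d = {('S1', 'Seq1', 0): np.random.randn(100, 17, 3)}

left, right = [5, 6, 7, 11, 12, 13], [2, 3, 4, 8, 9, 10]  # joint index split used elsewhere in this repo
gen = ChunkedGenerator(batch_size=1, cameras=None, poses_3d=poses_3d, poses_2d=poses_2d,
                       chunk_length=1, pad=13, tds=3,
                       kps_left=left, kps_right=right,
                       joints_left=left, joints_right=right)

# Each pair describes one chunk: (key, start_3d, end_3d, flip, reverse).
key, start_3d, end_3d, flip, reverse = gen.pairs[0]
cam, batch_3d, batch_2d, action, subject, cam_idx = gen.get_batch(key, start_3d, end_3d, flip, reverse)
print(batch_2d.shape, batch_3d.shape)  # (27, 17, 2) (1, 17, 3): 1 + 2 * pad frames left after stride-tds subsampling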