├── lib
│   ├── core
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── evaluate.py
│   │   └── loss.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── one_euro_filter.py
│   │   ├── smooth_pose.py
│   │   ├── pose_tracker.py
│   │   ├── smooth_bbox.py
│   │   ├── renderer.py
│   │   ├── utils.py
│   │   ├── eval_utils.py
│   │   └── demo_utils.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── attention.py
│   │   ├── motion_discriminator.py
│   │   ├── smpl.py
│   │   └── vibe.py
│   ├── dataset
│   │   ├── __init__.py
│   │   ├── penn_action.py
│   │   ├── posetrack.py
│   │   ├── threedpw.py
│   │   ├── mpii3d.py
│   │   ├── amass.py
│   │   ├── insta.py
│   │   ├── loaders.py
│   │   ├── inference.py
│   │   ├── dataset_2d.py
│   │   └── dataset_3d.py
│   ├── data_utils
│   │   ├── feature_extractor.py
│   │   ├── penn_action_utils.py
│   │   ├── amass_utils.py
│   │   ├── posetrack_utils.py
│   │   └── threedpw_utils.py
│   └── smplify
│       ├── prior.py
│       ├── losses.py
│       └── temporal_smplify.py
├── girl_dance.mp4
├── .gitignore
├── scripts
│   ├── prepare_data.sh
│   ├── install_pip.sh
│   ├── install_conda.sh
│   └── prepare_training_data.sh
├── doc
│   ├── eval.md
│   ├── train.md
│   └── demo.md
├── requirements.txt
├── .github
│   └── ISSUE_TEMPLATE
│       ├── bug-report.md
│       └── feature_request.md
├── configs
│   ├── config.yaml
│   └── config_wo_3dpw.yaml
├── tests
│   ├── test_2d_datasets.py
│   └── test_3d_datasets.py
├── eval.py
├── vibe_demo.ipynb
├── train.py
├── LICENSE
└── README.md
/lib/core/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/girl_dance.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cedro3/VIBE/master/girl_dance.mp4
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | data
3 | __pycache__/
4 | vibe-env/
5 | output/
6 | *.mp4
7 | results
8 |
--------------------------------------------------------------------------------
/lib/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .vibe import VIBE
2 | from .motion_discriminator import MotionDiscriminator
3 |
--------------------------------------------------------------------------------
/lib/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset_2d import Dataset2D
2 | from .dataset_3d import Dataset3D
3 |
4 | from .insta import Insta
5 | from .amass import AMASS
6 | from .mpii3d import MPII3D
7 | from .threedpw import ThreeDPW
8 | from .posetrack import PoseTrack
9 | from .penn_action import PennAction
10 |
11 |
--------------------------------------------------------------------------------
/scripts/prepare_data.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | mkdir -p data
4 | cd data
5 | gdown "https://drive.google.com/uc?id=1untXhYOLQtpNEy4GTY_0fL_H-k6cTf_r"
6 | unzip vibe_data.zip
7 | rm vibe_data.zip
8 | cd ..
9 | mv data/vibe_data/sample_video.mp4 .
10 | mkdir -p $HOME/.torch/models/
11 | mv data/vibe_data/yolov3.weights $HOME/.torch/models/
12 |
--------------------------------------------------------------------------------
/scripts/install_pip.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | echo "Creating virtual environment"
4 | python3.7 -m venv vibe-env
5 | echo "Activating virtual environment"
6 |
7 | source $PWD/vibe-env/bin/activate
8 |
9 | $PWD/vibe-env/bin/pip install numpy==1.17.5 torch==1.4.0 torchvision==0.5.0
10 | $PWD/vibe-env/bin/pip install git+https://github.com/giacaglia/pytube.git --upgrade
11 | $PWD/vibe-env/bin/pip install -r requirements.txt
12 |
--------------------------------------------------------------------------------
/scripts/install_conda.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | export CONDA_ENV_NAME=vibe-env
4 | echo $CONDA_ENV_NAME
5 |
6 | conda create -n $CONDA_ENV_NAME python=3.7
7 |
8 | eval "$(conda shell.bash hook)"
9 | conda activate $CONDA_ENV_NAME
10 |
11 | which python
12 | which pip
13 |
14 | pip install numpy==1.17.5 torch==1.4.0 torchvision==0.5.0
15 | pip install git+https://github.com/giacaglia/pytube.git --upgrade
16 | pip install -r requirements.txt
17 |
--------------------------------------------------------------------------------
/doc/eval.md:
--------------------------------------------------------------------------------
1 | # Evaluation
2 |
3 | Run the command below to evaluate a pretrained model.
4 |
5 | ```shell script
6 | python eval.py --cfg configs/config.yaml
7 | ```
8 |
9 | Change the `TRAIN.PRETRAINED` field of the config file to the checkpoint you would like to evaluate.
10 | You should be able to obtain the output below:
11 |
12 | ```shell script
13 | # TRAIN.PRETRAINED = 'data/vibe_data/vibe_model_wo_3dpw.pth.tar'
14 | ...Evaluating on 3DPW test set...
15 | MPJPE: 93.5881, PA-MPJPE: 56.5608, PVE: 113.4118, ACCEL: 27.1242, ACCEL_ERR: 27.9877
16 | ```
17 |
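18 | For reference, the checkpoint named in `TRAIN.PRETRAINED` is consumed by `eval.py` roughly as in the sketch below (a condensed view of the logic already in `eval.py`, not a separate API):
19 | 
20 | ```python
21 | import torch
22 | 
23 | # `cfg` comes from lib.core.config.parse_args(); `model` is the VIBE network built in eval.py
24 | checkpoint = torch.load(cfg.TRAIN.PRETRAINED)
25 | model.load_state_dict(checkpoint['gen_state_dict'])  # generator weights
26 | print(checkpoint['performance'])                      # score stored alongside the checkpoint
27 | ```
28 | 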
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tqdm==4.28.1
2 | yacs==0.1.6
3 | h5py==2.10.0
4 | numpy==1.17.5
5 | scipy==1.4.1
6 | numba==0.47.0
7 | smplx==0.1.13
8 | gdown==3.6.4
9 | PyYAML==5.3.1
10 | joblib==0.14.1
11 | pillow==6.2.1
12 | trimesh==3.5.25
13 | pyrender==0.1.36
14 | progress==1.5
15 | filterpy==1.4.5
16 | matplotlib==3.1.3
17 | tensorflow==1.15.4
18 | tensorboard==2.1.0
19 | torchvision==0.5.0
20 | scikit-image==0.16.2
21 | scikit-video==1.1.11
22 | opencv-python==4.1.2.30
23 | llvmlite==0.32.1
24 | git+https://github.com/mattloper/chumpy.git
25 | git+https://github.com/mkocabas/yolov3-pytorch.git
26 | git+https://github.com/mkocabas/multi-person-tracker.git
27 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug Report
3 | about: Use this to report bugs
4 | title: "[BUG]"
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | Thanks for your interest in our research!
11 |
12 | If you have problems running our code, please include:
13 |
14 | 1. your operating system and the version,
15 | 2. your python version,
16 | 3. your pytorch version,
17 | 4. the stack trace of the error that you see.
18 |
19 | Specifically, if you have an issue with pyrender or OpenGL setup & installation, please refer to pyrender [docs](https://pyrender.readthedocs.io/en/latest/) or [github issues](https://github.com/mmatl/pyrender/issues).
20 |
--------------------------------------------------------------------------------
/scripts/prepare_training_data.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | mkdir -p ./data/vibe_db
4 | export PYTHONPATH="./:$PYTHONPATH"
5 |
6 | # AMASS
7 | python lib/data_utils/amass_utils.py --dir ./data/amass
8 |
9 | # InstaVariety
10 | # Comment this if you already downloaded the preprocessed file
11 | python lib/data_utils/insta_utils.py --dir ./data/insta_variety
12 |
13 | # 3DPW
14 | python lib/data_utils/threedpw_utils.py --dir ./data/3dpw
15 |
16 | # MPI-INF-3D-HP
17 | python lib/data_utils/mpii3d_utils.py --dir ./data/mpi_inf_3dhp
18 |
19 | # PoseTrack
20 | python lib/data_utils/posetrack_utils.py --dir ./data/posetrack
21 |
22 | # PennAction
23 | python lib/data_utils/penn_action_utils.py --dir ./data/penn_action
24 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Use this to suggest an idea for this project
4 | title: "[FEATURE]"
5 | labels: enhancement
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/configs/config.yaml:
--------------------------------------------------------------------------------
1 | DEBUG: false
2 | DEBUG_FREQ: 5
3 | LOGDIR: ''
4 | DEVICE: 'cuda'
5 | EXP_NAME: 'vibe'
6 | OUTPUT_DIR: 'results/vibe_tests'
7 | NUM_WORKERS: 8
8 | SEED_VALUE: -1
9 | DATASET:
10 | SEQLEN: 16
11 | LOSS:
12 | KP_2D_W: 300.0
13 | KP_3D_W: 300.0
14 | SHAPE_W: 0.06
15 | POSE_W: 60.0
16 | D_MOTION_LOSS_W: 0.5
17 | TRAIN:
18 | BATCH_SIZE: 32
19 | NUM_ITERS_PER_EPOCH: 500
20 | PRETRAINED: ''
21 | PRETRAINED_REGRESSOR: 'data/vibe_data/spin_model_checkpoint.pth.tar'
22 | RESUME: ''
23 | START_EPOCH: 0
24 | END_EPOCH: 30
25 | LR_PATIENCE: 5
26 | DATA_2D_RATIO: 0.6
27 | DATASETS_2D:
28 | - 'Insta'
29 | # - 'PoseTrack'
30 | # - 'PennAction'
31 | DATASETS_3D:
32 | # - 'ThreeDPW'
33 | - 'MPII3D'
34 | DATASET_EVAL: 'ThreeDPW'
35 | GEN_LR: 0.00005
36 | GEN_WD: 0.0
37 | MOT_DISCR:
38 | OPTIM: 'Adam'
39 | LR: 0.0001
40 | WD: 0.0001
41 | MOMENTUM: 0.9
42 | HIDDEN_SIZE: 1024
43 | NUM_LAYERS: 2
44 | FEATURE_POOL: 'attention'
45 | ATT:
46 | LAYERS: 3
47 | SIZE: 1024
48 | DROPOUT: 0.2
49 | MODEL:
50 | TEMPORAL_TYPE: 'gru'
51 | TGRU:
52 | NUM_LAYERS: 2
53 | ADD_LINEAR: true
54 | RESIDUAL: true
55 | BIDIRECTIONAL: false
56 | HIDDEN_SIZE: 1024
--------------------------------------------------------------------------------
/configs/config_wo_3dpw.yaml:
--------------------------------------------------------------------------------
1 | DEBUG: false
2 | DEBUG_FREQ: 5
3 | LOGDIR: ''
4 | DEVICE: 'cuda'
5 | EXP_NAME: 'vibe'
6 | OUTPUT_DIR: 'results/vibe_wo_3dpw'
7 | NUM_WORKERS: 8
8 | SEED_VALUE: -1
9 | DATASET:
10 | SEQLEN: 16
11 | LOSS:
12 | KP_2D_W: 300.0
13 | KP_3D_W: 300.0
14 | SHAPE_W: 0.06
15 | POSE_W: 60.0
16 | D_MOTION_LOSS_W: 0.5
17 | TRAIN:
18 | BATCH_SIZE: 32
19 | NUM_ITERS_PER_EPOCH: 500
20 | PRETRAINED: ''
21 | PRETRAINED_REGRESSOR: 'data/vibe_data/spin_model_checkpoint.pth.tar'
22 | RESUME: ''
23 | START_EPOCH: 0
24 | END_EPOCH: 30
25 | LR_PATIENCE: 5
26 | DATA_2D_RATIO: 0.6
27 | DATASETS_2D:
28 | - 'Insta'
29 | # - 'PoseTrack'
30 | # - 'PennAction'
31 | DATASETS_3D:
32 | # - 'ThreeDPW'
33 | - 'MPII3D'
34 | DATASET_EVAL: 'ThreeDPW'
35 | GEN_LR: 0.00005
36 | GEN_WD: 0.0
37 | MOT_DISCR:
38 | OPTIM: 'Adam'
39 | LR: 0.0001
40 | WD: 0.0001
41 | MOMENTUM: 0.9
42 | HIDDEN_SIZE: 1024
43 | NUM_LAYERS: 2
44 | FEATURE_POOL: 'attention'
45 | ATT:
46 | LAYERS: 3
47 | SIZE: 1024
48 | DROPOUT: 0.2
49 | MODEL:
50 | TEMPORAL_TYPE: 'gru'
51 | TGRU:
52 | NUM_LAYERS: 2
53 | ADD_LINEAR: true
54 | RESIDUAL: true
55 | BIDIRECTIONAL: false
56 | HIDDEN_SIZE: 1024
--------------------------------------------------------------------------------
/lib/dataset/penn_action.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | from lib.dataset import Dataset2D
18 | from lib.core.config import PENNACTION_DIR
19 |
20 |
21 | class PennAction(Dataset2D):
22 | def __init__(self, seqlen, overlap=0.75, debug=False):
23 | db_name = 'pennaction'
24 |
25 | super(PennAction, self).__init__(
26 | seqlen = seqlen,
27 | folder=PENNACTION_DIR,
28 | dataset_name=db_name,
29 | debug=debug,
30 | overlap=overlap,
31 | )
32 | print(f'{db_name} - number of dataset objects {self.__len__()}')
33 |
--------------------------------------------------------------------------------
/lib/dataset/posetrack.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | from lib.dataset import Dataset2D
18 | from lib.core.config import POSETRACK_DIR
19 |
20 |
21 | class PoseTrack(Dataset2D):
22 | def __init__(self, seqlen, overlap=0.75, folder=None, debug=False):
23 | db_name = 'posetrack'
24 | super(PoseTrack, self).__init__(
25 | seqlen = seqlen,
26 | folder=POSETRACK_DIR,
27 | dataset_name=db_name,
28 | debug=debug,
29 | overlap=overlap,
30 | )
31 | print(f'{db_name} - number of dataset objects {self.__len__()}')
32 |
--------------------------------------------------------------------------------
/tests/test_2d_datasets.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('.')
3 |
4 | import torch
5 | import numpy as np
6 | import skimage.io as io
7 | import matplotlib.pyplot as plt
8 | from torch.utils.data import DataLoader
9 |
10 | from lib.dataset import *
11 | from lib.utils.vis import batch_draw_skeleton, batch_visualize_preds
12 |
13 |
14 | def debug_2d_data(dataset, DEBUG=True):
15 | is_train = True
16 | seqlen = 32
17 | batch_size = 1
18 | db = eval(dataset)(seqlen=seqlen, debug=DEBUG)
19 |
20 | dataloader = DataLoader(
21 | dataset=db,
22 | batch_size=batch_size,
23 | shuffle=True,
24 | num_workers=1,
25 | )
26 |
27 | for i, target in enumerate(dataloader):
28 | for k, v in target.items():
29 | print(k, v.shape)
30 |
31 | if DEBUG:
32 |             if dataset == 'Insta':
33 | input = torch.ones(batch_size, seqlen, 3, 224, 224)[0]
34 | else:
35 | input = target['video'][0]
36 | single_target = {k: v[0] for k, v in target.items()}
37 |
38 | dataset_name = 'spin'
39 | plt.figure(figsize=(19.2,10.8))
40 | images = batch_draw_skeleton(input, single_target, dataset=dataset_name, max_images=4)
41 | plt.imshow(images)
42 | plt.show()
43 |
44 | if i == 20:
45 | break
46 |
47 |
48 | if __name__ == '__main__':
49 | debug_2d_data('Insta', DEBUG=True)
50 |
--------------------------------------------------------------------------------
/lib/utils/one_euro_filter.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 |
4 |
5 | def smoothing_factor(t_e, cutoff):
6 | r = 2 * math.pi * cutoff * t_e
7 | return r / (r + 1)
8 |
9 |
10 | def exponential_smoothing(a, x, x_prev):
11 | return a * x + (1 - a) * x_prev
12 |
13 |
14 | class OneEuroFilter:
15 | def __init__(self, t0, x0, dx0=0.0, min_cutoff=1.0, beta=0.0,
16 | d_cutoff=1.0):
17 | """Initialize the one euro filter."""
18 | # The parameters.
19 | self.min_cutoff = float(min_cutoff)
20 | self.beta = float(beta)
21 | self.d_cutoff = float(d_cutoff)
22 | # Previous values.
23 | self.x_prev = x0
24 | self.dx_prev = dx0
25 | self.t_prev = t0
26 |
27 | def __call__(self, t, x):
28 | """Compute the filtered signal."""
29 | t_e = t - self.t_prev
30 |
31 | # The filtered derivative of the signal.
32 | a_d = smoothing_factor(t_e, self.d_cutoff)
33 | dx = (x - self.x_prev) / t_e
34 | dx_hat = exponential_smoothing(a_d, dx, self.dx_prev)
35 |
36 | # The filtered signal.
37 | cutoff = self.min_cutoff + self.beta * np.abs(dx_hat)
38 | a = smoothing_factor(t_e, cutoff)
39 | x_hat = exponential_smoothing(a, x, self.x_prev)
40 |
41 | # Memorize the previous values.
42 | self.x_prev = x_hat
43 | self.dx_prev = dx_hat
44 | self.t_prev = t
45 |
46 | return x_hat
47 |
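48 | # Example usage (a minimal sketch mirroring lib/utils/smooth_pose.py, which filters per-frame
49 | # pose parameters; the signal below is hypothetical):
50 | #   x0 = np.zeros(10)
51 | #   f = OneEuroFilter(np.zeros_like(x0), x0, min_cutoff=0.004, beta=0.7)
52 | #   x1_hat = f(np.ones_like(x0), x0 + 0.1)  # filtered estimate of the sample at t = 1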
--------------------------------------------------------------------------------
/lib/dataset/threedpw.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | from lib.dataset import Dataset3D
18 | from lib.core.config import THREEDPW_DIR
19 |
20 | class ThreeDPW(Dataset3D):
21 | def __init__(self, set, seqlen, overlap=0.75, debug=False):
22 | db_name = '3dpw'
23 |
24 | # during testing we don't need data augmentation
25 | # but we can use it as an ensemble
26 | is_train = False
27 | overlap = overlap if is_train else 0.
28 | print('3DPW Dataset overlap ratio: ', overlap)
29 | super(ThreeDPW, self).__init__(
30 | set=set,
31 | folder=THREEDPW_DIR,
32 | seqlen=seqlen,
33 | overlap=overlap,
34 | dataset_name=db_name,
35 | debug=debug,
36 | )
37 | print(f'{db_name} - number of dataset objects {self.__len__()}')
--------------------------------------------------------------------------------
/lib/dataset/mpii3d.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | from lib.dataset import Dataset3D
18 | from lib.core.config import MPII3D_DIR
19 |
20 |
21 | class MPII3D(Dataset3D):
22 | def __init__(self, set, seqlen, overlap=0, debug=False):
23 | db_name = 'mpii3d'
24 |
25 | # during testing we don't need data augmentation
26 | # but we can use it as an ensemble
27 | is_train = set == 'train'
28 | overlap = overlap if is_train else 0.
29 | print('MPII3D Dataset overlap ratio: ', overlap)
30 | super(MPII3D, self).__init__(
31 | set = set,
32 | folder=MPII3D_DIR,
33 | seqlen=seqlen,
34 | overlap=overlap,
35 | dataset_name=db_name,
36 | debug=debug,
37 | )
38 | print(f'{db_name} - number of dataset objects {self.__len__()}')
--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 |
4 | from lib.dataset import ThreeDPW
5 | from lib.models import VIBE
6 | from lib.core.evaluate import Evaluator
7 | from lib.core.config import parse_args
8 | from torch.utils.data import DataLoader
9 |
10 |
11 | def main(cfg):
12 | print('...Evaluating on 3DPW test set...')
13 |
14 | model = VIBE(
15 | n_layers=cfg.MODEL.TGRU.NUM_LAYERS,
16 | batch_size=cfg.TRAIN.BATCH_SIZE,
17 | seqlen=cfg.DATASET.SEQLEN,
18 | hidden_size=cfg.MODEL.TGRU.HIDDEN_SIZE,
19 | pretrained=cfg.TRAIN.PRETRAINED_REGRESSOR,
20 | add_linear=cfg.MODEL.TGRU.ADD_LINEAR,
21 | bidirectional=cfg.MODEL.TGRU.BIDIRECTIONAL,
22 | use_residual=cfg.MODEL.TGRU.RESIDUAL,
23 | ).to(cfg.DEVICE)
24 |
25 | if cfg.TRAIN.PRETRAINED != '' and os.path.isfile(cfg.TRAIN.PRETRAINED):
26 | checkpoint = torch.load(cfg.TRAIN.PRETRAINED)
27 | best_performance = checkpoint['performance']
28 | model.load_state_dict(checkpoint['gen_state_dict'])
29 | print(f'==> Loaded pretrained model from {cfg.TRAIN.PRETRAINED}...')
30 | print(f'Performance on 3DPW test set {best_performance}')
31 | else:
32 | print(f'{cfg.TRAIN.PRETRAINED} is not a pretrained model!!!!')
33 | exit()
34 |
35 | test_db = ThreeDPW(set='test', seqlen=cfg.DATASET.SEQLEN, debug=cfg.DEBUG)
36 |
37 | test_loader = DataLoader(
38 | dataset=test_db,
39 | batch_size=cfg.TRAIN.BATCH_SIZE,
40 | shuffle=False,
41 | num_workers=cfg.NUM_WORKERS,
42 | )
43 |
44 | Evaluator(
45 | model=model,
46 | device=cfg.DEVICE,
47 | test_loader=test_loader,
48 | ).run()
49 |
50 |
51 | if __name__ == '__main__':
52 | cfg, cfg_file = parse_args()
53 |
54 | main(cfg)
55 |
--------------------------------------------------------------------------------
/tests/test_3d_datasets.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('.')
3 | import time
4 | from lib.dataset import *
5 | import matplotlib.pyplot as plt
6 | from torch.utils.data import DataLoader
7 | from lib.models.smpl import SMPL, SMPL_MODEL_DIR
8 | from lib.utils.vis import batch_draw_skeleton, batch_visualize_preds
9 |
10 | dataset = 'MPII3D'
11 | seqlen = 16
12 | DEBUG = True
13 |
14 | db = eval(dataset)(set='val', seqlen=seqlen, debug=DEBUG)
15 |
16 | dataloader = DataLoader(
17 | dataset=db,
18 | batch_size=4,
19 | shuffle=True,
20 | num_workers=1,
21 | )
22 |
23 | smpl = SMPL(SMPL_MODEL_DIR)
24 |
25 | start = time.time()
26 | for i, target in enumerate(dataloader):
27 | data_time = time.time() - start
28 | start = time.time()
29 | print(f'Data loading time {data_time:.4f}')
30 |
31 | for k, v in target.items():
32 | print(k, v.shape)
33 |
34 | if DEBUG:
35 | input = target['video'][0]
36 | single_target = {k: v[0] for k, v in target.items()}
37 |
38 | if dataset == 'MPII3D':
39 | images = batch_draw_skeleton(input, single_target, dataset='spin', max_images=4)
40 | plt.imshow(images)
41 | plt.show()
42 | else:
43 | theta = single_target['theta']
44 | pose, shape = theta[:, 3:75], theta[:, 75:]
45 |
46 | # verts, j3d, smpl_j3d = smpl(pose, shape)
47 |
48 | pred_output = smpl(betas=shape, body_pose=pose[:, 3:], global_orient=pose[:, :3], pose2rot=True)
49 |
50 | single_target['verts'] = pred_output.vertices
51 |
52 | images = batch_visualize_preds(input, single_target, single_target, max_images=4, dataset='spin')
53 | # images = batch_draw_skeleton(input, single_target, dataset='common', max_images=10)
54 | plt.imshow(images)
55 | plt.show()
56 |
57 | if i == 100:
58 | break
--------------------------------------------------------------------------------
/lib/dataset/amass.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import torch
18 | import joblib
19 | import numpy as np
20 | import os.path as osp
21 | from torch.utils.data import Dataset
22 |
23 | from lib.core.config import VIBE_DB_DIR
24 | from lib.data_utils.img_utils import split_into_chunks
25 |
26 | class AMASS(Dataset):
27 | def __init__(self, seqlen):
28 | self.seqlen = seqlen
29 |
30 | self.stride = seqlen
31 |
32 | self.db = self.load_db()
33 | self.vid_indices = split_into_chunks(self.db['vid_name'], self.seqlen, self.stride)
34 | del self.db['vid_name']
35 | print(f'AMASS dataset number of videos: {len(self.vid_indices)}')
36 |
37 | def __len__(self):
38 | return len(self.vid_indices)
39 |
40 | def __getitem__(self, index):
41 | return self.get_single_item(index)
42 |
43 | def load_db(self):
44 | db_file = osp.join(VIBE_DB_DIR, 'amass_db.pt')
45 | db = joblib.load(db_file)
46 | return db
47 |
48 | def get_single_item(self, index):
49 | start_index, end_index = self.vid_indices[index]
50 | thetas = self.db['theta'][start_index:end_index+1]
51 |
52 | cam = np.array([1., 0., 0.])[None, ...]
53 | cam = np.repeat(cam, thetas.shape[0], axis=0)
54 | theta = np.concatenate([cam, thetas], axis=-1)
55 |
56 | target = {
57 | 'theta': torch.from_numpy(theta).float(), # cam, pose and shape
58 | }
59 | return target
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/lib/utils/smooth_pose.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import torch
18 | import numpy as np
19 |
20 | from lib.models.smpl import SMPL, SMPL_MODEL_DIR
21 | from lib.utils.one_euro_filter import OneEuroFilter
22 |
23 |
24 | def smooth_pose(pred_pose, pred_betas, min_cutoff=0.004, beta=0.7):
25 | # min_cutoff: Decreasing the minimum cutoff frequency decreases slow speed jitter
26 | # beta: Increasing the speed coefficient(beta) decreases speed lag.
27 |
28 | one_euro_filter = OneEuroFilter(
29 | np.zeros_like(pred_pose[0]),
30 | pred_pose[0],
31 | min_cutoff=min_cutoff,
32 | beta=beta,
33 | )
34 |
35 | smpl = SMPL(model_path=SMPL_MODEL_DIR)
36 |
37 | pred_pose_hat = np.zeros_like(pred_pose)
38 |
39 | # initialize
40 | pred_pose_hat[0] = pred_pose[0]
41 |
42 | pred_verts_hat = []
43 | pred_joints3d_hat = []
44 |
45 | smpl_output = smpl(
46 | betas=torch.from_numpy(pred_betas[0]).unsqueeze(0),
47 | body_pose=torch.from_numpy(pred_pose[0, 1:]).unsqueeze(0),
48 | global_orient=torch.from_numpy(pred_pose[0, 0:1]).unsqueeze(0),
49 | )
50 | pred_verts_hat.append(smpl_output.vertices.detach().cpu().numpy())
51 | pred_joints3d_hat.append(smpl_output.joints.detach().cpu().numpy())
52 |
53 | for idx, pose in enumerate(pred_pose[1:]):
54 | idx += 1
55 |
56 | t = np.ones_like(pose) * idx
57 | pose = one_euro_filter(t, pose)
58 | pred_pose_hat[idx] = pose
59 |
60 | smpl_output = smpl(
61 | betas=torch.from_numpy(pred_betas[idx]).unsqueeze(0),
62 | body_pose=torch.from_numpy(pred_pose_hat[idx, 1:]).unsqueeze(0),
63 | global_orient=torch.from_numpy(pred_pose_hat[idx, 0:1]).unsqueeze(0),
64 | )
65 | pred_verts_hat.append(smpl_output.vertices.detach().cpu().numpy())
66 | pred_joints3d_hat.append(smpl_output.joints.detach().cpu().numpy())
67 |
68 | return np.vstack(pred_verts_hat), pred_pose_hat, np.vstack(pred_joints3d_hat)
--------------------------------------------------------------------------------
/lib/dataset/insta.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import h5py
18 | import torch
19 | import logging
20 | import numpy as np
21 | import os.path as osp
22 |
23 | from torch.utils.data import Dataset
24 | from lib.core.config import VIBE_DB_DIR
25 | from lib.data_utils.kp_utils import convert_kps
26 | from lib.data_utils.img_utils import normalize_2d_kp, split_into_chunks
27 |
28 | logger = logging.getLogger(__name__)
29 |
30 | class Insta(Dataset):
31 | def __init__(self, seqlen, overlap=0., debug=False):
32 | self.seqlen = seqlen
33 | self.stride = int(seqlen * (1-overlap))
34 |
35 | self.h5_file = osp.join(VIBE_DB_DIR, 'insta_train_db.h5')
36 |
37 | with h5py.File(self.h5_file, 'r') as db:
38 | self.db = db
39 | self.vid_indices = split_into_chunks(self.db['vid_name'], self.seqlen, self.stride)
40 |
41 | print(f'InstaVariety number of dataset objects {self.__len__()}')
42 |
43 | def __len__(self):
44 | return len(self.vid_indices)
45 |
46 | def __getitem__(self, index):
47 | return self.get_single_item(index)
48 |
49 | def get_single_item(self, index):
50 | start_index, end_index = self.vid_indices[index]
51 |
52 | with h5py.File(self.h5_file, 'r') as db:
53 | self.db = db
54 |
55 | kp_2d = self.db['joints2D'][start_index:end_index + 1]
56 | kp_2d = convert_kps(kp_2d, src='insta', dst='spin')
57 | kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)
58 |
59 |
60 | input = torch.from_numpy(self.db['features'][start_index:end_index+1]).float()
61 |
62 | vid_name = self.db['vid_name'][start_index:end_index + 1]
63 | frame_id = self.db['frame_id'][start_index:end_index + 1].astype(str)
64 | instance_id = np.array([v.decode('ascii') + f for v, f in zip(vid_name, frame_id)])
65 |
66 | for idx in range(self.seqlen):
67 | kp_2d[idx,:,:2] = normalize_2d_kp(kp_2d[idx,:,:2], 224)
68 | kp_2d_tensor[idx] = kp_2d[idx]
69 |
70 | target = {
71 | 'features': input,
72 | 'kp_2d': torch.from_numpy(kp_2d_tensor).float(), # 2D keypoints transformed according to bbox cropping
73 | # 'instance_id': instance_id
74 | }
75 |
76 | return target
--------------------------------------------------------------------------------
/lib/dataset/loaders.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | from torch.utils.data import ConcatDataset, DataLoader
18 |
19 | from lib.dataset import *
20 |
21 |
22 | def get_data_loaders(cfg):
23 | def get_2d_datasets(dataset_names):
24 | datasets = []
25 | for dataset_name in dataset_names:
26 | db = eval(dataset_name)(seqlen=cfg.DATASET.SEQLEN, debug=cfg.DEBUG)
27 | datasets.append(db)
28 | return ConcatDataset(datasets)
29 |
30 | def get_3d_datasets(dataset_names):
31 | datasets = []
32 | for dataset_name in dataset_names:
33 | db = eval(dataset_name)(set='train', seqlen=cfg.DATASET.SEQLEN, debug=cfg.DEBUG)
34 | datasets.append(db)
35 | return ConcatDataset(datasets)
36 |
37 | # ===== 2D keypoint datasets =====
38 | train_2d_dataset_names = cfg.TRAIN.DATASETS_2D
39 | train_2d_db = get_2d_datasets(train_2d_dataset_names)
40 |
41 | data_2d_batch_size = int(cfg.TRAIN.BATCH_SIZE * cfg.TRAIN.DATA_2D_RATIO)
42 | data_3d_batch_size = cfg.TRAIN.BATCH_SIZE - data_2d_batch_size
43 |
44 | train_2d_loader = DataLoader(
45 | dataset=train_2d_db,
46 | batch_size=data_2d_batch_size,
47 | shuffle=True,
48 | num_workers=cfg.NUM_WORKERS,
49 | )
50 |
51 | # ===== 3D keypoint datasets =====
52 | train_3d_dataset_names = cfg.TRAIN.DATASETS_3D
53 | train_3d_db = get_3d_datasets(train_3d_dataset_names)
54 |
55 | train_3d_loader = DataLoader(
56 | dataset=train_3d_db,
57 | batch_size=data_3d_batch_size,
58 | shuffle=True,
59 | num_workers=cfg.NUM_WORKERS,
60 | )
61 |
62 | # ===== Motion Discriminator dataset =====
63 | motion_disc_db = AMASS(seqlen=cfg.DATASET.SEQLEN)
64 |
65 | motion_disc_loader = DataLoader(
66 | dataset=motion_disc_db,
67 | batch_size=cfg.TRAIN.BATCH_SIZE,
68 | shuffle=True,
69 | num_workers=cfg.NUM_WORKERS,
70 | )
71 |
72 | # ===== Evaluation dataset =====
73 | valid_db = eval(cfg.TRAIN.DATASET_EVAL)(set='val', seqlen=cfg.DATASET.SEQLEN, debug=cfg.DEBUG)
74 |
75 | valid_loader = DataLoader(
76 | dataset=valid_db,
77 | batch_size=cfg.TRAIN.BATCH_SIZE,
78 | shuffle=False,
79 | num_workers=cfg.NUM_WORKERS,
80 | )
81 |
82 | return train_2d_loader, train_3d_loader, motion_disc_loader, valid_loader
--------------------------------------------------------------------------------
/lib/models/attention.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import torch
18 | from torch import nn
19 |
20 | def init_weights(m):
21 | if type(m) == nn.Linear:
22 | torch.nn.init.uniform_(m.weight, -0.1, 0.1)
23 | m.bias.data.fill_(0.01)
24 |
25 | class SelfAttention(nn.Module):
26 | def __init__(self, attention_size,
27 | batch_first=False,
28 | layers=1,
29 | dropout=.0,
30 | non_linearity="tanh"):
31 | super(SelfAttention, self).__init__()
32 |
33 | self.batch_first = batch_first
34 |
35 | if non_linearity == "relu":
36 | activation = nn.ReLU()
37 | else:
38 | activation = nn.Tanh()
39 |
40 | modules = []
41 | for i in range(layers - 1):
42 | modules.append(nn.Linear(attention_size, attention_size))
43 | modules.append(activation)
44 | modules.append(nn.Dropout(dropout))
45 |
46 | # last attention layer must output 1
47 | modules.append(nn.Linear(attention_size, 1))
48 | modules.append(activation)
49 | modules.append(nn.Dropout(dropout))
50 |
51 | self.attention = nn.Sequential(*modules)
52 | self.attention.apply(init_weights)
53 | self.softmax = nn.Softmax(dim=-1)
54 |
55 |
56 | def forward(self, inputs):
57 |
58 | ##################################################################
59 | # STEP 1 - perform dot product
60 | # of the attention vector and each hidden state
61 | ##################################################################
62 |
63 | # inputs is a 3D Tensor: batch, len, hidden_size
64 | # scores is a 2D Tensor: batch, len
65 | scores = self.attention(inputs).squeeze()
66 | scores = self.softmax(scores)
67 |
68 | ##################################################################
69 | # Step 2 - Weighted sum of hidden states, by the attention scores
70 | ##################################################################
71 |
72 | # multiply each hidden state with the attention weights
73 | weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs))
74 |
75 | # sum the hidden states
76 | # representations = weighted.sum(1).squeeze()
77 | representations = weighted.sum(1).squeeze()
78 | return representations, scores
79 |
80 |
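81 | # Example usage (a minimal sketch; mirrors how MotionDiscriminator wires this module, with the
82 | # sizes taken from MOT_DISCR.ATT in configs/config.yaml):
83 | #   att = SelfAttention(attention_size=1024, layers=3, dropout=0.2)
84 | #   x = torch.randn(8, 16, 1024)        # (batch, seq_len, hidden_size)
85 | #   representations, scores = att(x)    # (batch, hidden_size), (batch, seq_len)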
--------------------------------------------------------------------------------
/vibe_demo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "view-in-github"
8 | },
9 | "source": [
10 | ""
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "kJe1q2JFK4LZ"
17 | },
18 | "source": [
19 |         "# Setup"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {
26 | "id": "Tvd4cfPk5a0e"
27 | },
28 | "outputs": [],
29 | "source": [
30 |         "# Clone the code from GitHub\n",
31 | "!git clone https://github.com/cedro3/VIBE.git\n",
32 | "%cd VIBE/\n",
33 | "\n",
34 |         "# Install the required libraries\n",
35 | "!pip install torch==1.4.0 numpy==1.17.5\n",
36 | "!pip install git+https://github.com/giacaglia/pytube.git --upgrade\n",
37 | "!pip install -r requirements.txt\n",
38 | "\n",
39 |         "# Download the pretrained weights and SMPL data\n",
40 | "!source scripts/prepare_data.sh"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {
46 | "id": "nflTgaAWLqsu"
47 | },
48 | "source": [
49 |         "# Run the demo\n",
50 |         "Adding the --sideview option at the end also renders the estimate from a side view."
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {
57 | "id": "qVNszfLQ7rC9"
58 | },
59 | "outputs": [],
60 | "source": [
61 |         "# Run the demo\n",
62 | "!python demo.py --vid_file girl_dance.mp4 --output_folder output/ "
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {
69 | "id": "j8zxBa_K-FJf"
70 | },
71 | "outputs": [],
72 | "source": [
73 |         "# Play the generated video\n",
74 | "from IPython.display import HTML\n",
75 | "from base64 import b64encode\n",
76 | "\n",
77 | "def video(path):\n",
78 |         "  mp4 = open(path,'rb').read()\n",
79 |         "  data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
80 |         "  return HTML('<video width=500 controls><source src=\"%s\" type=\"video/mp4\"></video>' % data_url)\n",
81 | "\n",
82 | "video('output/girl_dance/girl_dance_vibe_result.mp4') "
83 | ]
84 | }
85 | ],
86 | "metadata": {
87 | "accelerator": "GPU",
88 | "colab": {
89 | "collapsed_sections": [],
90 | "include_colab_link": true,
91 | "name": "vibe_demo",
92 | "provenance": [],
93 | "toc_visible": true
94 | },
95 | "kernelspec": {
96 | "display_name": "Python 3",
97 | "language": "python",
98 | "name": "python3"
99 | },
100 | "language_info": {
101 | "codemirror_mode": {
102 | "name": "ipython",
103 | "version": 3
104 | },
105 | "file_extension": ".py",
106 | "mimetype": "text/x-python",
107 | "name": "python",
108 | "nbconvert_exporter": "python",
109 | "pygments_lexer": "ipython3",
110 | "version": "3.7.9"
111 | }
112 | },
113 | "nbformat": 4,
114 | "nbformat_minor": 1
115 | }
116 |
--------------------------------------------------------------------------------
/lib/utils/pose_tracker.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import os
18 | import json
19 | import shutil
20 | import subprocess
21 | import numpy as np
22 | import os.path as osp
23 |
24 |
25 | def run_openpose(
26 | video_file,
27 | output_folder,
28 | staf_folder,
29 | vis=False,
30 | ):
31 | pwd = os.getcwd()
32 |
33 | os.chdir(staf_folder)
34 |
35 | render = 1 if vis else 0
36 | display = 2 if vis else 0
37 | cmd = [
38 | 'build/examples/openpose/openpose.bin',
39 | '--model_pose', 'BODY_21A',
40 | '--tracking', '1',
41 | '--render_pose', str(render),
42 | '--video', video_file,
43 | '--write_json', output_folder,
44 | '--display', str(display)
45 | ]
46 |
47 | print('Executing', ' '.join(cmd))
48 | subprocess.call(cmd)
49 | os.chdir(pwd)
50 |
51 |
52 | def read_posetrack_keypoints(output_folder):
53 |
54 | people = dict()
55 |
56 | for idx, result_file in enumerate(sorted(os.listdir(output_folder))):
57 | json_file = osp.join(output_folder, result_file)
58 | data = json.load(open(json_file))
59 | # print(idx, data)
60 | for person in data['people']:
61 | person_id = person['person_id'][0]
62 | joints2d = person['pose_keypoints_2d']
63 | if person_id in people.keys():
64 | people[person_id]['joints2d'].append(joints2d)
65 | people[person_id]['frames'].append(idx)
66 | else:
67 | people[person_id] = {
68 | 'joints2d': [],
69 | 'frames': [],
70 | }
71 | people[person_id]['joints2d'].append(joints2d)
72 | people[person_id]['frames'].append(idx)
73 |
74 | for k in people.keys():
75 | people[k]['joints2d'] = np.array(people[k]['joints2d']).reshape((len(people[k]['joints2d']), -1, 3))
76 | people[k]['frames'] = np.array(people[k]['frames'])
77 |
78 | return people
79 |
80 |
81 | def run_posetracker(video_file, staf_folder, posetrack_output_folder='/tmp', display=False):
82 | posetrack_output_folder = os.path.join(
83 | posetrack_output_folder,
84 | f'{os.path.basename(video_file)}_posetrack'
85 | )
86 |
87 | # run posetrack on video
88 | run_openpose(
89 | video_file,
90 | posetrack_output_folder,
91 | vis=display,
92 | staf_folder=staf_folder
93 | )
94 |
95 | people_dict = read_posetrack_keypoints(posetrack_output_folder)
96 |
97 | shutil.rmtree(posetrack_output_folder)
98 |
99 | return people_dict
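100 | 
101 | 
102 | # Example usage (a sketch; the paths are placeholders and a built OpenPose/STAF tracker is required):
103 | #   people = run_posetracker('video.mp4', staf_folder='/path/to/STAF')
104 | #   for person_id, track in people.items():
105 | #       joints2d, frames = track['joints2d'], track['frames']  # (T, n_joints, 3) keypoints, (T,) frame ids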
--------------------------------------------------------------------------------
/lib/models/motion_discriminator.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import torch
18 | import torch.nn as nn
19 | import torch.nn.functional as F
20 | from torch.nn.utils import spectral_norm
21 | from lib.models.attention import SelfAttention
22 |
23 | class MotionDiscriminator(nn.Module):
24 |
25 | def __init__(self,
26 | rnn_size,
27 | input_size,
28 | num_layers,
29 | output_size=2,
30 | feature_pool="concat",
31 | use_spectral_norm=False,
32 | attention_size=1024,
33 | attention_layers=1,
34 | attention_dropout=0.5):
35 |
36 | super(MotionDiscriminator, self).__init__()
37 | self.input_size = input_size
38 | self.rnn_size = rnn_size
39 | self.feature_pool = feature_pool
40 | self.num_layers = num_layers
41 | self.attention_size = attention_size
42 | self.attention_layers = attention_layers
43 | self.attention_dropout = attention_dropout
44 |
45 | self.gru = nn.GRU(self.input_size, self.rnn_size, num_layers=num_layers)
46 |
47 | linear_size = self.rnn_size if not feature_pool == "concat" else self.rnn_size * 2
48 |
49 | if feature_pool == "attention" :
50 | self.attention = SelfAttention(attention_size=self.attention_size,
51 | layers=self.attention_layers,
52 | dropout=self.attention_dropout)
53 | if use_spectral_norm:
54 | self.fc = spectral_norm(nn.Linear(linear_size, output_size))
55 | else:
56 | self.fc = nn.Linear(linear_size, output_size)
57 |
58 | def forward(self, sequence):
59 | """
60 | sequence: of shape [batch_size, seq_len, input_size]
61 | """
62 | batchsize, seqlen, input_size = sequence.shape
63 | sequence = torch.transpose(sequence, 0, 1)
64 |
65 | outputs, state = self.gru(sequence)
66 |
67 | if self.feature_pool == "concat":
68 | outputs = F.relu(outputs)
69 | avg_pool = F.adaptive_avg_pool1d(outputs.permute(1, 2, 0), 1).view(batchsize, -1)
70 | max_pool = F.adaptive_max_pool1d(outputs.permute(1, 2, 0), 1).view(batchsize, -1)
71 | output = self.fc(torch.cat([avg_pool, max_pool], dim=1))
72 | elif self.feature_pool == "attention":
73 | outputs = outputs.permute(1, 0, 2)
74 | y, attentions = self.attention(outputs)
75 | output = self.fc(y)
76 | else:
77 | output = self.fc(outputs[-1])
78 |
79 | return output
80 |
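81 | # Example usage (a sketch with hypothetical sizes; the trainer decides the actual input_size of
82 | # the pose sequences fed to the discriminator):
83 | #   disc = MotionDiscriminator(rnn_size=1024, input_size=69, num_layers=2, feature_pool='attention')
84 | #   logits = disc(torch.randn(8, 16, 69))  # (batch, seq_len, input_size) -> (batch, output_size)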
--------------------------------------------------------------------------------
/lib/dataset/inference.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import os
18 | import cv2
19 | import numpy as np
20 | import os.path as osp
21 | from torch.utils.data import Dataset
22 | from torchvision.transforms.functional import to_tensor
23 |
24 | from lib.utils.smooth_bbox import get_all_bbox_params
25 | from lib.data_utils.img_utils import get_single_image_crop_demo
26 |
27 |
28 | class Inference(Dataset):
29 | def __init__(self, image_folder, frames, bboxes=None, joints2d=None, scale=1.0, crop_size=224):
30 | self.image_file_names = [
31 | osp.join(image_folder, x)
32 | for x in os.listdir(image_folder)
33 | if x.endswith('.png') or x.endswith('.jpg')
34 | ]
35 | self.image_file_names = sorted(self.image_file_names)
36 | self.image_file_names = np.array(self.image_file_names)[frames]
37 | self.bboxes = bboxes
38 | self.joints2d = joints2d
39 | self.scale = scale
40 | self.crop_size = crop_size
41 | self.frames = frames
42 | self.has_keypoints = True if joints2d is not None else False
43 |
44 | self.norm_joints2d = np.zeros_like(self.joints2d)
45 |
46 | if self.has_keypoints:
47 | bboxes, time_pt1, time_pt2 = get_all_bbox_params(joints2d, vis_thresh=0.3)
48 | bboxes[:, 2:] = 150. / bboxes[:, 2:]
49 | self.bboxes = np.stack([bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 2]]).T
50 |
51 | self.image_file_names = self.image_file_names[time_pt1:time_pt2]
52 | self.joints2d = joints2d[time_pt1:time_pt2]
53 | self.frames = frames[time_pt1:time_pt2]
54 |
55 | def __len__(self):
56 | return len(self.image_file_names)
57 |
58 | def __getitem__(self, idx):
59 | img = cv2.cvtColor(cv2.imread(self.image_file_names[idx]), cv2.COLOR_BGR2RGB)
60 |
61 | bbox = self.bboxes[idx]
62 |
63 | j2d = self.joints2d[idx] if self.has_keypoints else None
64 |
65 | norm_img, raw_img, kp_2d = get_single_image_crop_demo(
66 | img,
67 | bbox,
68 | kp_2d=j2d,
69 | scale=self.scale,
70 | crop_size=self.crop_size)
71 | if self.has_keypoints:
72 | return norm_img, kp_2d
73 | else:
74 | return norm_img
75 |
76 |
77 | class ImageFolder(Dataset):
78 | def __init__(self, image_folder):
79 | self.image_file_names = [
80 | osp.join(image_folder, x)
81 | for x in os.listdir(image_folder)
82 | if x.endswith('.png') or x.endswith('.jpg')
83 | ]
84 | self.image_file_names = sorted(self.image_file_names)
85 |
86 | def __len__(self):
87 | return len(self.image_file_names)
88 |
89 | def __getitem__(self, idx):
90 | img = cv2.cvtColor(cv2.imread(self.image_file_names[idx]), cv2.COLOR_BGR2RGB)
91 | return to_tensor(img)
92 |
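93 | # Example usage (illustrative only; `frames` and `bboxes` would normally come from a person
94 | # tracker or precomputed 2D joints):
95 | #   dataset = Inference(image_folder='/tmp/frames', frames=frames, bboxes=bboxes)
96 | #   norm_img = dataset[0]  # cropped and normalized image tensor for the first selected frame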
--------------------------------------------------------------------------------
/doc/train.md:
--------------------------------------------------------------------------------
1 | # Training Instructions
2 |
3 | Throughout the documentation, we refer to the VIBE root folder as `$ROOT`.
4 |
5 | ## Data Preparation
6 | During training, VIBE uses precomputed image features to reduce training time. Thus, we process the datasets into a
7 | standard format before using them for training. To obtain these standard training files, you need to run:
8 |
9 | ```shell script
10 | source scripts/prepare_training_data.sh
11 | ```
12 |
13 | This script will first create a folder for the dataset files `$ROOT/data/vibe_db`, then process each dataset and save
14 | output files to this directory. Before proceeding, you need to download each of the datasets listed
15 | below, then modify the `--dir` argument in the script to point to the
16 | directory of each dataset.
17 |
18 |
19 |
20 | ## Datasets
21 |
22 | - **AMASS** (https://amass.is.tue.mpg.de)
23 |
24 | Directory structure:
25 |
26 | ```shell script
27 | amass
28 | |-- ACCAD
29 | |-- BioMotionLab_NTroje
30 | |-- CMU
31 | |-- ...
32 | `-- Transitions_mocap
33 | ```
34 |
35 | - **InstaVariety**
36 |
37 | For your convenience, we uploaded the preprocessed InstaVariety data
38 | [here](https://owncloud.tuebingen.mpg.de/index.php/s/MKLnHtPjwn24y9C) (size: 18 GB).
39 | After downloading the file, put it under
40 | `$ROOT/data/vibe_db`. Do not forget to verify the checksums as a sanity check:
41 | ```
42 | md5sum : 8ec335d1d48bd54687ad5c9a6eeb2999
43 | sha256sum : 7eadff77043cd85b49cbba8bfc9111c4305792ca64da1b92fb40fa702689dfa9
44 | ```
45 |
46 | You may also preprocess the dataset yourself by downloading the
47 | [preprocessed tfrecords](https://github.com/akanazawa/human_dynamics/blob/master/doc/insta_variety.md#pre-processed-tfrecords)
48 | provided by the authors of Temporal HMR.
49 |
50 | Directory structure:
51 | ```shell script
52 | insta_variety
53 | |-- train
54 | | |-- insta_variety_00_copy00_hmr_noS5.ckpt-642561.tfrecord
55 | | |-- insta_variety_01_copy00_hmr_noS5.ckpt-642561.tfrecord
56 | | `-- ...
57 | `-- test
58 | |-- insta_variety_00_copy00_hmr_noS5.ckpt-642561.tfrecord
59 | |-- insta_variety_01_copy00_hmr_noS5.ckpt-642561.tfrecord
60 | `-- ...
61 | ```
62 |
63 | - **MPI-INF-3D-HP** (http://gvv.mpi-inf.mpg.de/3dhp-dataset)
64 |
65 |   Download the dataset using the bash script provided by the authors. We will be using standard cameras only, so wall and ceiling
66 | cameras aren't needed. Then, run this
67 |   [script](https://gist.github.com/mkocabas/cc6fe78aac51f97859e45f46476882b6) to extract frames from the videos.
68 |
69 | Directory structure:
70 | ```shell script
71 |
72 | mpi_inf_3dhp
73 | |-- S1
74 | | |-- Seq1
75 | | |-- Seq2
76 | |-- S2
77 | | |-- Seq1
78 | | |-- Seq2
79 | |-- ...
80 | `-- util
81 | ```
82 |
83 | - **3DPW** (https://virtualhumans.mpi-inf.mpg.de/3DPW)
84 |
85 | Directory structure:
86 | ```shell script
87 | 3dpw
88 | |-- imageFiles
89 | | |-- courtyard_arguing_00
90 | | |-- courtyard_backpack_00
91 | | |-- ...
92 | `-- sequenceFiles
93 | |-- test
94 | |-- train
95 | `-- validation
96 | ```
97 |
98 | - **PennAction** (http://dreamdragon.github.io/PennAction/)
99 |
100 | Directory structure:
101 | ```shell script
102 | pennaction
103 | |-- frames
104 | | |-- 0000
105 | | |-- 0001
106 | | |-- ...
107 | `-- labels
108 | |-- 0000.mat
109 | |-- 0001.mat
110 | `-- ...
111 | ```
112 |
113 | - **PoseTrack** (https://posetrack.net/)
114 |
115 | Directory structure:
116 | ```shell script
117 | posetrack
118 | |-- images
119 | | |-- train
120 | | |-- val
121 | | |-- test
122 | `-- posetrack_data
123 | `-- annotations
124 | |-- train
125 | |-- val
126 | `-- test
127 | ```
128 |
129 |
130 |
131 | ## Training
132 | Run the command below to start training.
133 |
134 | ```shell script
135 | python train.py --cfg configs/config.yaml
136 | ```
137 |
138 | See [`configs/config.yaml`](configs/config.yaml) or [`config.py`](lib/core/config.py) to
139 | play with different configurations.
140 |
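141 | For orientation, the training entry point wires the configuration and the data loaders together roughly as in the sketch below (based on `lib/core/config.py` and `lib/dataset/loaders.py`; see `train.py` for the full training loop):
142 | 
143 | ```python
144 | from lib.core.config import parse_args
145 | from lib.dataset.loaders import get_data_loaders
146 | 
147 | cfg, cfg_file = parse_args()  # reads --cfg configs/config.yaml
148 | 
149 | # 2D/3D keypoint loaders, the AMASS loader for the motion discriminator, and the evaluation loader
150 | train_2d_loader, train_3d_loader, motion_disc_loader, valid_loader = get_data_loaders(cfg)
151 | ```
152 | 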
--------------------------------------------------------------------------------
/lib/models/smpl.py:
--------------------------------------------------------------------------------
1 | # This script is borrowed and extended from https://github.com/nkolot/SPIN/blob/master/models/hmr.py
2 | # Adhere to their licence to use this script
3 |
4 | import torch
5 | import numpy as np
6 | import os.path as osp
7 | from smplx import SMPL as _SMPL
8 | from smplx.body_models import ModelOutput
9 | from smplx.lbs import vertices2joints
10 |
11 | from lib.core.config import VIBE_DATA_DIR
12 |
13 | # Map joints to SMPL joints
14 | JOINT_MAP = {
15 | 'OP Nose': 24, 'OP Neck': 12, 'OP RShoulder': 17,
16 | 'OP RElbow': 19, 'OP RWrist': 21, 'OP LShoulder': 16,
17 | 'OP LElbow': 18, 'OP LWrist': 20, 'OP MidHip': 0,
18 | 'OP RHip': 2, 'OP RKnee': 5, 'OP RAnkle': 8,
19 | 'OP LHip': 1, 'OP LKnee': 4, 'OP LAnkle': 7,
20 | 'OP REye': 25, 'OP LEye': 26, 'OP REar': 27,
21 | 'OP LEar': 28, 'OP LBigToe': 29, 'OP LSmallToe': 30,
22 | 'OP LHeel': 31, 'OP RBigToe': 32, 'OP RSmallToe': 33, 'OP RHeel': 34,
23 | 'Right Ankle': 8, 'Right Knee': 5, 'Right Hip': 45,
24 | 'Left Hip': 46, 'Left Knee': 4, 'Left Ankle': 7,
25 | 'Right Wrist': 21, 'Right Elbow': 19, 'Right Shoulder': 17,
26 | 'Left Shoulder': 16, 'Left Elbow': 18, 'Left Wrist': 20,
27 | 'Neck (LSP)': 47, 'Top of Head (LSP)': 48,
28 | 'Pelvis (MPII)': 49, 'Thorax (MPII)': 50,
29 | 'Spine (H36M)': 51, 'Jaw (H36M)': 52,
30 | 'Head (H36M)': 53, 'Nose': 24, 'Left Eye': 26,
31 | 'Right Eye': 25, 'Left Ear': 28, 'Right Ear': 27
32 | }
33 | JOINT_NAMES = [
34 | 'OP Nose', 'OP Neck', 'OP RShoulder',
35 | 'OP RElbow', 'OP RWrist', 'OP LShoulder',
36 | 'OP LElbow', 'OP LWrist', 'OP MidHip',
37 | 'OP RHip', 'OP RKnee', 'OP RAnkle',
38 | 'OP LHip', 'OP LKnee', 'OP LAnkle',
39 | 'OP REye', 'OP LEye', 'OP REar',
40 | 'OP LEar', 'OP LBigToe', 'OP LSmallToe',
41 | 'OP LHeel', 'OP RBigToe', 'OP RSmallToe', 'OP RHeel',
42 | 'Right Ankle', 'Right Knee', 'Right Hip',
43 | 'Left Hip', 'Left Knee', 'Left Ankle',
44 | 'Right Wrist', 'Right Elbow', 'Right Shoulder',
45 | 'Left Shoulder', 'Left Elbow', 'Left Wrist',
46 | 'Neck (LSP)', 'Top of Head (LSP)',
47 | 'Pelvis (MPII)', 'Thorax (MPII)',
48 | 'Spine (H36M)', 'Jaw (H36M)',
49 | 'Head (H36M)', 'Nose', 'Left Eye',
50 | 'Right Eye', 'Left Ear', 'Right Ear'
51 | ]
52 |
53 | JOINT_IDS = {JOINT_NAMES[i]: i for i in range(len(JOINT_NAMES))}
54 | JOINT_REGRESSOR_TRAIN_EXTRA = osp.join(VIBE_DATA_DIR, 'J_regressor_extra.npy')
55 | SMPL_MEAN_PARAMS = osp.join(VIBE_DATA_DIR, 'smpl_mean_params.npz')
56 | SMPL_MODEL_DIR = VIBE_DATA_DIR
57 | H36M_TO_J17 = [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10, 0, 7, 9]
58 | H36M_TO_J14 = H36M_TO_J17[:14]
59 |
60 |
61 | class SMPL(_SMPL):
62 | """ Extension of the official SMPL implementation to support more joints """
63 |
64 | def __init__(self, *args, **kwargs):
65 | super(SMPL, self).__init__(*args, **kwargs)
66 | joints = [JOINT_MAP[i] for i in JOINT_NAMES]
67 | J_regressor_extra = np.load(JOINT_REGRESSOR_TRAIN_EXTRA)
68 | self.register_buffer('J_regressor_extra', torch.tensor(J_regressor_extra, dtype=torch.float32))
69 | self.joint_map = torch.tensor(joints, dtype=torch.long)
70 |
71 | def forward(self, *args, **kwargs):
72 | kwargs['get_skin'] = True
73 | smpl_output = super(SMPL, self).forward(*args, **kwargs)
74 | extra_joints = vertices2joints(self.J_regressor_extra, smpl_output.vertices)
75 | joints = torch.cat([smpl_output.joints, extra_joints], dim=1)
76 | joints = joints[:, self.joint_map, :]
77 | output = ModelOutput(vertices=smpl_output.vertices,
78 | global_orient=smpl_output.global_orient,
79 | body_pose=smpl_output.body_pose,
80 | joints=joints,
81 | betas=smpl_output.betas,
82 | full_pose=smpl_output.full_pose)
83 | return output
84 |
85 |
86 | def get_smpl_faces():
87 | smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
88 | return smpl.faces
--------------------------------------------------------------------------------
/lib/core/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import argparse
18 | from yacs.config import CfgNode as CN
19 |
20 | # CONSTANTS
21 | # You may modify them at will
22 | VIBE_DB_DIR = 'data/vibe_db'
23 | AMASS_DIR = 'data/amass'
24 | INSTA_DIR = 'data/insta_variety'
25 | MPII3D_DIR = 'data/mpi_inf_3dhp'
26 | THREEDPW_DIR = 'data/3dpw'
27 | PENNACTION_DIR = 'data/penn_action'
28 | POSETRACK_DIR = 'data/posetrack'
29 | VIBE_DATA_DIR = 'data/vibe_data'
30 |
31 | # Configuration variables
32 | cfg = CN()
33 |
34 | cfg.OUTPUT_DIR = 'results'
35 | cfg.EXP_NAME = 'default'
36 | cfg.DEVICE = 'cuda'
37 | cfg.DEBUG = True
38 | cfg.LOGDIR = ''
39 | cfg.NUM_WORKERS = 8
40 | cfg.DEBUG_FREQ = 1000
41 | cfg.SEED_VALUE = -1
42 |
43 | cfg.CUDNN = CN()
44 | cfg.CUDNN.BENCHMARK = True
45 | cfg.CUDNN.DETERMINISTIC = False
46 | cfg.CUDNN.ENABLED = True
47 |
48 | cfg.TRAIN = CN()
49 | cfg.TRAIN.DATASETS_2D = ['Insta']
50 | cfg.TRAIN.DATASETS_3D = ['MPII3D']
51 | cfg.TRAIN.DATASET_EVAL = 'ThreeDPW'
52 | cfg.TRAIN.BATCH_SIZE = 32
53 | cfg.TRAIN.DATA_2D_RATIO = 0.5
54 | cfg.TRAIN.START_EPOCH = 0
55 | cfg.TRAIN.END_EPOCH = 5
56 | cfg.TRAIN.PRETRAINED_REGRESSOR = ''
57 | cfg.TRAIN.PRETRAINED = ''
58 | cfg.TRAIN.RESUME = ''
59 | cfg.TRAIN.NUM_ITERS_PER_EPOCH = 1000
60 | cfg.TRAIN.LR_PATIENCE = 5
61 |
62 | # <====== generator optimizer
63 | cfg.TRAIN.GEN_OPTIM = 'Adam'
64 | cfg.TRAIN.GEN_LR = 1e-4
65 | cfg.TRAIN.GEN_WD = 1e-4
66 | cfg.TRAIN.GEN_MOMENTUM = 0.9
67 |
68 | # <====== motion discriminator optimizer
69 | cfg.TRAIN.MOT_DISCR = CN()
70 | cfg.TRAIN.MOT_DISCR.OPTIM = 'SGD'
71 | cfg.TRAIN.MOT_DISCR.LR = 1e-2
72 | cfg.TRAIN.MOT_DISCR.WD = 1e-4
73 | cfg.TRAIN.MOT_DISCR.MOMENTUM = 0.9
74 | cfg.TRAIN.MOT_DISCR.UPDATE_STEPS = 1
75 | cfg.TRAIN.MOT_DISCR.FEATURE_POOL = 'concat'
76 | cfg.TRAIN.MOT_DISCR.HIDDEN_SIZE = 1024
77 | cfg.TRAIN.MOT_DISCR.NUM_LAYERS = 1
78 | cfg.TRAIN.MOT_DISCR.ATT = CN()
79 | cfg.TRAIN.MOT_DISCR.ATT.SIZE = 1024
80 | cfg.TRAIN.MOT_DISCR.ATT.LAYERS = 1
81 | cfg.TRAIN.MOT_DISCR.ATT.DROPOUT = 0.1
82 |
83 | cfg.DATASET = CN()
84 | cfg.DATASET.SEQLEN = 20
85 | cfg.DATASET.OVERLAP = 0.5
86 |
87 | cfg.LOSS = CN()
88 | cfg.LOSS.KP_2D_W = 60.
89 | cfg.LOSS.KP_3D_W = 30.
90 | cfg.LOSS.SHAPE_W = 0.001
91 | cfg.LOSS.POSE_W = 1.0
92 | cfg.LOSS.D_MOTION_LOSS_W = 1.
93 |
94 | cfg.MODEL = CN()
95 |
96 | cfg.MODEL.TEMPORAL_TYPE = 'gru'
97 |
98 | # GRU model hyperparams
99 | cfg.MODEL.TGRU = CN()
100 | cfg.MODEL.TGRU.NUM_LAYERS = 1
101 | cfg.MODEL.TGRU.ADD_LINEAR = False
102 | cfg.MODEL.TGRU.RESIDUAL = False
103 | cfg.MODEL.TGRU.HIDDEN_SIZE = 2048
104 | cfg.MODEL.TGRU.BIDIRECTIONAL = False
105 |
106 |
107 | def get_cfg_defaults():
108 | """Get a yacs CfgNode object with default values for my_project."""
109 | # Return a clone so that the defaults will not be altered
110 | # This is for the "local variable" use pattern
111 | return cfg.clone()
112 |
113 |
114 | def update_cfg(cfg_file):
115 | cfg = get_cfg_defaults()
116 | cfg.merge_from_file(cfg_file)
117 | return cfg.clone()
118 |
119 |
120 | def parse_args():
121 | parser = argparse.ArgumentParser()
122 | parser.add_argument('--cfg', type=str, help='cfg file path')
123 |
124 | args = parser.parse_args()
125 | print(args, end='\n\n')
126 |
127 | cfg_file = args.cfg
128 | if args.cfg is not None:
129 | cfg = update_cfg(args.cfg)
130 | else:
131 | cfg = get_cfg_defaults()
132 |
133 | return cfg, cfg_file
134 |
--------------------------------------------------------------------------------
/lib/data_utils/feature_extractor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import os
18 | import torch
19 | import torchvision
20 | import numpy as np
21 | import matplotlib.pyplot as plt
22 |
23 | from lib.utils.vis import batch_visualize_preds
24 | from lib.data_utils.img_utils import get_single_image_crop, convert_cvimg_to_tensor
25 |
26 |
27 | def extract_features(model, video, bbox, debug=False, batch_size=200, kp_2d=None, dataset=None, scale=1.3):
28 |     '''
29 |     :param model: pretrained HMR model, see lib/models/spin.py:get_pretrained_hmr()
30 |     :param video: video filename, or torch.Tensor / np.ndarray of frames in shape (num_frames, H, W, C)
31 |     :param bbox: bbox array in shape (T,4)
32 |     :param debug: boolean, true if you want to debug HMR predictions
33 |     :param batch_size: batch size for HMR input
34 |     :return: features: resnet50 features np.ndarray -> shape (num_frames, 2048)
35 |     '''
36 | device = 'cuda'
37 |
38 | if isinstance(video, torch.Tensor) or isinstance(video, np.ndarray):
39 | video = video
40 | elif isinstance(video, str):
41 | if os.path.isfile(video):
42 | video, _, _ = torchvision.io.read_video(video)
43 | else:
44 | raise ValueError(f'{video} is not a valid file.')
45 | else:
46 | raise ValueError(f'Unknown type {type(video)} for video object')
47 |
48 | # For debugging ground truth 2d keypoints
49 | if debug and kp_2d is not None:
50 | import cv2
51 | if isinstance(video[0], np.str_):
52 | print(video[0])
53 | frame = cv2.cvtColor(cv2.imread(video[0]), cv2.COLOR_BGR2RGB)
54 | elif isinstance(video[0], np.ndarray):
55 | frame = video[0]
56 | else:
57 | frame = video[0].numpy()
58 | for i in range(kp_2d.shape[1]):
59 | frame = cv2.circle(
60 | frame.copy(),
61 | (int(kp_2d[0,i,0]), int(kp_2d[0,i,1])),
62 | thickness=3,
63 | color=(255,0,0),
64 | radius=3,
65 | )
66 |
67 | plt.imshow(frame)
68 | plt.show()
69 |
70 | if dataset == 'insta':
71 | video = torch.cat(
72 | [convert_cvimg_to_tensor(image).unsqueeze(0) for image in video], dim=0
73 | ).to(device)
74 | else:
75 | # crop bbox locations
76 | video = torch.cat(
77 | [get_single_image_crop(image, bbox, scale=scale).unsqueeze(0) for image, bbox in zip(video, bbox)], dim=0
78 | ).to(device)
79 |
80 | features = []
81 |
82 | # split video into batches of frames
83 | frames = torch.split(video, batch_size)
84 |
85 | with torch.no_grad():
86 | for images in frames:
87 |
88 | if not debug:
89 | pred = model.feature_extractor(images)
90 | features.append(pred.cpu())
91 | del pred, images
92 | else:
93 | preds = model(images)
94 | dataset = 'spin' # dataset if dataset else 'common'
95 | result_image = batch_visualize_preds(
96 | images,
97 | preds[-1],
98 | target_exists=False,
99 | max_images=4,
100 | dataset=dataset,
101 | )
102 |
103 | plt.figure(figsize=(19.2, 10.8))
104 | plt.axis('off')
105 | plt.imshow(result_image)
106 | plt.show()
107 |
108 | del preds, images
109 | return 0
110 |
111 | features = torch.cat(features, dim=0)
112 |
113 | return features.numpy()
114 |
--------------------------------------------------------------------------------
/lib/data_utils/penn_action_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import sys
18 | sys.path.append('.')
19 |
20 | import glob
21 | import torch
22 | import joblib
23 | import argparse
24 | from tqdm import tqdm
25 | import os.path as osp
26 | from skimage import io
27 | from scipy.io import loadmat
28 |
29 | from lib.models import spin
30 | from lib.data_utils.kp_utils import *
31 | from lib.core.config import VIBE_DB_DIR
32 | from lib.data_utils.img_utils import get_bbox_from_kp2d
33 | from lib.data_utils.feature_extractor import extract_features
34 |
35 |
36 | def calc_kpt_bound(kp_2d):
37 | MAX_COORD = 10000
38 | x = kp_2d[:, 0]
39 | y = kp_2d[:, 1]
40 | z = kp_2d[:, 2]
41 | u = MAX_COORD
42 | d = -1
43 | l = MAX_COORD
44 | r = -1
45 | for idx, vis in enumerate(z):
46 | if vis == 0: # skip invisible joint
47 | continue
48 | u = min(u, y[idx])
49 | d = max(d, y[idx])
50 | l = min(l, x[idx])
51 | r = max(r, x[idx])
52 | return u, d, l, r
53 |
54 |
55 | def load_mat(path):
56 | mat = loadmat(path)
57 | del mat['pose'], mat['__header__'], mat['__globals__'], mat['__version__'], mat['train'], mat['action']
58 | mat['nframes'] = mat['nframes'][0][0]
59 |
60 | return mat
61 |
62 |
63 | def read_data(folder):
64 | dataset = {
65 | 'img_name' : [],
66 | 'joints2D': [],
67 | 'bbox': [],
68 | 'vid_name': [],
69 | 'features': [],
70 | }
71 |
72 | model = spin.get_pretrained_hmr()
73 |
74 | file_names = sorted(glob.glob(folder + '/labels/'+'*.mat'))
75 |
76 | for fname in tqdm(file_names):
77 | vid_dict=load_mat(fname)
78 | imgs = sorted(glob.glob(folder + '/frames/'+ fname.strip().split('/')[-1].split('.')[0]+'/*.jpg'))
79 | kp_2d = np.zeros((vid_dict['nframes'], 13, 3))
80 | perm_idxs = get_perm_idxs('pennaction', 'common')
81 |
82 | kp_2d[:, :, 0] = vid_dict['x']
83 | kp_2d[:, :, 1] = vid_dict['y']
84 | kp_2d[:, :, 2] = vid_dict['visibility']
85 | kp_2d = kp_2d[:, perm_idxs, :]
86 |
87 | # fix inconsistency
88 | n_kp_2d = np.zeros((kp_2d.shape[0], 14, 3))
89 | n_kp_2d[:, :12, :] = kp_2d[:, :-1, :]
90 | n_kp_2d[:, 13, :] = kp_2d[:, 12, :]
91 | kp_2d = n_kp_2d
92 |
93 | bbox = np.zeros((vid_dict['nframes'], 4))
94 |
95 | for fr_id, fr in enumerate(kp_2d):
96 | u, d, l, r = calc_kpt_bound(fr)
97 | center = np.array([(l + r) * 0.5, (u + d) * 0.5], dtype=np.float32)
98 | c_x, c_y = center[0], center[1]
99 | w, h = r - l, d - u
100 | w = h = np.where(w / h > 1, w, h)
101 |
102 | bbox[fr_id,:] = np.array([c_x, c_y, w, h])
103 |
104 | dataset['vid_name'].append(np.array([f'{fname}']* vid_dict['nframes']))
105 | dataset['img_name'].append(np.array(imgs))
106 | dataset['joints2D'].append(kp_2d)
107 | dataset['bbox'].append(bbox)
108 |
109 |         features = extract_features(model, np.array(imgs), bbox, dataset='pennaction', debug=False)
110 | dataset['features'].append(features)
111 |
112 | for k in dataset.keys():
113 | dataset[k] = np.array(dataset[k])
114 | for k in dataset.keys():
115 | dataset[k] = np.concatenate(dataset[k])
116 |
117 | return dataset
118 |
119 |
120 | if __name__ == '__main__':
121 | parser = argparse.ArgumentParser()
122 | parser.add_argument('--dir', type=str, help='dataset directory', default='data/pennaction')
123 | args = parser.parse_args()
124 |
125 | dataset = read_data(args.dir)
126 | joblib.dump(dataset, osp.join(VIBE_DB_DIR, 'pennaction_train_db.pt'))
127 |
128 |
--------------------------------------------------------------------------------
/lib/utils/smooth_bbox.py:
--------------------------------------------------------------------------------
1 | # This script is borrowed from https://github.com/akanazawa/human_dynamics/blob/master/src/util/smooth_bbox.py
2 | # Adhere to their licence to use this script
3 |
4 | import numpy as np
5 | import scipy.signal as signal
6 | from scipy.ndimage.filters import gaussian_filter1d
7 |
8 |
9 | def get_smooth_bbox_params(kps, vis_thresh=2, kernel_size=11, sigma=3):
10 | """
11 | Computes smooth bounding box parameters from keypoints:
12 | 1. Computes bbox by rescaling the person to be around 150 px.
13 | 2. Linearly interpolates bbox params for missing annotations.
14 | 3. Median filtering
15 | 4. Gaussian filtering.
16 |
17 | Recommended thresholds:
18 | * detect-and-track: 0
19 | * 3DPW: 0.1
20 |
21 | Args:
22 | kps (list): List of kps (Nx3) or None.
23 | vis_thresh (float): Threshold for visibility.
24 | kernel_size (int): Kernel size for median filtering (must be odd).
25 | sigma (float): Sigma for gaussian smoothing.
26 |
27 | Returns:
28 | Smooth bbox params [cx, cy, scale], start index, end index
29 | """
30 | bbox_params, start, end = get_all_bbox_params(kps, vis_thresh)
31 | smoothed = smooth_bbox_params(bbox_params, kernel_size, sigma)
32 | smoothed = np.vstack((np.zeros((start, 3)), smoothed))
33 | return smoothed, start, end
34 |
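# Illustrative usage (a sketch, not called anywhere in this file): `keypoint_list` is a
# hypothetical list with one (N, 3) keypoint array per frame (or None for missing frames),
# e.g. coming from a 2D pose detector:
#
#   bbox_params, start, end = get_smooth_bbox_params(keypoint_list, vis_thresh=0.1)
#   # bbox_params has one [center_x, center_y, scale] row per frame; rows before `start` are zeros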
35 |
36 | def kp_to_bbox_param(kp, vis_thresh):
37 | """
38 | Finds the bounding box parameters from the 2D keypoints.
39 |
40 | Args:
41 | kp (Kx3): 2D Keypoints.
42 | vis_thresh (float): Threshold for visibility.
43 |
44 | Returns:
45 | [center_x, center_y, scale]
46 | """
47 | if kp is None:
48 | return
49 | vis = kp[:, 2] > vis_thresh
50 | if not np.any(vis):
51 | return
52 | min_pt = np.min(kp[vis, :2], axis=0)
53 | max_pt = np.max(kp[vis, :2], axis=0)
54 | person_height = np.linalg.norm(max_pt - min_pt)
55 | if person_height < 0.5:
56 | return
57 | center = (min_pt + max_pt) / 2.
58 | scale = 150. / person_height
59 | return np.append(center, scale)
60 |
61 |
62 | def get_all_bbox_params(kps, vis_thresh=2):
63 | """
64 | Finds bounding box parameters for all keypoints.
65 |
66 | Look for sequences in the middle with no predictions and linearly
67 | interpolate the bbox params for those
68 |
69 | Args:
70 | kps (list): List of kps (Kx3) or None.
71 | vis_thresh (float): Threshold for visibility.
72 |
73 | Returns:
74 | bbox_params, start_index (incl), end_index (excl)
75 | """
76 | # keeps track of how many indices in a row with no prediction
77 | num_to_interpolate = 0
78 | start_index = -1
79 | bbox_params = np.empty(shape=(0, 3), dtype=np.float32)
80 |
81 | for i, kp in enumerate(kps):
82 | bbox_param = kp_to_bbox_param(kp, vis_thresh=vis_thresh)
83 | if bbox_param is None:
84 | num_to_interpolate += 1
85 | continue
86 |
87 | if start_index == -1:
88 | # Found the first index with a prediction!
89 | start_index = i
90 | num_to_interpolate = 0
91 |
92 | if num_to_interpolate > 0:
93 | # Linearly interpolate each param.
94 | previous = bbox_params[-1]
95 | # This will be 3x(n+2)
96 | interpolated = np.array(
97 | [np.linspace(prev, curr, num_to_interpolate + 2)
98 | for prev, curr in zip(previous, bbox_param)])
99 | bbox_params = np.vstack((bbox_params, interpolated.T[1:-1]))
100 | num_to_interpolate = 0
101 | bbox_params = np.vstack((bbox_params, bbox_param))
102 |
103 | return bbox_params, start_index, i - num_to_interpolate + 1
104 |
105 |
106 | def smooth_bbox_params(bbox_params, kernel_size=11, sigma=8):
107 | """
108 | Applies median filtering and then gaussian filtering to bounding box
109 | parameters.
110 |
111 | Args:
112 | bbox_params (Nx3): [cx, cy, scale].
113 | kernel_size (int): Kernel size for median filtering (must be odd).
114 | sigma (float): Sigma for gaussian smoothing.
115 |
116 | Returns:
117 | Smoothed bounding box parameters (Nx3).
118 | """
119 | smoothed = np.array([signal.medfilt(param, kernel_size)
120 | for param in bbox_params.T]).T
121 | return np.array([gaussian_filter1d(traj, sigma) for traj in smoothed.T]).T
122 |
--------------------------------------------------------------------------------
/lib/utils/renderer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import math
18 | import trimesh
19 | import pyrender
20 | import numpy as np
21 | from pyrender.constants import RenderFlags
22 | from lib.models.smpl import get_smpl_faces
23 |
24 |
25 | class WeakPerspectiveCamera(pyrender.Camera):
26 | def __init__(self,
27 | scale,
28 | translation,
29 | znear=pyrender.camera.DEFAULT_Z_NEAR,
30 | zfar=None,
31 | name=None):
32 | super(WeakPerspectiveCamera, self).__init__(
33 | znear=znear,
34 | zfar=zfar,
35 | name=name,
36 | )
37 | self.scale = scale
38 | self.translation = translation
39 |
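    # Weak-perspective projection: a vertex (x, y, z) is mapped to
    #   x' = scale[0] * (x + translation[0]),  y' = scale[1] * (y - translation[1]),
    # with z kept only for depth ordering. `scale` and `translation` come from the
    # (sx, sy, tx, ty) camera passed to Renderer.render() below.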
40 | def get_projection_matrix(self, width=None, height=None):
41 | P = np.eye(4)
42 | P[0, 0] = self.scale[0]
43 | P[1, 1] = self.scale[1]
44 | P[0, 3] = self.translation[0] * self.scale[0]
45 | P[1, 3] = -self.translation[1] * self.scale[1]
46 | P[2, 2] = -1
47 | return P
48 |
49 |
50 | class Renderer:
51 | def __init__(self, resolution=(224,224), orig_img=False, wireframe=False):
52 | self.resolution = resolution
53 |
54 | self.faces = get_smpl_faces()
55 | self.orig_img = orig_img
56 | self.wireframe = wireframe
57 | self.renderer = pyrender.OffscreenRenderer(
58 | viewport_width=self.resolution[0],
59 | viewport_height=self.resolution[1],
60 | point_size=1.0
61 | )
62 |
63 | # set the scene
64 | self.scene = pyrender.Scene(bg_color=[0.0, 0.0, 0.0, 0.0], ambient_light=(0.3, 0.3, 0.3))
65 |
66 | light = pyrender.PointLight(color=[1.0, 1.0, 1.0], intensity=1)
67 |
68 | light_pose = np.eye(4)
69 | light_pose[:3, 3] = [0, -1, 1]
70 | self.scene.add(light, pose=light_pose)
71 |
72 | light_pose[:3, 3] = [0, 1, 1]
73 | self.scene.add(light, pose=light_pose)
74 |
75 | light_pose[:3, 3] = [1, 1, 2]
76 | self.scene.add(light, pose=light_pose)
77 |
78 | def render(self, img, verts, cam, angle=None, axis=None, mesh_filename=None, color=[1.0, 1.0, 0.9]):
79 |
80 | mesh = trimesh.Trimesh(vertices=verts, faces=self.faces, process=False)
81 |
82 | Rx = trimesh.transformations.rotation_matrix(math.radians(180), [1, 0, 0])
83 | mesh.apply_transform(Rx)
84 |
85 | if mesh_filename is not None:
86 | mesh.export(mesh_filename)
87 |
88 | if angle and axis:
89 | R = trimesh.transformations.rotation_matrix(math.radians(angle), axis)
90 | mesh.apply_transform(R)
91 |
92 | sx, sy, tx, ty = cam
93 |
94 | camera = WeakPerspectiveCamera(
95 | scale=[sx, sy],
96 | translation=[tx, ty],
97 | zfar=1000.
98 | )
99 |
100 | material = pyrender.MetallicRoughnessMaterial(
101 | metallicFactor=0.0,
102 | alphaMode='OPAQUE',
103 | baseColorFactor=(color[0], color[1], color[2], 1.0)
104 | )
105 |
106 | mesh = pyrender.Mesh.from_trimesh(mesh, material=material)
107 |
108 | mesh_node = self.scene.add(mesh, 'mesh')
109 |
110 | camera_pose = np.eye(4)
111 | cam_node = self.scene.add(camera, pose=camera_pose)
112 |
113 | if self.wireframe:
114 | render_flags = RenderFlags.RGBA | RenderFlags.ALL_WIREFRAME
115 | else:
116 | render_flags = RenderFlags.RGBA
117 |
118 | rgb, _ = self.renderer.render(self.scene, flags=render_flags)
119 | valid_mask = (rgb[:, :, -1] > 0)[:, :, np.newaxis]
120 | output_img = rgb[:, :, :-1] * valid_mask + (1 - valid_mask) * img
121 | image = output_img.astype(np.uint8)
122 |
123 | self.scene.remove_node(mesh_node)
124 | self.scene.remove_node(cam_node)
125 |
126 | return image
127 |
--------------------------------------------------------------------------------
/lib/data_utils/amass_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import os
18 | import joblib
19 | import argparse
20 | import numpy as np
21 | import os.path as osp
22 | from tqdm import tqdm
23 |
24 | from lib.core.config import VIBE_DB_DIR
25 |
26 | dict_keys = ['betas', 'dmpls', 'gender', 'mocap_framerate', 'poses', 'trans']
27 |
28 | # extract SMPL joints from SMPL-H model
29 | joints_to_use = np.array([
30 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
31 | 11, 12, 13, 14, 15, 16, 17, 18, 19,
32 | 20, 21, 22, 37
33 | ])
34 | joints_to_use = np.arange(0,156).reshape((-1,3))[joints_to_use].reshape(-1)
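# SMPL-H stores 156 pose parameters = 52 joints x 3 axis-angle values; the 24 indices
# above therefore select 24 x 3 = 72 parameters, i.e. a pose vector of SMPL size.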
35 |
36 | all_sequences = [
37 | 'ACCAD',
38 | 'BioMotionLab_NTroje',
39 | 'CMU',
40 | 'EKUT',
41 | 'Eyes_Japan_Dataset',
42 | 'HumanEva',
43 | 'KIT',
44 | 'MPI_HDM05',
45 | 'MPI_Limits',
46 | 'MPI_mosh',
47 | 'SFU',
48 | 'SSM_synced',
49 | 'TCD_handMocap',
50 | 'TotalCapture',
51 | 'Transitions_mocap',
52 | ]
53 |
54 | def read_data(folder, sequences):
55 | # sequences = [osp.join(folder, x) for x in sorted(os.listdir(folder)) if osp.isdir(osp.join(folder, x))]
56 |
57 | if sequences == 'all':
58 | sequences = all_sequences
59 |
60 | db = {
61 | 'theta': [],
62 | 'vid_name': [],
63 | }
64 |
65 | for seq_name in sequences:
66 | print(f'Reading {seq_name} sequence...')
67 | seq_folder = osp.join(folder, seq_name)
68 |
69 | thetas, vid_names = read_single_sequence(seq_folder, seq_name)
70 | seq_name_list = np.array([seq_name]*thetas.shape[0])
71 | print(seq_name, 'number of videos', thetas.shape[0])
72 | db['theta'].append(thetas)
73 | db['vid_name'].append(vid_names)
74 |
75 | db['theta'] = np.concatenate(db['theta'], axis=0)
76 | db['vid_name'] = np.concatenate(db['vid_name'], axis=0)
77 |
78 | return db
79 |
80 |
81 |
82 | def read_single_sequence(folder, seq_name):
83 | subjects = os.listdir(folder)
84 |
85 | thetas = []
86 | vid_names = []
87 |
88 | for subject in tqdm(subjects):
89 | actions = [x for x in os.listdir(osp.join(folder, subject)) if x.endswith('.npz')]
90 |
91 | for action in actions:
92 | fname = osp.join(folder, subject, action)
93 |
94 | if fname.endswith('shape.npz'):
95 | continue
96 |
97 | data = np.load(fname)
98 |
99 | pose = data['poses'][:, joints_to_use]
100 |
101 | if pose.shape[0] < 60:
102 | continue
103 |
104 | shape = np.repeat(data['betas'][:10][np.newaxis], pose.shape[0], axis=0)
105 | theta = np.concatenate([pose,shape], axis=1)
106 | vid_name = np.array([f'{seq_name}_{subject}_{action[:-4]}']*pose.shape[0])
107 |
108 | vid_names.append(vid_name)
109 | thetas.append(theta)
110 |
111 | return np.concatenate(thetas, axis=0), np.concatenate(vid_names, axis=0)
112 |
113 |
114 | def read_seq_data(folder, nsubjects, fps):
115 | subjects = os.listdir(folder)
116 | sequences = {}
117 |
118 | assert nsubjects < len(subjects), 'nsubjects should be less than len(subjects)'
119 |
120 | for subject in subjects[:nsubjects]:
121 | actions = os.listdir(osp.join(folder, subject))
122 |
123 | for action in actions:
124 | data = np.load(osp.join(folder, subject, action))
125 | mocap_framerate = int(data['mocap_framerate'])
126 | sampling_freq = mocap_framerate // fps
127 | sequences[(subject, action)] = data['poses'][0::sampling_freq, joints_to_use]
128 |
129 | train_set = {}
130 | test_set = {}
131 |
132 | for i, (k,v) in enumerate(sequences.items()):
133 | if i < len(sequences.keys()) - len(sequences.keys()) // 4:
134 | train_set[k] = v
135 | else:
136 | test_set[k] = v
137 |
138 | return train_set, test_set
139 |
140 | if __name__ == '__main__':
141 | parser = argparse.ArgumentParser()
142 | parser.add_argument('--dir', type=str, help='dataset directory', default='data/amass')
143 | args = parser.parse_args()
144 |
145 | db = read_data(args.dir, sequences=all_sequences)
146 | db_file = osp.join(VIBE_DB_DIR, 'amass_db.pt')
147 | print(f'Saving AMASS dataset to {db_file}')
148 | joblib.dump(db, db_file)
149 |
--------------------------------------------------------------------------------
/lib/dataset/dataset_2d.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import os
18 | import torch
19 | import random
20 | import logging
21 | import numpy as np
22 | import os.path as osp
23 | import joblib
24 |
25 | from torch.utils.data import Dataset
26 |
27 | from lib.core.config import VIBE_DB_DIR
28 | from lib.data_utils.kp_utils import convert_kps
29 | from lib.data_utils.img_utils import normalize_2d_kp, transfrom_keypoints, split_into_chunks
30 |
31 | logger = logging.getLogger(__name__)
32 |
33 | class Dataset2D(Dataset):
34 | def __init__(self, seqlen, overlap=0.,
35 | folder=None, dataset_name=None, debug=False):
36 |
37 | self.folder = folder
38 | self.dataset_name = dataset_name
39 | self.seqlen = seqlen
40 | self.stride = int(seqlen * (1-overlap))
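        # e.g. with the default SEQLEN=20 and OVERLAP=0.5 this gives stride=10, so
        # consecutive chunks of 20 frames overlap by 10 frames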
41 | self.debug = debug
42 | self.db = self.load_db()
43 | self.vid_indices = split_into_chunks(self.db['vid_name'], self.seqlen, self.stride)
44 |
45 |
46 | def __len__(self):
47 | return len(self.vid_indices)
48 |
49 | def __getitem__(self, index):
50 | return self.get_single_item(index)
51 |
52 | def load_db(self):
53 | set = 'train'
54 |
55 | db_file = osp.join(VIBE_DB_DIR, f'{self.dataset_name}_{set}_db.pt')
56 |
57 | if osp.isfile(db_file):
58 | db = joblib.load(db_file)
59 | else:
60 |             raise ValueError(f'{db_file} does not exist')
61 |
62 | print(f'Loaded {self.dataset_name} dataset from {db_file}')
63 | return db
64 |
65 | def get_single_item(self, index):
66 | start_index, end_index = self.vid_indices[index]
67 |
68 | kp_2d = self.db['joints2D'][start_index:end_index+1]
69 | if self.dataset_name != 'posetrack':
70 | kp_2d = convert_kps(kp_2d, src=self.dataset_name, dst='spin')
71 | kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)
72 |
73 | bbox = self.db['bbox'][start_index:end_index+1]
74 |
75 | input = torch.from_numpy(self.db['features'][start_index:end_index+1]).float()
76 |
77 |
78 | for idx in range(self.seqlen):
79 | # crop image and transform 2d keypoints
80 | kp_2d[idx,:,:2], trans = transfrom_keypoints(
81 | kp_2d=kp_2d[idx,:,:2],
82 | center_x=bbox[idx,0],
83 | center_y=bbox[idx,1],
84 | width=bbox[idx,2],
85 | height=bbox[idx,3],
86 | patch_width=224,
87 | patch_height=224,
88 | do_augment=False,
89 | )
90 |
91 | kp_2d[idx,:,:2] = normalize_2d_kp(kp_2d[idx,:,:2], 224)
92 | kp_2d_tensor[idx] = kp_2d[idx]
93 |
94 | vid_name = self.db['vid_name'][start_index:end_index+1]
95 | frame_id = self.db['img_name'][start_index:end_index+1].astype(str)
96 | instance_id = np.array([v+f for v,f in zip(vid_name, frame_id)])
97 |
98 | target = {
99 | 'features': input,
100 | 'kp_2d': torch.from_numpy(kp_2d_tensor).float(), # 2D keypoints transformed according to bbox cropping
101 | # 'instance_id': instance_id,
102 | }
103 |
104 | if self.debug:
105 | from lib.data_utils.img_utils import get_single_image_crop
106 |
107 | vid_name = self.db['vid_name'][start_index]
108 |
109 | if self.dataset_name == 'pennaction':
110 | vid_folder = "frames"
111 | vid_name = vid_name.split('/')[-1].split('.')[0]
112 | img_id = "img_name"
113 | elif self.dataset_name == 'posetrack':
114 | vid_folder = osp.join('images', vid_name.split('/')[-2])
115 | vid_name = vid_name.split('/')[-1].split('.')[0]
116 | img_id = "img_name"
117 | else:
118 | vid_name = '_'.join(vid_name.split('_')[:-1])
119 | vid_folder = 'imageFiles'
120 | img_id= 'frame_id'
121 | f = osp.join(self.folder, vid_folder, vid_name)
122 | video_file_list = [osp.join(f, x) for x in sorted(os.listdir(f)) if x.endswith('.jpg')]
123 | frame_idxs = self.db[img_id][start_index:end_index + 1]
124 | if self.dataset_name == 'pennaction' or self.dataset_name == 'posetrack':
125 | video = frame_idxs
126 | else:
127 | video = [video_file_list[i] for i in frame_idxs]
128 |
129 | video = torch.cat(
130 | [get_single_image_crop(image, bbox).unsqueeze(0) for image, bbox in zip(video, bbox)], dim=0
131 | )
132 |
133 | target['video'] = video
134 |
135 | return target
136 |
137 |
138 |
--------------------------------------------------------------------------------
/lib/utils/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import os
18 | import yaml
19 | import time
20 | import torch
21 | import shutil
22 | import logging
23 | import operator
24 | from tqdm import tqdm
25 | from os import path as osp
26 | from functools import reduce
27 | from typing import List, Union
28 |
29 |
30 | def move_dict_to_device(dict, device, tensor2float=False):
31 | for k,v in dict.items():
32 | if isinstance(v, torch.Tensor):
33 | if tensor2float:
34 | dict[k] = v.float().to(device)
35 | else:
36 | dict[k] = v.to(device)
37 |
38 |
39 | def get_from_dict(dict, keys):
40 | return reduce(operator.getitem, keys, dict)
41 |
42 |
43 | def tqdm_enumerate(iter):
44 | i = 0
45 | for y in tqdm(iter):
46 | yield i, y
47 | i += 1
48 |
49 |
50 | def iterdict(d):
51 | for k,v in d.items():
52 | if isinstance(v, dict):
53 | d[k] = dict(v)
54 | iterdict(v)
55 | return d
56 |
57 |
58 | def accuracy(output, target):
59 | _, pred = output.topk(1)
60 | pred = pred.view(-1)
61 |
62 | correct = pred.eq(target).sum()
63 |
64 | return correct.item(), target.size(0) - correct.item()
65 |
66 |
67 | def lr_decay(optimizer, step, lr, decay_step, gamma):
68 | lr = lr * gamma ** (step/decay_step)
69 | for param_group in optimizer.param_groups:
70 | param_group['lr'] = lr
71 | return lr
72 |
73 |
74 | def step_decay(optimizer, step, lr, decay_step, gamma):
75 | lr = lr * gamma ** (step / decay_step)
76 | for param_group in optimizer.param_groups:
77 | param_group['lr'] = lr
78 | return lr
79 |
80 |
81 | def read_yaml(filename):
82 |     return yaml.safe_load(open(filename, 'r'))
83 |
84 |
85 | def write_yaml(filename, object):
86 | with open(filename, 'w') as f:
87 | yaml.dump(object, f)
88 |
89 |
90 | def save_dict_to_yaml(obj, filename, mode='w'):
91 | with open(filename, mode) as f:
92 | yaml.dump(obj, f, default_flow_style=False)
93 |
94 |
95 | def save_to_file(obj, filename, mode='w'):
96 | with open(filename, mode) as f:
97 | f.write(obj)
98 |
99 |
100 | def concatenate_dicts(dict_list, dim=0):
101 | rdict = dict.fromkeys(dict_list[0].keys())
102 | for k in rdict.keys():
103 | rdict[k] = torch.cat([d[k] for d in dict_list], dim=dim)
104 | return rdict
105 |
106 |
107 | def bool_to_string(x: Union[List[bool],bool]) -> Union[List[str],str]:
108 | """
109 | boolean to string conversion
110 | :param x: list or bool to be converted
111 | :return: string converted thing
112 | """
113 | if isinstance(x, bool):
114 | return [str(x)]
115 | for i, j in enumerate(x):
116 | x[i]=str(j)
117 | return x
118 |
119 |
120 | def checkpoint2model(checkpoint, key='gen_state_dict'):
121 | state_dict = checkpoint[key]
122 | print(f'Performance of loaded model on 3DPW is {checkpoint["performance"]:.2f}mm')
123 | # del state_dict['regressor.mean_theta']
124 | return state_dict
125 |
126 |
127 | def get_optimizer(model, optim_type, lr, weight_decay, momentum):
128 | if optim_type in ['sgd', 'SGD']:
129 | opt = torch.optim.SGD(lr=lr, params=model.parameters(), momentum=momentum)
130 | elif optim_type in ['Adam', 'adam', 'ADAM']:
131 | opt = torch.optim.Adam(lr=lr, params=model.parameters(), weight_decay=weight_decay)
132 | else:
133 | raise ModuleNotFoundError
134 | return opt
135 |
136 |
137 | def create_logger(logdir, phase='train'):
138 | os.makedirs(logdir, exist_ok=True)
139 |
140 | log_file = osp.join(logdir, f'{phase}_log.txt')
141 |
142 | head = '%(asctime)-15s %(message)s'
143 | logging.basicConfig(filename=log_file,
144 | format=head)
145 | logger = logging.getLogger()
146 | logger.setLevel(logging.INFO)
147 | console = logging.StreamHandler()
148 | logging.getLogger('').addHandler(console)
149 |
150 | return logger
151 |
152 |
153 | class AverageMeter(object):
154 | def __init__(self):
155 | self.val = 0
156 | self.avg = 0
157 | self.sum = 0
158 | self.count = 0
159 |
160 | def update(self, val, n=1):
161 | self.val = val
162 | self.sum += val * n
163 | self.count += n
164 | self.avg = self.sum / self.count
165 |
166 |
167 | def prepare_output_dir(cfg, cfg_file):
168 |
169 | # ==== create logdir
170 | logtime = time.strftime('%d-%m-%Y_%H-%M-%S')
171 | logdir = f'{logtime}_{cfg.EXP_NAME}'
172 |
173 | logdir = osp.join(cfg.OUTPUT_DIR, logdir)
174 | os.makedirs(logdir, exist_ok=True)
175 | shutil.copy(src=cfg_file, dst=osp.join(cfg.OUTPUT_DIR, 'config.yaml'))
176 |
177 | cfg.LOGDIR = logdir
178 |
179 | # save config
180 | save_dict_to_yaml(cfg, osp.join(cfg.LOGDIR, 'config.yaml'))
181 |
182 | return cfg
183 |
--------------------------------------------------------------------------------
/lib/core/evaluate.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import time
18 | import torch
19 | import shutil
20 | import logging
21 | import numpy as np
22 | import os.path as osp
23 | from progress.bar import Bar
24 |
25 | from lib.core.config import VIBE_DATA_DIR
26 | from lib.utils.utils import move_dict_to_device, AverageMeter
27 |
28 | from lib.utils.eval_utils import (
29 | compute_accel,
30 | compute_error_accel,
31 | compute_error_verts,
32 | batch_compute_similarity_transform_torch,
33 | )
34 |
35 | logger = logging.getLogger(__name__)
36 |
37 | class Evaluator():
38 | def __init__(
39 | self,
40 | test_loader,
41 | model,
42 | device=None,
43 | ):
44 | self.test_loader = test_loader
45 | self.model = model
46 | self.device = device
47 |
48 | self.evaluation_accumulators = dict.fromkeys(['pred_j3d', 'target_j3d', 'target_theta', 'pred_verts'])
49 |
50 | if self.device is None:
51 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
52 |
53 | def validate(self):
54 | self.model.eval()
55 |
56 | start = time.time()
57 |
58 | summary_string = ''
59 |
60 | bar = Bar('Validation', fill='#', max=len(self.test_loader))
61 |
62 | if self.evaluation_accumulators is not None:
63 | for k,v in self.evaluation_accumulators.items():
64 | self.evaluation_accumulators[k] = []
65 |
66 | J_regressor = torch.from_numpy(np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float()
67 |
68 | for i, target in enumerate(self.test_loader):
69 |
70 | # video = video.to(self.device)
71 | move_dict_to_device(target, self.device)
72 |
73 | # <=============
74 | with torch.no_grad():
75 | inp = target['features']
76 |
77 | preds = self.model(inp, J_regressor=J_regressor)
78 |
79 | # convert to 14 keypoint format for evaluation
80 | # if self.use_spin:
81 | n_kp = preds[-1]['kp_3d'].shape[-2]
82 | pred_j3d = preds[-1]['kp_3d'].view(-1, n_kp, 3).cpu().numpy()
83 | target_j3d = target['kp_3d'].view(-1, n_kp, 3).cpu().numpy()
84 | pred_verts = preds[-1]['verts'].view(-1, 6890, 3).cpu().numpy()
85 | target_theta = target['theta'].view(-1, 85).cpu().numpy()
86 |
87 |
88 | self.evaluation_accumulators['pred_verts'].append(pred_verts)
89 | self.evaluation_accumulators['target_theta'].append(target_theta)
90 |
91 | self.evaluation_accumulators['pred_j3d'].append(pred_j3d)
92 | self.evaluation_accumulators['target_j3d'].append(target_j3d)
93 | # =============>
94 |
95 | batch_time = time.time() - start
96 |
97 |             summary_string = f'({i + 1}/{len(self.test_loader)}) | batch: {batch_time * 1000.0:.4}ms | ' \
98 | f'Total: {bar.elapsed_td} | ETA: {bar.eta_td:}'
99 |
100 | bar.suffix = summary_string
101 | bar.next()
102 |
103 | bar.finish()
104 |
105 | logger.info(summary_string)
106 |
107 | def evaluate(self):
108 |
109 | for k, v in self.evaluation_accumulators.items():
110 | self.evaluation_accumulators[k] = np.vstack(v)
111 |
112 | pred_j3ds = self.evaluation_accumulators['pred_j3d']
113 | target_j3ds = self.evaluation_accumulators['target_j3d']
114 |
115 | pred_j3ds = torch.from_numpy(pred_j3ds).float()
116 | target_j3ds = torch.from_numpy(target_j3ds).float()
117 |
118 |         print(f'Evaluating on {pred_j3ds.shape[0]} poses...')
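        # root-align predictions and ground truth at the pelvis (midpoint of the two hip
        # joints in the 14-joint evaluation format) before computing the joint errors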
119 | pred_pelvis = (pred_j3ds[:,[2],:] + pred_j3ds[:,[3],:]) / 2.0
120 | target_pelvis = (target_j3ds[:,[2],:] + target_j3ds[:,[3],:]) / 2.0
121 |
122 |
123 | pred_j3ds -= pred_pelvis
124 | target_j3ds -= target_pelvis
125 |
126 | # Absolute error (MPJPE)
127 | errors = torch.sqrt(((pred_j3ds - target_j3ds) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
128 | S1_hat = batch_compute_similarity_transform_torch(pred_j3ds, target_j3ds)
129 | errors_pa = torch.sqrt(((S1_hat - target_j3ds) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
130 | pred_verts = self.evaluation_accumulators['pred_verts']
131 | target_theta = self.evaluation_accumulators['target_theta']
132 |
133 | m2mm = 1000
134 |
135 | pve = np.mean(compute_error_verts(target_theta=target_theta, pred_verts=pred_verts)) * m2mm
136 | accel = np.mean(compute_accel(pred_j3ds)) * m2mm
137 | accel_err = np.mean(compute_error_accel(joints_pred=pred_j3ds, joints_gt=target_j3ds)) * m2mm
138 | mpjpe = np.mean(errors) * m2mm
139 | pa_mpjpe = np.mean(errors_pa) * m2mm
140 |
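        # metric summary (all values are scaled to millimeter units via m2mm):
        #   mpjpe     - mean per-joint position error
        #   pa-mpjpe  - MPJPE after Procrustes (similarity) alignment
        #   pve       - per-vertex error of the SMPL mesh
        #   accel     - mean joint acceleration magnitude
        #   accel_err - acceleration error w.r.t. the ground-truth joints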
141 | eval_dict = {
142 | 'mpjpe': mpjpe,
143 | 'pa-mpjpe': pa_mpjpe,
144 | 'pve': pve,
145 | 'accel': accel,
146 | 'accel_err': accel_err
147 | }
148 |
149 |         log_str = ' '.join([f'{k.upper()}: {v:.4f},' for k, v in eval_dict.items()])
150 | print(log_str)
151 |
152 | def run(self):
153 | self.validate()
154 | self.evaluate()
--------------------------------------------------------------------------------
/doc/demo.md:
--------------------------------------------------------------------------------
1 | # Demo
2 |
3 | ## Flags
4 |
5 | - `--vid_file (str)`: Path to input video file or a YouTube link. If you provide a YouTube link it will be downloaded
6 | to a temporary folder and then processed.
7 |
8 | - `--output_folder (str)`: Path to folder to store the VIBE predictions and output renderings.
9 |
10 | - `--tracking_method (str), default=bbox`: Defines the tracking method to compute bboxes and tracklets of people in the input video.
11 | Available options are `bbox` or `pose`. `bbox` tracking is available [here](https://github.com/mkocabas/multi-person-tracker)
12 | as a standalone python package. For `pose` tracking, you need to install
13 |  [STAF](https://github.com/soulslicer/openpose/tree/staf), an extension of OpenPose to
14 |  multi-person pose tracking introduced in [1].
15 |
16 | - `--detector (str), default=yolo`: Defines the type of detector used by the `bbox` tracking method if enabled. Available options are
17 | `maskrcnn` and `yolo`. `maskrcnn` is more accurate but slower than `yolo`. Refer to the [speed comparison](demo.md#runtime-performance) for further information.
18 |
19 | - `--yolo_img_size (int), default=416`: Input image size of YOLO detector.
20 |
21 | - `--tracker_batch_size (int), default=12`: Batch size of the bbox tracker. If you get a memory error, reduce it.
22 |
23 | - `--staf_dir (str)`: Path to the folder where the STAF pose tracker is installed. It should point to the main STAF directory.
24 |
25 | - `--vibe_batch_size (int), default=450`: Batch size of VIBE model.
26 |
27 | - `--display`: Enable this flag if you want to visualize the output of tracking and pose & shape estimation interactively.
28 |
29 | - `--run_smplify`: Enable this flag if you want to refine the VIBE results using the Temporal SMPLify algorithm.
30 | For this option, you have to set `--tracking_method` to `pose`.
31 |
32 | - `--no_render`: This flag disables the final rendering of VIBE results. Useful if you only want to get VIBE predictions.
33 |
34 | - `--wireframe`: Enable this if you would like to render wireframe meshes in the final rendering.
35 |
36 | - `--sideview`: Render the output meshes from an alternate viewpoint. The default alternate viewpoint is -90 degrees about the y axis.
37 | Note that this option doubles the rendering time.
38 |
39 | - `--save_obj`: Save output meshes as .obj files.
40 |
41 | ## Examples
42 | - Run VIBE on a video file using bbox tracker and visualize the results with wireframe meshes:
43 | ```bash
44 | python demo_video.py --vid_file sample_video.mp4 --output_folder output/ --tracking_method bbox --detector maskrcnn --display --wireframe
45 | ```
46 |
47 | - Run VIBE on a video using the pose tracker and run Temporal SMPLify to further refine the predictions:
48 | ```bash
49 | python demo_video.py --vid_file sample_video.mp4 --output_folder output/ --tracking_method pose --display --run_smplify
50 | ```
51 |
52 | - Change the default batch sizes to avoid possible memory errors:
53 | ```bash
54 | python demo_video.py --vid_file sample_video.mp4 --output_folder output/ --tracker_batch_size 2 --vibe_batch_size 64
55 | ```
56 |
57 | ## Output Format
58 |
59 | If the demo finishes successfully, it creates a file named `vibe_output.pkl` in the `--output_folder`.
60 | You can inspect what this file contains as follows:
61 |
62 | ```python
63 | >>> import joblib # you may use native pickle here as well
64 |
65 | >>> output = joblib.load('output/group_dance/vibe_output.pkl')
66 |
67 | >>> print(output.keys())
68 |
69 | dict_keys([1, 2, 3, 4]) # these are the track ids for each subject appearing in the video
70 |
71 | >>> for k,v in output[1].items(): print(k,v.shape)
72 |
73 | pred_cam (n_frames, 3) # weak perspective camera parameters in cropped image space (s,tx,ty)
74 | orig_cam (n_frames, 4) # weak perspective camera parameters in original image space (sx,sy,tx,ty)
75 | verts (n_frames, 6890, 3) # SMPL mesh vertices
76 | pose (n_frames, 72) # SMPL pose parameters
77 | betas (n_frames, 10) # SMPL body shape parameters
78 | joints3d (n_frames, 49, 3) # SMPL 3D joints
79 | joints2d (n_frames, 21, 3) # 2D keypoint detections by STAF if pose tracking is enabled, otherwise None
80 | bboxes (n_frames, 4) # bbox detections (cx,cy,w,h)
81 | frame_ids (n_frames,) # frame ids in which subject with tracking id #1 appears
82 |
83 | ```
84 | You can find the names & order of 3d joints [here](https://github.com/mkocabas/VIBE/blob/master/lib/data_utils/kp_utils.py#L212) and 2D joints [here](https://github.com/mkocabas/VIBE/blob/master/lib/data_utils/kp_utils.py#L187).
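
If you want to work with the predictions frame by frame, a minimal sketch is shown below. The
output path `output/sample_video/vibe_output.pkl` and the selected track are illustrative;
adapt them to your own run.

```python
import joblib

output = joblib.load('output/sample_video/vibe_output.pkl')  # hypothetical path
track_id = list(output.keys())[0]   # pick one of the tracked subjects
person = output[track_id]

# person['frame_ids'][i] is the original video frame of the i-th prediction, so this
# maps frame index -> (SMPL pose, shape, 3D joints) for that subject
per_frame = {
    frame: (person['pose'][i], person['betas'][i], person['joints3d'][i])
    for i, frame in enumerate(person['frame_ids'])
}
print(f'{len(per_frame)} frames for track {track_id}')
```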
85 |
86 | ## Runtime Performance
87 | Here is the breakdown of runtime per step, namely tracking and VIBE. These results are obtained by running VIBE
88 | on a [video](https://www.youtube.com/watch?v=Opry3F6aB1I) containing 5 people.
89 |
90 | ```bash
91 | python demo.py --vid_file https://www.youtube.com/watch?v=Opry3F6aB1I --output_folder output/ --vibe_batch_size 32 --no_render
92 | ```
93 |
94 | | Tracker | GPU | Tracking Time (ms/img) | Tracking FPS | VIBE Time (ms/image) | VIBE FPS | Total FPS |
95 | |-----------------|:---------:|:----------------------:|:------------:|:--------------------:|:--------:|:---------:|
96 | | STAF-pose | RTX2080Ti | 23.2 | 43 | 16.1 | 61 | 21 |
97 | | MaskRCNN-bbox | RTX2080Ti | 68.0 | 15 | 16.1 | 61 | 11 |
98 | | YOLOv3-416-bbox | RTX2080Ti | 12.7 | 79 | 16.1 | 61 | 29 |
99 | | YOLOv3-608-bbox | RTX2080Ti | 22.2 | 45 | 16.1 | 61 | 23 |
100 |
101 | **Note**: The table above does not include the time spent rendering the final output.
102 | We use pyrender with GPU acceleration, and rendering runs at about 2-3 FPS. Please let us know if you know of a faster alternative.
103 |
104 | ## References
105 | [1] Pose tracker is from [STAF implementation](https://github.com/soulslicer/openpose/tree/staf)
106 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import os
18 | os.environ['PYOPENGL_PLATFORM'] = 'egl'
19 |
20 | import torch
21 | import pprint
22 | import random
23 | import numpy as np
24 | import torch.backends.cudnn as cudnn
25 | from torch.utils.tensorboard import SummaryWriter
26 |
27 | from lib.core.loss import VIBELoss
28 | from lib.core.trainer import Trainer
29 | from lib.core.config import parse_args
30 | from lib.utils.utils import prepare_output_dir
31 | from lib.models import VIBE, MotionDiscriminator
32 | from lib.dataset.loaders import get_data_loaders
33 | from lib.utils.utils import create_logger, get_optimizer
34 |
35 |
36 | def main(cfg):
37 | if cfg.SEED_VALUE >= 0:
38 | print(f'Seed value for the experiment {cfg.SEED_VALUE}')
39 | os.environ['PYTHONHASHSEED'] = str(cfg.SEED_VALUE)
40 | random.seed(cfg.SEED_VALUE)
41 | torch.manual_seed(cfg.SEED_VALUE)
42 | np.random.seed(cfg.SEED_VALUE)
43 |
44 | logger = create_logger(cfg.LOGDIR, phase='train')
45 |
46 | logger.info(f'GPU name -> {torch.cuda.get_device_name()}')
47 | logger.info(f'GPU feat -> {torch.cuda.get_device_properties("cuda")}')
48 |
49 | logger.info(pprint.pformat(cfg))
50 |
51 | # cudnn related setting
52 | cudnn.benchmark = cfg.CUDNN.BENCHMARK
53 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
54 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
55 |
56 | writer = SummaryWriter(log_dir=cfg.LOGDIR)
57 | writer.add_text('config', pprint.pformat(cfg), 0)
58 |
59 | # ========= Dataloaders ========= #
60 | data_loaders = get_data_loaders(cfg)
61 |
62 | # ========= Compile Loss ========= #
63 | loss = VIBELoss(
64 | e_loss_weight=cfg.LOSS.KP_2D_W,
65 | e_3d_loss_weight=cfg.LOSS.KP_3D_W,
66 | e_pose_loss_weight=cfg.LOSS.POSE_W,
67 | e_shape_loss_weight=cfg.LOSS.SHAPE_W,
68 | d_motion_loss_weight=cfg.LOSS.D_MOTION_LOSS_W,
69 | )
70 |
71 | # ========= Initialize networks, optimizers and lr_schedulers ========= #
72 | generator = VIBE(
73 | n_layers=cfg.MODEL.TGRU.NUM_LAYERS,
74 | batch_size=cfg.TRAIN.BATCH_SIZE,
75 | seqlen=cfg.DATASET.SEQLEN,
76 | hidden_size=cfg.MODEL.TGRU.HIDDEN_SIZE,
77 | pretrained=cfg.TRAIN.PRETRAINED_REGRESSOR,
78 | add_linear=cfg.MODEL.TGRU.ADD_LINEAR,
79 | bidirectional=cfg.MODEL.TGRU.BIDIRECTIONAL,
80 | use_residual=cfg.MODEL.TGRU.RESIDUAL,
81 | ).to(cfg.DEVICE)
82 |
83 | if cfg.TRAIN.PRETRAINED != '' and os.path.isfile(cfg.TRAIN.PRETRAINED):
84 | checkpoint = torch.load(cfg.TRAIN.PRETRAINED)
85 | best_performance = checkpoint['performance']
86 | generator.load_state_dict(checkpoint['gen_state_dict'])
87 | print(f'==> Loaded pretrained model from {cfg.TRAIN.PRETRAINED}...')
88 | print(f'Performance on 3DPW test set {best_performance}')
89 | else:
90 | print(f'{cfg.TRAIN.PRETRAINED} is not a pretrained model!!!!')
91 |
92 | gen_optimizer = get_optimizer(
93 | model=generator,
94 | optim_type=cfg.TRAIN.GEN_OPTIM,
95 | lr=cfg.TRAIN.GEN_LR,
96 | weight_decay=cfg.TRAIN.GEN_WD,
97 | momentum=cfg.TRAIN.GEN_MOMENTUM,
98 | )
99 |
100 | motion_discriminator = MotionDiscriminator(
101 | rnn_size=cfg.TRAIN.MOT_DISCR.HIDDEN_SIZE,
102 | input_size=69,
103 | num_layers=cfg.TRAIN.MOT_DISCR.NUM_LAYERS,
104 | output_size=1,
105 | feature_pool=cfg.TRAIN.MOT_DISCR.FEATURE_POOL,
106 | attention_size=None if cfg.TRAIN.MOT_DISCR.FEATURE_POOL !='attention' else cfg.TRAIN.MOT_DISCR.ATT.SIZE,
107 | attention_layers=None if cfg.TRAIN.MOT_DISCR.FEATURE_POOL !='attention' else cfg.TRAIN.MOT_DISCR.ATT.LAYERS,
108 | attention_dropout=None if cfg.TRAIN.MOT_DISCR.FEATURE_POOL !='attention' else cfg.TRAIN.MOT_DISCR.ATT.DROPOUT
109 | ).to(cfg.DEVICE)
110 |
111 | dis_motion_optimizer = get_optimizer(
112 | model=motion_discriminator,
113 | optim_type=cfg.TRAIN.MOT_DISCR.OPTIM,
114 | lr=cfg.TRAIN.MOT_DISCR.LR,
115 | weight_decay=cfg.TRAIN.MOT_DISCR.WD,
116 | momentum=cfg.TRAIN.MOT_DISCR.MOMENTUM
117 | )
118 |
119 | motion_lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
120 | dis_motion_optimizer,
121 | mode='min',
122 | factor=0.1,
123 | patience=cfg.TRAIN.LR_PATIENCE,
124 | verbose=True,
125 | )
126 |
127 | lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
128 | gen_optimizer,
129 | mode='min',
130 | factor=0.1,
131 | patience=cfg.TRAIN.LR_PATIENCE,
132 | verbose=True,
133 | )
134 |
135 | # ========= Start Training ========= #
136 | Trainer(
137 | data_loaders=data_loaders,
138 | generator=generator,
139 | motion_discriminator=motion_discriminator,
140 | criterion=loss,
141 | dis_motion_optimizer=dis_motion_optimizer,
142 | dis_motion_update_steps=cfg.TRAIN.MOT_DISCR.UPDATE_STEPS,
143 | gen_optimizer=gen_optimizer,
144 | start_epoch=cfg.TRAIN.START_EPOCH,
145 | end_epoch=cfg.TRAIN.END_EPOCH,
146 | device=cfg.DEVICE,
147 | writer=writer,
148 | debug=cfg.DEBUG,
149 | logdir=cfg.LOGDIR,
150 | lr_scheduler=lr_scheduler,
151 | motion_lr_scheduler=motion_lr_scheduler,
152 | resume=cfg.TRAIN.RESUME,
153 | num_iters_per_epoch=cfg.TRAIN.NUM_ITERS_PER_EPOCH,
154 | debug_freq=cfg.DEBUG_FREQ,
155 | ).fit()
156 |
157 |
158 | if __name__ == '__main__':
159 | cfg, cfg_file = parse_args()
160 | cfg = prepare_output_dir(cfg, cfg_file)
161 |
162 | main(cfg)
163 |
--------------------------------------------------------------------------------
/lib/models/vibe.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import os
18 | import torch
19 | import os.path as osp
20 | import torch.nn as nn
21 | import torch.nn.functional as F
22 |
23 | from lib.core.config import VIBE_DATA_DIR
24 | from lib.models.spin import Regressor, hmr
25 |
26 |
27 | class TemporalEncoder(nn.Module):
28 | def __init__(
29 | self,
30 | n_layers=1,
31 | hidden_size=2048,
32 | add_linear=False,
33 | bidirectional=False,
34 | use_residual=True
35 | ):
36 | super(TemporalEncoder, self).__init__()
37 |
38 | self.gru = nn.GRU(
39 | input_size=2048,
40 | hidden_size=hidden_size,
41 | bidirectional=bidirectional,
42 | num_layers=n_layers
43 | )
44 |
45 | self.linear = None
46 | if bidirectional:
47 | self.linear = nn.Linear(hidden_size*2, 2048)
48 | elif add_linear:
49 | self.linear = nn.Linear(hidden_size, 2048)
50 | self.use_residual = use_residual
51 |
52 | def forward(self, x):
53 | n,t,f = x.shape
54 | x = x.permute(1,0,2) # NTF -> TNF
55 | y, _ = self.gru(x)
56 | if self.linear:
57 | y = F.relu(y)
58 | y = self.linear(y.view(-1, y.size(-1)))
59 | y = y.view(t,n,f)
60 | if self.use_residual and y.shape[-1] == 2048:
61 | y = y + x
62 | y = y.permute(1,0,2) # TNF -> NTF
63 | return y
64 |
65 |
66 | class VIBE(nn.Module):
67 | def __init__(
68 | self,
69 | seqlen,
70 | batch_size=64,
71 | n_layers=1,
72 | hidden_size=2048,
73 | add_linear=False,
74 | bidirectional=False,
75 | use_residual=True,
76 | pretrained=osp.join(VIBE_DATA_DIR, 'spin_model_checkpoint.pth.tar'),
77 | ):
78 |
79 | super(VIBE, self).__init__()
80 |
81 | self.seqlen = seqlen
82 | self.batch_size = batch_size
83 |
84 | self.encoder = TemporalEncoder(
85 | n_layers=n_layers,
86 | hidden_size=hidden_size,
87 | bidirectional=bidirectional,
88 | add_linear=add_linear,
89 | use_residual=use_residual,
90 | )
91 |
92 | # regressor can predict cam, pose and shape params in an iterative way
93 | self.regressor = Regressor()
94 |
95 | if pretrained and os.path.isfile(pretrained):
96 | pretrained_dict = torch.load(pretrained)['model']
97 |
98 | self.regressor.load_state_dict(pretrained_dict, strict=False)
99 | print(f'=> loaded pretrained model from \'{pretrained}\'')
100 |
101 |
102 | def forward(self, input, J_regressor=None):
103 | # input size NTF
104 | batch_size, seqlen = input.shape[:2]
105 |
106 | feature = self.encoder(input)
107 | feature = feature.reshape(-1, feature.size(-1))
108 |
109 | smpl_output = self.regressor(feature, J_regressor=J_regressor)
110 | for s in smpl_output:
111 | s['theta'] = s['theta'].reshape(batch_size, seqlen, -1)
112 | s['verts'] = s['verts'].reshape(batch_size, seqlen, -1, 3)
113 | s['kp_2d'] = s['kp_2d'].reshape(batch_size, seqlen, -1, 2)
114 | s['kp_3d'] = s['kp_3d'].reshape(batch_size, seqlen, -1, 3)
115 | s['rotmat'] = s['rotmat'].reshape(batch_size, seqlen, -1, 3, 3)
116 |
117 | return smpl_output
118 |
119 |
120 | class VIBE_Demo(nn.Module):
121 | def __init__(
122 | self,
123 | seqlen,
124 | batch_size=64,
125 | n_layers=1,
126 | hidden_size=2048,
127 | add_linear=False,
128 | bidirectional=False,
129 | use_residual=True,
130 | pretrained=osp.join(VIBE_DATA_DIR, 'spin_model_checkpoint.pth.tar'),
131 | ):
132 |
133 | super(VIBE_Demo, self).__init__()
134 |
135 | self.seqlen = seqlen
136 | self.batch_size = batch_size
137 |
138 | self.encoder = TemporalEncoder(
139 | n_layers=n_layers,
140 | hidden_size=hidden_size,
141 | bidirectional=bidirectional,
142 | add_linear=add_linear,
143 | use_residual=use_residual,
144 | )
145 |
146 | self.hmr = hmr()
147 | checkpoint = torch.load(pretrained)
148 | self.hmr.load_state_dict(checkpoint['model'], strict=False)
149 |
150 | # regressor can predict cam, pose and shape params in an iterative way
151 | self.regressor = Regressor()
152 |
153 | if pretrained and os.path.isfile(pretrained):
154 | pretrained_dict = torch.load(pretrained)['model']
155 |
156 | self.regressor.load_state_dict(pretrained_dict, strict=False)
157 | print(f'=> loaded pretrained model from \'{pretrained}\'')
158 |
159 |
160 | def forward(self, input, J_regressor=None):
161 | # input size NTF
162 | batch_size, seqlen, nc, h, w = input.shape
163 |
164 | feature = self.hmr.feature_extractor(input.reshape(-1, nc, h, w))
165 |
166 | feature = feature.reshape(batch_size, seqlen, -1)
167 | feature = self.encoder(feature)
168 | feature = feature.reshape(-1, feature.size(-1))
169 |
170 | smpl_output = self.regressor(feature, J_regressor=J_regressor)
171 |
172 | for s in smpl_output:
173 | s['theta'] = s['theta'].reshape(batch_size, seqlen, -1)
174 | s['verts'] = s['verts'].reshape(batch_size, seqlen, -1, 3)
175 | s['kp_2d'] = s['kp_2d'].reshape(batch_size, seqlen, -1, 2)
176 | s['kp_3d'] = s['kp_3d'].reshape(batch_size, seqlen, -1, 3)
177 | s['rotmat'] = s['rotmat'].reshape(batch_size, seqlen, -1, 3, 3)
178 |
179 | return smpl_output
180 |
--------------------------------------------------------------------------------
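Note: a quick way to sanity-check the shape bookkeeping in `TemporalEncoder.forward` (NTF to TNF for the GRU and back, with the residual applied only when the output keeps 2048 dimensions) is to run it on random features. A small sketch, assuming the repository root is on `PYTHONPATH` and substituting a random tensor for real SPIN features:

```python
import torch
from lib.models.vibe import TemporalEncoder  # assumes the repo root is importable

# bidirectional GRU whose output is projected back to the 2048-dim SPIN feature space
encoder = TemporalEncoder(n_layers=2, hidden_size=1024, add_linear=True, bidirectional=True)

x = torch.randn(4, 16, 2048)   # (N batch, T sequence length, F=2048 features)
y = encoder(x)
print(y.shape)                 # torch.Size([4, 16, 2048]) -- residual is added since dims match
```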
/LICENSE:
--------------------------------------------------------------------------------
1 | License
2 |
3 | Software Copyright License for non-commercial scientific research purposes
4 | Please read carefully the following terms and conditions and any accompanying documentation before you download
5 | and/or use the VIBE model, data and software, (the "Model & Software"), including 3D meshes, software, and scripts.
6 | By downloading and/or using the Model & Software (including downloading, cloning, installing, and any other use
7 | of this github repository), you acknowledge that you have read these terms and conditions, understand them, and
8 | agree to be bound by them. If you do not agree with these terms and conditions, you must not download and/or use
9 | the Model & Software. Any infringement of the terms of this agreement will automatically terminate your rights
10 | under this License
11 |
12 | Ownership / Licensees
13 | The Software and the associated materials has been developed at the
14 |
15 | Max Planck Institute for Intelligent Systems (hereinafter "MPI").
16 |
17 | Any copyright or patent right is owned by and proprietary material of the
18 |
19 | Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (hereinafter “MPG”; MPI and MPG hereinafter
20 | collectively “Max-Planck”)
21 |
22 | hereinafter the “Licensor”.
23 |
24 | This software includes the SMPL Body Model. By downloading this software, you are agreeing to be bound by the terms of the SMPL Model License
25 |
26 | https://smpl.is.tue.mpg.de/modellicense
27 |
28 | which is necessary to create SMPL body models.
29 |
30 | SMPL bodies that are generated with VIBE can be distributed freely under the SMPL Body License
31 |
32 | https://smpl.is.tue.mpg.de/bodylicense
33 |
34 | License Grant
35 | Licensor grants you (Licensee) personally a single-user, non-exclusive, non-transferable, free of charge right:
36 |
37 | To install the Model & Software on computers owned, leased or otherwise controlled by you and/or your organization;
38 | To use the Model & Software for the sole purpose of performing non-commercial scientific research, non-commercial
39 | education, or non-commercial artistic projects;
40 | Any other use, in particular any use for commercial purposes, is prohibited. This includes, without limitation,
41 | incorporation in a commercial product, use in a commercial service, or production of other artifacts for
42 | commercial purposes. The Model & Software may not be reproduced, modified and/or made available in any form to
43 | any third party without Max-Planck’s prior written permission.
44 |
45 | The Model & Software may not be used for pornographic purposes or to generate pornographic material whether
46 | commercial or not. This license also prohibits the use of the Model & Software to train methods/algorithms/neural
47 | networks/etc. for commercial use of any kind. By downloading the Model & Software,
48 | you agree not to reverse engineer it.
49 |
50 | No Distribution
51 | The Model & Software and the license herein granted shall not be copied, shared, distributed, re-sold, offered
52 | for re-sale, transferred or sub-licensed in whole or in part except that you may make one copy for archive
53 | purposes only.
54 |
55 | Disclaimer of Representations and Warranties
56 | You expressly acknowledge and agree that the Model & Software results from basic research, is provided “AS IS”,
57 | may contain errors, and that any use of the Model & Software is at your sole risk. LICENSOR MAKES NO REPRESENTATIONS
58 | OR WARRANTIES OF ANY KIND CONCERNING THE MODEL & SOFTWARE, NEITHER EXPRESS NOR IMPLIED, AND THE ABSENCE OF ANY
59 | LEGAL OR ACTUAL DEFECTS, WHETHER DISCOVERABLE OR NOT. Specifically, and not to limit the foregoing, licensor
60 | makes no representations or warranties (i) regarding the merchantability or fitness for a particular purpose of
61 | the Model & Software, (ii) that the use of the Model & Software will not infringe any patents, copyrights or other
62 | intellectual property rights of a third party, and (iii) that the use of the Model & Software will not cause any
63 | damage of any kind to you or a third party.
64 |
65 | Limitation of Liability
66 | Because this Model & Software License Agreement qualifies as a donation, according to Section 521 of the German
67 | Civil Code (Bürgerliches Gesetzbuch – BGB) Licensor as a donor is liable for intent and gross negligence only.
68 | If the Licensor fraudulently conceals a legal or material defect, they are obliged to compensate the Licensee
69 | for the resulting damage.
70 |
71 | Licensor shall be liable for loss of data only up to the amount of typical recovery costs which would have
72 | arisen had proper and regular data backup measures been taken. For the avoidance of doubt Licensor shall be
73 | liable in accordance with the German Product Liability Act in the event of product liability. The foregoing
74 | applies also to Licensor’s legal representatives or assistants in performance. Any further liability shall be excluded.
75 | Patent claims generated through the usage of the Model & Software cannot be directed towards the copyright holders.
76 | The Model & Software is provided in the state of development the licensor defines. If modified or extended by
77 | Licensee, the Licensor makes no claims about the fitness of the Model & Software and is not responsible
78 | for any problems such modifications cause.
79 |
80 | No Maintenance Services
81 | You understand and agree that Licensor is under no obligation to provide either maintenance services,
82 | update services, notices of latent defects, or corrections of defects with regard to the Model & Software.
83 | Licensor nevertheless reserves the right to update, modify, or discontinue the Model & Software at any time.
84 |
85 | Defects of the Model & Software must be notified in writing to the Licensor with a comprehensible description
86 | of the error symptoms. The notification of the defect should enable the reproduction of the error.
87 | The Licensee is encouraged to communicate any use, results, modification or publication.
88 |
89 | Publications using the Model & Software
90 | You acknowledge that the Model & Software is a valuable scientific resource and agree to appropriately reference
91 | the following paper in any publication making use of the Model & Software.
92 |
93 | Citation:
94 |
95 | @inproceedings{VIBE:CVPR:2020,
96 | title = {{VIBE}: Video Inference for Human Body Pose and Shape Estimation},
97 | author = {Kocabas, Muhammed and Athanasiou, Nikos and Black, Michael J.},
98 | booktitle = {Computer Vision and Pattern Recognition (CVPR)},
99 | month = jun,
100 | year = {2020},
101 | month_numeric = {6}
102 | }
103 |
104 | Commercial licensing opportunities
105 | For commercial uses of the Software, please send email to ps-license@tue.mpg.de
106 |
107 | This Agreement shall be governed by the laws of the Federal Republic of Germany except for the UN Sales Convention.
108 |
--------------------------------------------------------------------------------
/lib/data_utils/posetrack_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import sys
18 | sys.path.append('.')
19 |
20 | import glob
21 | import joblib
22 | import argparse
23 | import numpy as np
24 | import json
25 | import os.path as osp
26 |
27 | from lib.models import spin
28 | from lib.core.config import VIBE_DB_DIR
29 | from lib.utils.utils import tqdm_enumerate
30 | from lib.data_utils.feature_extractor import extract_features
31 | from lib.data_utils.kp_utils import get_posetrack_original_kp_names, convert_kps
32 |
33 | def read_data(folder, set):
34 | dataset = {
35 | 'img_name' : [] ,
36 | 'joints2D': [],
37 | 'bbox': [],
38 | 'vid_name': [],
39 | 'features': [],
40 | }
41 |
42 | model = spin.get_pretrained_hmr()
43 |
44 | file_names = glob.glob(osp.join(folder, 'posetrack_data/annotations/', f'{set}/*.json'))
45 | file_names = sorted(file_names)
46 | nn_corrupted = 0
47 | tot_frames = 0
48 | min_frame_number = 8
49 |
50 | for fid,fname in tqdm_enumerate(file_names):
51 | if fname == osp.join(folder, 'annotations/train/021133_mpii_train.json'):
52 | continue
53 |
54 | with open(fname, 'r') as entry:
55 | anns = json.load(entry)
56 | # num_frames = anns['images'][0]['nframes']
57 | anns['images'] = [item for item in anns['images'] if item['is_labeled'] ]
58 | num_frames = len(anns['images'])
59 | frame2imgname = dict()
60 | for el in anns['images']:
61 | frame2imgname[el['frame_id']] = el['file_name']
62 |
63 | num_people = -1
64 | for x in anns['annotations']:
65 | if num_people < x['track_id']:
66 | num_people = x['track_id']
67 | num_people += 1
68 | posetrack_joints = get_posetrack_original_kp_names()
69 | idxs = [anns['categories'][0]['keypoints'].index(h) for h in posetrack_joints if h in anns['categories'][0]['keypoints']]
70 | for x in anns['annotations']:
71 | kps = np.array(x['keypoints']).reshape((17,3))
72 | kps = kps[idxs,:]
73 | x['keypoints'] = list(kps.flatten())
74 |
75 | tot_frames += num_people * num_frames
76 | for p_id in range(num_people):
77 |
78 | annot_pid = [(item['keypoints'], item['bbox'], item['image_id'])
79 | for item in anns['annotations']
80 | if item['track_id'] == p_id and not(np.count_nonzero(item['keypoints']) == 0) ]
81 |
82 | if len(annot_pid) < min_frame_number:
83 | nn_corrupted += len(annot_pid)
84 | continue
85 |
86 | bbox = np.zeros((len(annot_pid),4))
87 | # perm_idxs = get_perm_idxs('posetrack', 'common')
88 | kp_2d = np.zeros((len(annot_pid), len(annot_pid[0][0])//3 ,3))
89 | img_paths = np.zeros((len(annot_pid)))
90 |
91 | for i, (key2djnts, bbox_p, image_id) in enumerate(annot_pid):
92 |
93 | if (bbox_p[2]==0 or bbox_p[3]==0) :
94 | nn_corrupted +=1
95 | continue
96 |
97 | img_paths[i] = image_id
98 | key2djnts[2::3] = len(key2djnts[2::3])*[1]
99 |
100 | kp_2d[i,:] = np.array(key2djnts).reshape(int(len(key2djnts)/3),3) # [perm_idxs, :]
101 | for kp_loc in kp_2d[i,:]:
102 | if kp_loc[0] == 0 and kp_loc[1] == 0:
103 | kp_loc[2] = 0
104 |
105 |
106 | x_tl = bbox_p[0]
107 | y_tl = bbox_p[1]
108 | w = bbox_p[2]
109 | h = bbox_p[3]
110 | bbox_p[0] = x_tl + w / 2
111 | bbox_p[1] = y_tl + h / 2
112 | #
113 |
114 | w = h = np.where(w / h > 1, w, h)
115 | w = h = h * 0.8
116 | bbox_p[2] = w
117 | bbox_p[3] = h
118 | bbox[i, :] = bbox_p
119 |
120 | img_paths = list(img_paths)
121 | img_paths = [osp.join(folder, frame2imgname[item]) if item != 0 else 0 for item in img_paths ]
122 |
123 | bbx_idxs = []
124 | for bbx_id, bbx in enumerate(bbox):
125 | if np.count_nonzero(bbx) == 0:
126 | bbx_idxs += [bbx_id]
127 |
128 | kp_2d = np.delete(kp_2d, bbx_idxs, 0)
129 | img_paths = np.delete(np.array(img_paths), bbx_idxs, 0)
130 | bbox = np.delete(bbox, np.where(~bbox.any(axis=1))[0], axis=0)
131 |
132 | # Convert to common 2d keypoint format
133 | if bbox.size == 0 or bbox.shape[0] < min_frame_number:
134 | nn_corrupted += 1
135 | continue
136 |
137 | kp_2d = convert_kps(kp_2d, src='posetrack', dst='spin')
138 |
139 | dataset['vid_name'].append(np.array([f'{fname}_{p_id}']*img_paths.shape[0]))
140 | dataset['img_name'].append(np.array(img_paths))
141 | dataset['joints2D'].append(kp_2d)
142 | dataset['bbox'].append(np.array(bbox))
143 |
144 | # compute_features
145 | features = extract_features(
146 | model,
147 | np.array(img_paths),
148 | bbox,
149 | kp_2d=kp_2d,
150 | dataset='spin',
151 | debug=False,
152 | )
153 |
154 | assert kp_2d.shape[0] == img_paths.shape[0] == bbox.shape[0]
155 |
156 | dataset['features'].append(features)
157 |
158 |
159 |     print(f'skipped annotations: {nn_corrupted}, total frames: {tot_frames}')
160 | for k in dataset.keys():
161 | dataset[k] = np.array(dataset[k])
162 |
163 | for k in dataset.keys():
164 | dataset[k] = np.concatenate(dataset[k])
165 |
166 | for k,v in dataset.items():
167 | print(k, v.shape)
168 |
169 | return dataset
170 |
171 |
172 | if __name__ == '__main__':
173 | parser = argparse.ArgumentParser()
174 | parser.add_argument('--dir', type=str, help='dataset directory', default='data/posetrack')
175 | args = parser.parse_args()
176 |
177 | dataset_train = read_data(args.dir, 'train')
178 | joblib.dump(dataset_train, osp.join(VIBE_DB_DIR, 'posetrack_train_db.pt'))
179 |
--------------------------------------------------------------------------------
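Note: the bounding-box loop above rewrites PoseTrack's `(x_tl, y_tl, w, h)` annotations in place into centered, square boxes scaled by 0.8. The same arithmetic as a standalone helper (an illustrative sketch; the function name is not part of the repository):

```python
import numpy as np

def posetrack_bbox_to_center_square(bbox_p):
    """Convert an (x_tl, y_tl, w, h) box to (c_x, c_y, s, s) with s = 0.8 * max(w, h)."""
    x_tl, y_tl, w, h = bbox_p
    c_x = x_tl + w / 2.0
    c_y = y_tl + h / 2.0
    s = 0.8 * np.where(w / h > 1, w, h)   # pick the larger side, then shrink by 0.8
    return np.array([c_x, c_y, s, s])

print(posetrack_bbox_to_center_square([10.0, 20.0, 50.0, 100.0]))  # [35. 70. 80. 80.]
```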
/lib/data_utils/threedpw_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import sys
18 | sys.path.append('.')
19 |
20 | import os
21 | import cv2
22 | import torch
23 | import joblib
24 | import argparse
25 | import numpy as np
26 | import pickle as pkl
27 | import os.path as osp
28 | from tqdm import tqdm
29 |
30 | from lib.models import spin
31 | from lib.data_utils.kp_utils import *
32 | from lib.core.config import VIBE_DB_DIR, VIBE_DATA_DIR
33 | from lib.utils.smooth_bbox import get_smooth_bbox_params
34 | from lib.models.smpl import SMPL, SMPL_MODEL_DIR, H36M_TO_J14
35 | from lib.data_utils.feature_extractor import extract_features
36 | from lib.utils.geometry import batch_rodrigues, rotation_matrix_to_angle_axis
37 |
38 | NUM_JOINTS = 24
39 | VIS_THRESH = 0.3
40 | MIN_KP = 6
41 |
42 | def read_data(folder, set, debug=False):
43 |
44 | dataset = {
45 | 'vid_name': [],
46 | 'frame_id': [],
47 | 'joints3D': [],
48 | 'joints2D': [],
49 | 'shape': [],
50 | 'pose': [],
51 | 'bbox': [],
52 | 'img_name': [],
53 | 'features': [],
54 | 'valid': [],
55 | }
56 |
57 | model = spin.get_pretrained_hmr()
58 |
59 | sequences = [x.split('.')[0] for x in os.listdir(osp.join(folder, 'sequenceFiles', set))]
60 |
61 | J_regressor = None
62 |
63 | smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
64 | if set == 'test' or set == 'validation':
65 | J_regressor = torch.from_numpy(np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float()
66 |
67 | for i, seq in tqdm(enumerate(sequences)):
68 |
69 | data_file = osp.join(folder, 'sequenceFiles', set, seq + '.pkl')
70 |
71 | data = pkl.load(open(data_file, 'rb'), encoding='latin1')
72 |
73 | img_dir = osp.join(folder, 'imageFiles', seq)
74 |
75 | num_people = len(data['poses'])
76 | num_frames = len(data['img_frame_ids'])
77 | assert (data['poses2d'][0].shape[0] == num_frames)
78 |
79 | for p_id in range(num_people):
80 | pose = torch.from_numpy(data['poses'][p_id]).float()
81 | shape = torch.from_numpy(data['betas'][p_id][:10]).float().repeat(pose.size(0), 1)
82 | trans = torch.from_numpy(data['trans'][p_id]).float()
83 | j2d = data['poses2d'][p_id].transpose(0,2,1)
84 | cam_pose = data['cam_poses']
85 | campose_valid = data['campose_valid'][p_id]
86 |
87 | # ======== Align the mesh params ======== #
88 | rot = pose[:, :3]
89 | rot_mat = batch_rodrigues(rot)
90 |
91 | Rc = torch.from_numpy(cam_pose[:, :3, :3]).float()
92 | Rs = torch.bmm(Rc, rot_mat.reshape(-1, 3, 3))
93 | rot = rotation_matrix_to_angle_axis(Rs)
94 | pose[:, :3] = rot
95 | # ======== Align the mesh params ======== #
96 |
97 | output = smpl(betas=shape, body_pose=pose[:,3:], global_orient=pose[:,:3], transl=trans)
98 | # verts = output.vertices
99 | j3d = output.joints
100 |
101 | if J_regressor is not None:
102 | vertices = output.vertices
103 | J_regressor_batch = J_regressor[None, :].expand(vertices.shape[0], -1, -1).to(vertices.device)
104 | j3d = torch.matmul(J_regressor_batch, vertices)
105 | j3d = j3d[:, H36M_TO_J14, :]
106 |
107 | img_paths = []
108 | for i_frame in range(num_frames):
109 |                 img_path = os.path.join(img_dir, 'image_{:05d}.jpg'.format(i_frame))
110 | img_paths.append(img_path)
111 |
112 | bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(j2d, vis_thresh=VIS_THRESH, sigma=8)
113 |
114 | # process bbox_params
115 | c_x = bbox_params[:,0]
116 | c_y = bbox_params[:,1]
117 | scale = bbox_params[:,2]
118 | w = h = 150. / scale
119 | w = h = h * 1.1
120 | bbox = np.vstack([c_x,c_y,w,h]).T
121 |
122 | # process keypoints
123 | j2d[:, :, 2] = j2d[:, :, 2] > 0.3 # set the visibility flags
124 | # Convert to common 2d keypoint format
125 | perm_idxs = get_perm_idxs('3dpw', 'common')
126 | perm_idxs += [0, 0] # no neck, top head
127 | j2d = j2d[:, perm_idxs]
128 | j2d[:, 12:, 2] = 0.0
129 |
130 | # print('j2d', j2d[time_pt1:time_pt2].shape)
131 | # print('campose', campose_valid[time_pt1:time_pt2].shape)
132 |
133 | img_paths_array = np.array(img_paths)[time_pt1:time_pt2]
134 | dataset['vid_name'].append(np.array([f'{seq}_{p_id}']*num_frames)[time_pt1:time_pt2])
135 | dataset['frame_id'].append(np.arange(0, num_frames)[time_pt1:time_pt2])
136 | dataset['img_name'].append(img_paths_array)
137 | dataset['joints3D'].append(j3d.numpy()[time_pt1:time_pt2])
138 | dataset['joints2D'].append(j2d[time_pt1:time_pt2])
139 | dataset['shape'].append(shape.numpy()[time_pt1:time_pt2])
140 | dataset['pose'].append(pose.numpy()[time_pt1:time_pt2])
141 | dataset['bbox'].append(bbox)
142 | dataset['valid'].append(campose_valid[time_pt1:time_pt2])
143 |
144 | features = extract_features(model, img_paths_array, bbox,
145 | kp_2d=j2d[time_pt1:time_pt2], debug=debug, dataset='3dpw', scale=1.2)
146 | dataset['features'].append(features)
147 |
148 | for k in dataset.keys():
149 | dataset[k] = np.concatenate(dataset[k])
150 | print(k, dataset[k].shape)
151 |
152 | # Filter out keypoints
153 | indices_to_use = np.where((dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0]
154 | for k in dataset.keys():
155 | dataset[k] = dataset[k][indices_to_use]
156 |
157 | return dataset
158 |
159 |
160 | if __name__ == '__main__':
161 | parser = argparse.ArgumentParser()
162 | parser.add_argument('--dir', type=str, help='dataset directory', default='data/3dpw')
163 | args = parser.parse_args()
164 |
165 | debug = False
166 |
167 | dataset = read_data(args.dir, 'validation', debug=debug)
168 | joblib.dump(dataset, osp.join(VIBE_DB_DIR, '3dpw_val_db.pt'))
169 |
170 | dataset = read_data(args.dir, 'test', debug=debug)
171 | joblib.dump(dataset, osp.join(VIBE_DB_DIR, '3dpw_test_db.pt'))
172 |
--------------------------------------------------------------------------------
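Note: the "Align the mesh params" block rotates the SMPL global orientation into each camera's frame: axis-angle to rotation matrix, pre-multiplication by the camera rotation, and back to axis-angle. The equivalent operation written with SciPy instead of the repository's `batch_rodrigues` / `rotation_matrix_to_angle_axis` helpers (a sketch for a single frame, not the batched code path above):

```python
import numpy as np
from scipy.spatial.transform import Rotation as R

def align_global_orient(global_orient_aa, cam_pose):
    """Rotate an axis-angle root orientation (3,) into the camera frame given a 4x4 cam_pose."""
    Rc = cam_pose[:3, :3]                                # world-to-camera rotation
    Rs = Rc @ R.from_rotvec(global_orient_aa).as_matrix()
    return R.from_matrix(Rs).as_rotvec()                 # back to axis-angle

cam_pose = np.eye(4)                                     # identity camera leaves the pose unchanged
print(align_global_orient(np.array([0.0, 0.0, np.pi / 2]), cam_pose))
```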
/lib/dataset/dataset_3d.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import os
18 | import torch
19 | import random
20 | import logging
21 | import numpy as np
22 | import os.path as osp
23 | import joblib
24 |
25 | from torch.utils.data import Dataset
26 | from lib.core.config import VIBE_DB_DIR
27 | from lib.data_utils.kp_utils import convert_kps
28 | from lib.data_utils.img_utils import normalize_2d_kp, transfrom_keypoints, split_into_chunks
29 |
30 | logger = logging.getLogger(__name__)
31 |
32 | class Dataset3D(Dataset):
33 | def __init__(self, set, seqlen, overlap=0., folder=None, dataset_name=None, debug=False):
34 |
35 | self.folder = folder
36 | self.set = set
37 | self.dataset_name = dataset_name
38 | self.seqlen = seqlen
39 | self.stride = int(seqlen * (1-overlap))
40 | self.debug = debug
41 | self.db = self.load_db()
42 | self.vid_indices = split_into_chunks(self.db['vid_name'], self.seqlen, self.stride)
43 |
44 | def __len__(self):
45 | return len(self.vid_indices)
46 |
47 | def __getitem__(self, index):
48 | return self.get_single_item(index)
49 |
50 | def load_db(self):
51 | db_file = osp.join(VIBE_DB_DIR, f'{self.dataset_name}_{self.set}_db.pt')
52 |
53 | if osp.isfile(db_file):
54 | db = joblib.load(db_file)
55 | else:
56 |             raise ValueError(f'{db_file} does not exist')
57 |
58 | print(f'Loaded {self.dataset_name} dataset from {db_file}')
59 | return db
60 |
61 | def get_single_item(self, index):
62 | start_index, end_index = self.vid_indices[index]
63 |
64 | is_train = self.set == 'train'
65 |
66 | if self.dataset_name == '3dpw':
67 | kp_2d = convert_kps(self.db['joints2D'][start_index:end_index + 1], src='common', dst='spin')
68 | kp_3d = self.db['joints3D'][start_index:end_index + 1]
69 | elif self.dataset_name == 'mpii3d':
70 | kp_2d = self.db['joints2D'][start_index:end_index + 1]
71 | if is_train:
72 | kp_3d = self.db['joints3D'][start_index:end_index + 1]
73 | else:
74 | kp_3d = convert_kps(self.db['joints3D'][start_index:end_index + 1], src='spin', dst='common')
75 | elif self.dataset_name == 'h36m':
76 | kp_2d = self.db['joints2D'][start_index:end_index + 1]
77 | if is_train:
78 | kp_3d = self.db['joints3D'][start_index:end_index + 1]
79 | else:
80 | kp_3d = convert_kps(self.db['joints3D'][start_index:end_index + 1], src='spin', dst='common')
81 |
82 | kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)
83 | nj = 14 if not is_train else 49
84 | kp_3d_tensor = np.zeros((self.seqlen, nj, 3), dtype=np.float16)
85 |
86 |
87 | if self.dataset_name == '3dpw':
88 | pose = self.db['pose'][start_index:end_index+1]
89 | shape = self.db['shape'][start_index:end_index+1]
90 | w_smpl = torch.ones(self.seqlen).float()
91 | w_3d = torch.ones(self.seqlen).float()
92 | elif self.dataset_name == 'h36m':
93 | if not is_train:
94 | pose = np.zeros((kp_2d.shape[0], 72))
95 | shape = np.zeros((kp_2d.shape[0], 10))
96 | w_smpl = torch.zeros(self.seqlen).float()
97 | w_3d = torch.ones(self.seqlen).float()
98 | else:
99 | pose = self.db['pose'][start_index:end_index + 1]
100 | shape = self.db['shape'][start_index:end_index + 1]
101 | w_smpl = torch.ones(self.seqlen).float()
102 | w_3d = torch.ones(self.seqlen).float()
103 | elif self.dataset_name == 'mpii3d':
104 | pose = np.zeros((kp_2d.shape[0], 72))
105 | shape = np.zeros((kp_2d.shape[0], 10))
106 | w_smpl = torch.zeros(self.seqlen).float()
107 | w_3d = torch.ones(self.seqlen).float()
108 |
109 | bbox = self.db['bbox'][start_index:end_index + 1]
110 | input = torch.from_numpy(self.db['features'][start_index:end_index+1]).float()
111 |
112 | theta_tensor = np.zeros((self.seqlen, 85), dtype=np.float16)
113 |
114 | for idx in range(self.seqlen):
115 | # crop image and transform 2d keypoints
116 | kp_2d[idx,:,:2], trans = transfrom_keypoints(
117 | kp_2d=kp_2d[idx,:,:2],
118 | center_x=bbox[idx,0],
119 | center_y=bbox[idx,1],
120 | width=bbox[idx,2],
121 | height=bbox[idx,3],
122 | patch_width=224,
123 | patch_height=224,
124 | do_augment=False,
125 | )
126 |
127 | kp_2d[idx,:,:2] = normalize_2d_kp(kp_2d[idx,:,:2], 224)
128 |
129 | # theta shape (85,)
130 | theta = np.concatenate((np.array([1., 0., 0.]), pose[idx], shape[idx]), axis=0)
131 |
132 | kp_2d_tensor[idx] = kp_2d[idx]
133 | theta_tensor[idx] = theta
134 | kp_3d_tensor[idx] = kp_3d[idx]
135 |
136 | target = {
137 | 'features': input,
138 | 'theta': torch.from_numpy(theta_tensor).float(), # camera, pose and shape
139 | 'kp_2d': torch.from_numpy(kp_2d_tensor).float(), # 2D keypoints transformed according to bbox cropping
140 | 'kp_3d': torch.from_numpy(kp_3d_tensor).float(), # 3D keypoints
141 | 'w_smpl': w_smpl,
142 | 'w_3d': w_3d,
143 | }
144 |
145 | if self.dataset_name == 'mpii3d' and not is_train:
146 | target['valid'] = self.db['valid_i'][start_index:end_index+1]
147 |
148 | if self.dataset_name == '3dpw' and not is_train:
149 | vn = self.db['vid_name'][start_index:end_index + 1]
150 | fi = self.db['frame_id'][start_index:end_index + 1]
151 |             target['instance_id'] = [f'{v}/{f}' for v, f in zip(vn, fi)]
152 |
153 |
154 |
155 | # if self.dataset_name == '3dpw' and not self.is_train:
156 | # target['imgname'] = self.db['img_name'][start_index:end_index+1].tolist()
157 | # target['imgname'] = np.array(target['imgname'])
158 | # print(target['imgname'].dtype)
159 | # target['center'] = self.db['bbox'][start_index:end_index+1, :2]
160 | # target['valid'] = torch.from_numpy(self.db['valid'][start_index:end_index+1])
161 |
162 | if self.debug:
163 | from lib.data_utils.img_utils import get_single_image_crop
164 |
165 | if self.dataset_name == 'mpii3d':
166 | video = self.db['img_name'][start_index:end_index+1]
167 | # print(video)
168 | elif self.dataset_name == 'h36m':
169 | video = self.db['img_name'][start_index:end_index + 1]
170 | else:
171 | vid_name = self.db['vid_name'][start_index]
172 | vid_name = '_'.join(vid_name.split('_')[:-1])
173 | f = osp.join(self.folder, 'imageFiles', vid_name)
174 | video_file_list = [osp.join(f, x) for x in sorted(os.listdir(f)) if x.endswith('.jpg')]
175 | frame_idxs = self.db['frame_id'][start_index:end_index + 1]
176 | # print(f, frame_idxs)
177 | video = [video_file_list[i] for i in frame_idxs]
178 |
179 | video = torch.cat(
180 | [get_single_image_crop(image, bbox).unsqueeze(0) for image, bbox in zip(video, bbox)], dim=0
181 | )
182 |
183 | target['video'] = video
184 |
185 | return target
186 |
187 |
188 |
189 |
190 |
191 |
--------------------------------------------------------------------------------
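Note: `Dataset3D` serves fixed-length windows: `split_into_chunks` (defined in `lib/data_utils/img_utils.py`, not shown in this section) turns the per-frame `vid_name` array into `(start, end)` index pairs with stride `seqlen * (1 - overlap)`. A simplified stand-in that illustrates the windowing, not the repository's exact implementation:

```python
import numpy as np

def split_into_chunks_simplified(vid_names, seqlen, stride):
    """Return (start, end) index pairs of length `seqlen` that never cross video boundaries."""
    chunks = []
    # first occurrence of each video name marks a boundary
    _, start_idxs = np.unique(vid_names, return_index=True)
    boundaries = sorted(start_idxs) + [len(vid_names)]
    for b0, b1 in zip(boundaries[:-1], boundaries[1:]):
        for start in range(b0, b1 - seqlen + 1, stride):
            chunks.append((start, start + seqlen - 1))
    return chunks

names = np.array(['vidA'] * 6 + ['vidB'] * 4)
print(split_into_chunks_simplified(names, seqlen=3, stride=2))
# [(0, 2), (2, 4), (6, 8)] -- windows for vidA, then vidB; none spans both videos
```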
/README.md:
--------------------------------------------------------------------------------
1 | # VIBE: Video Inference for Human Body Pose and Shape Estimation [CVPR-2020]
2 | [](https://arxiv.org/abs/1912.05656) [](https://colab.research.google.com/drive/1dFfwxZ52MN86FA6uFNypMEdFShd2euQA) [](https://paperswithcode.com/sota/3d-human-pose-estimation-on-3dpw?p=vibe-video-inference-for-human-body-pose-and)
3 |
4 |
5 |
6 |
7 |
40 |
41 |
42 |
91 |
92 |