├── lib ├── core │ ├── __init__.py │ ├── config.py │ ├── evaluate.py │ └── loss.py ├── utils │ ├── __init__.py │ ├── one_euro_filter.py │ ├── smooth_pose.py │ ├── pose_tracker.py │ ├── smooth_bbox.py │ ├── renderer.py │ ├── utils.py │ ├── eval_utils.py │ └── demo_utils.py ├── models │ ├── __init__.py │ ├── attention.py │ ├── motion_discriminator.py │ ├── smpl.py │ └── vibe.py ├── dataset │ ├── __init__.py │ ├── penn_action.py │ ├── posetrack.py │ ├── threedpw.py │ ├── mpii3d.py │ ├── amass.py │ ├── insta.py │ ├── loaders.py │ ├── inference.py │ ├── dataset_2d.py │ └── dataset_3d.py ├── data_utils │ ├── feature_extractor.py │ ├── penn_action_utils.py │ ├── amass_utils.py │ ├── posetrack_utils.py │ └── threedpw_utils.py └── smplify │ ├── prior.py │ ├── losses.py │ └── temporal_smplify.py ├── girl_dance.mp4 ├── .gitignore ├── scripts ├── prepare_data.sh ├── install_pip.sh ├── install_conda.sh └── prepare_training_data.sh ├── doc ├── eval.md ├── train.md └── demo.md ├── requirements.txt ├── .github └── ISSUE_TEMPLATE │ ├── bug-report.md │ └── feature_request.md ├── configs ├── config.yaml └── config_wo_3dpw.yaml ├── tests ├── test_2d_datasets.py └── test_3d_datasets.py ├── eval.py ├── vibe_demo.ipynb ├── train.py ├── LICENSE └── README.md /lib/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /girl_dance.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cedro3/VIBE/master/girl_dance.mp4 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | data 3 | __pycache__/ 4 | vibe-env/ 5 | output/ 6 | *.mp4 7 | results 8 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .vibe import VIBE 2 | from .motion_discriminator import MotionDiscriminator 3 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset_2d import Dataset2D 2 | from .dataset_3d import Dataset3D 3 | 4 | from .insta import Insta 5 | from .amass import AMASS 6 | from .mpii3d import MPII3D 7 | from .threedpw import ThreeDPW 8 | from .posetrack import PoseTrack 9 | from .penn_action import PennAction 10 | 11 | -------------------------------------------------------------------------------- /scripts/prepare_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | mkdir -p data 4 | cd data 5 | gdown "https://drive.google.com/uc?id=1untXhYOLQtpNEy4GTY_0fL_H-k6cTf_r" 6 | unzip vibe_data.zip 7 | rm vibe_data.zip 8 | cd .. 9 | mv data/vibe_data/sample_video.mp4 . 
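# place the YOLOv3 detector weights in torch's default model cache ($HOME/.torch/models), created below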
10 | mkdir -p $HOME/.torch/models/ 11 | mv data/vibe_data/yolov3.weights $HOME/.torch/models/ 12 | -------------------------------------------------------------------------------- /scripts/install_pip.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "Creating virtual environment" 4 | python3.7 -m venv vibe-env 5 | echo "Activating virtual environment" 6 | 7 | source $PWD/vibe-env/bin/activate 8 | 9 | $PWD/vibe-env/bin/pip install numpy==1.17.5 torch==1.4.0 torchvision==0.5.0 10 | $PWD/vibe-env/bin/pip install git+https://github.com/giacaglia/pytube.git --upgrade 11 | $PWD/vibe-env/bin/pip install -r requirements.txt 12 | -------------------------------------------------------------------------------- /scripts/install_conda.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export CONDA_ENV_NAME=vibe-env 4 | echo $CONDA_ENV_NAME 5 | 6 | conda create -n $CONDA_ENV_NAME python=3.7 7 | 8 | eval "$(conda shell.bash hook)" 9 | conda activate $CONDA_ENV_NAME 10 | 11 | which python 12 | which pip 13 | 14 | pip install numpy==1.17.5 torch==1.4.0 torchvision==0.5.0 15 | pip install git+https://github.com/giacaglia/pytube.git --upgrade 16 | pip install -r requirements.txt 17 | -------------------------------------------------------------------------------- /doc/eval.md: -------------------------------------------------------------------------------- 1 | # Evaluation 2 | 3 | Run the command below to evaluate a pretrained model. 4 | 5 | ```shell script 6 | python eval.py --cfg configs/config.yaml 7 | ``` 8 | 9 | Change the `TRAIN.PRETRAINED` field of the config file to the checkpoint you would like to evaluate. 10 | You should be able to obtain the output below: 11 | 12 | ```shell script 13 | # TRAIN.PRETRAINED = 'data/vibe_data/vibe_model_wo_3dpw.pth.tar' 14 | ...Evaluating on 3DPW test set... 15 | MPJPE: 93.5881, PA-MPJPE: 56.5608, PVE: 113.4118, ACCEL: 27.1242, ACCEL_ERR: 27.9877 16 | ``` 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm==4.28.1 2 | yacs==0.1.6 3 | h5py==2.10.0 4 | numpy==1.17.5 5 | scipy==1.4.1 6 | numba==0.47.0 7 | smplx==0.1.13 8 | gdown==3.6.4 9 | PyYAML==5.3.1 10 | joblib==0.14.1 11 | pillow==6.2.1 12 | trimesh==3.5.25 13 | pyrender==0.1.36 14 | progress==1.5 15 | filterpy==1.4.5 16 | matplotlib==3.1.3 17 | tensorflow==1.15.4 18 | tensorboard==2.1.0 19 | torchvision==0.5.0 20 | scikit-image==0.16.2 21 | scikit-video==1.1.11 22 | opencv-python==4.1.2.30 23 | llvmlite==0.32.1 24 | git+https://github.com/mattloper/chumpy.git 25 | git+https://github.com/mkocabas/yolov3-pytorch.git 26 | git+https://github.com/mkocabas/multi-person-tracker.git 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Use this to report bugs 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | Thanks for your interest in our research! 11 | 12 | If you have problems running our code, please include: 13 | 14 | 1. your operating system and its version, 15 | 2. your python version, 16 | 3. your pytorch version, 17 | 4.
the stack trace of the error that you see, 18 | 19 | Specifically, if you have an issue with pyrender or OpenGL setup & installation, please refer to pyrender [docs](https://pyrender.readthedocs.io/en/latest/) or [github issues](https://github.com/mmatl/pyrender/issues). 20 | -------------------------------------------------------------------------------- /scripts/prepare_training_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | mkdir -p ./data/vibe_db 4 | export PYTHONPATH="./:$PYTHONPATH" 5 | 6 | # AMASS 7 | python lib/data_utils/amass_utils.py --dir ./data/amass 8 | 9 | # InstaVariety 10 | # Comment this if you already downloaded the preprocessed file 11 | python lib/data_utils/insta_utils.py --dir ./data/insta_variety 12 | 13 | # 3DPW 14 | python lib/data_utils/threedpw_utils.py --dir ./data/3dpw 15 | 16 | # MPI-INF-3D-HP 17 | python lib/data_utils/mpii3d_utils.py --dir ./data/mpi_inf_3dhp 18 | 19 | # PoseTrack 20 | python lib/data_utils/posetrack_utils.py --dir ./data/posetrack 21 | 22 | # PennAction 23 | python lib/data_utils/penn_action_utils.py --dir ./data/penn_action 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Use this to suggest an idea for this project 4 | title: "[FEATURE]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /configs/config.yaml: -------------------------------------------------------------------------------- 1 | DEBUG: false 2 | DEBUG_FREQ: 5 3 | LOGDIR: '' 4 | DEVICE: 'cuda' 5 | EXP_NAME: 'vibe' 6 | OUTPUT_DIR: 'results/vibe_tests' 7 | NUM_WORKERS: 8 8 | SEED_VALUE: -1 9 | DATASET: 10 | SEQLEN: 16 11 | LOSS: 12 | KP_2D_W: 300.0 13 | KP_3D_W: 300.0 14 | SHAPE_W: 0.06 15 | POSE_W: 60.0 16 | D_MOTION_LOSS_W: 0.5 17 | TRAIN: 18 | BATCH_SIZE: 32 19 | NUM_ITERS_PER_EPOCH: 500 20 | PRETRAINED: '' 21 | PRETRAINED_REGRESSOR: 'data/vibe_data/spin_model_checkpoint.pth.tar' 22 | RESUME: '' 23 | START_EPOCH: 0 24 | END_EPOCH: 30 25 | LR_PATIENCE: 5 26 | DATA_2D_RATIO: 0.6 27 | DATASETS_2D: 28 | - 'Insta' 29 | # - 'PoseTrack' 30 | # - 'PennAction' 31 | DATASETS_3D: 32 | # - 'ThreeDPW' 33 | - 'MPII3D' 34 | DATASET_EVAL: 'ThreeDPW' 35 | GEN_LR: 0.00005 36 | GEN_WD: 0.0 37 | MOT_DISCR: 38 | OPTIM: 'Adam' 39 | LR: 0.0001 40 | WD: 0.0001 41 | MOMENTUM: 0.9 42 | HIDDEN_SIZE: 1024 43 | NUM_LAYERS: 2 44 | FEATURE_POOL: 'attention' 45 | ATT: 46 | LAYERS: 3 47 | SIZE: 1024 48 | DROPOUT: 0.2 49 | MODEL: 50 | TEMPORAL_TYPE: 'gru' 51 | TGRU: 52 | NUM_LAYERS: 2 53 | ADD_LINEAR: true 54 | RESIDUAL: true 55 | BIDIRECTIONAL: false 56 | HIDDEN_SIZE: 1024 -------------------------------------------------------------------------------- /configs/config_wo_3dpw.yaml: -------------------------------------------------------------------------------- 1 | DEBUG: false 2 | DEBUG_FREQ: 5 3 | LOGDIR: '' 4 | DEVICE: 'cuda' 5 | EXP_NAME: 'vibe' 6 | OUTPUT_DIR: 'results/vibe_wo_3dpw' 7 | NUM_WORKERS: 8 8 | SEED_VALUE: -1 9 | DATASET: 10 | SEQLEN: 16 11 | LOSS: 12 | KP_2D_W: 300.0 13 | KP_3D_W: 300.0 14 | SHAPE_W: 0.06 15 | POSE_W: 60.0 16 | D_MOTION_LOSS_W: 0.5 17 | TRAIN: 18 | BATCH_SIZE: 32 19 | NUM_ITERS_PER_EPOCH: 500 20 | PRETRAINED: '' 21 | PRETRAINED_REGRESSOR: 'data/vibe_data/spin_model_checkpoint.pth.tar' 22 | RESUME: '' 23 | START_EPOCH: 0 24 | END_EPOCH: 30 25 | LR_PATIENCE: 5 26 | DATA_2D_RATIO: 0.6 27 | DATASETS_2D: 28 | - 'Insta' 29 | # - 'PoseTrack' 30 | # - 'PennAction' 31 | DATASETS_3D: 32 | # - 'ThreeDPW' 33 | - 'MPII3D' 34 | DATASET_EVAL: 'ThreeDPW' 35 | GEN_LR: 0.00005 36 | GEN_WD: 0.0 37 | MOT_DISCR: 38 | OPTIM: 'Adam' 39 | LR: 0.0001 40 | WD: 0.0001 41 | MOMENTUM: 0.9 42 | HIDDEN_SIZE: 1024 43 | NUM_LAYERS: 2 44 | FEATURE_POOL: 'attention' 45 | ATT: 46 | LAYERS: 3 47 | SIZE: 1024 48 | DROPOUT: 0.2 49 | MODEL: 50 | TEMPORAL_TYPE: 'gru' 51 | TGRU: 52 | NUM_LAYERS: 2 53 | ADD_LINEAR: true 54 | RESIDUAL: true 55 | BIDIRECTIONAL: false 56 | HIDDEN_SIZE: 1024 -------------------------------------------------------------------------------- /lib/dataset/penn_action.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from lib.dataset import Dataset2D 18 | from lib.core.config import PENNACTION_DIR 19 | 20 | 21 | class PennAction(Dataset2D): 22 | def __init__(self, seqlen, overlap=0.75, debug=False): 23 | db_name = 'pennaction' 24 | 25 | super(PennAction, self).__init__( 26 | seqlen=seqlen, 27 | folder=PENNACTION_DIR, 28 | dataset_name=db_name, 29 | debug=debug, 30 | overlap=overlap, 31 | ) 32 | print(f'{db_name} - number of dataset objects {self.__len__()}') 33 | -------------------------------------------------------------------------------- /lib/dataset/posetrack.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from lib.dataset import Dataset2D 18 | from lib.core.config import POSETRACK_DIR 19 | 20 | 21 | class PoseTrack(Dataset2D): 22 | def __init__(self, seqlen, overlap=0.75, folder=None, debug=False): 23 | db_name = 'posetrack' 24 | super(PoseTrack, self).__init__( 25 | seqlen=seqlen, 26 | folder=POSETRACK_DIR, 27 | dataset_name=db_name, 28 | debug=debug, 29 | overlap=overlap, 30 | ) 31 | print(f'{db_name} - number of dataset objects {self.__len__()}') 32 | -------------------------------------------------------------------------------- /tests/test_2d_datasets.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | 4 | import torch 5 | import numpy as np 6 | import skimage.io as io 7 | import matplotlib.pyplot as plt 8 | from torch.utils.data import DataLoader 9 | 10 | from lib.dataset import * 11 | from lib.utils.vis import batch_draw_skeleton, batch_visualize_preds 12 | 13 | 14 | def debug_2d_data(dataset, DEBUG=True): 15 | is_train = True 16 | seqlen = 32 17 | batch_size = 1 18 | db = eval(dataset)(seqlen=seqlen, debug=DEBUG) 19 | 20 | dataloader = DataLoader( 21 | dataset=db, 22 | batch_size=batch_size, 23 | shuffle=True, 24 | num_workers=1, 25 | ) 26 | 27 | for i, target in enumerate(dataloader): 28 | for k, v in target.items(): 29 | print(k, v.shape) 30 | 31 | if DEBUG: 32 | if dataset == 'Insta': # compare strings by value, not identity 33 | input = torch.ones(batch_size, seqlen, 3, 224, 224)[0] 34 | else: 35 | input = target['video'][0] 36 | single_target = {k: v[0] for k, v in target.items()} 37 | 38 | dataset_name = 'spin' 39 | plt.figure(figsize=(19.2,10.8)) 40 | images = batch_draw_skeleton(input, single_target, dataset=dataset_name, max_images=4) 41 | plt.imshow(images) 42 | plt.show() 43 | 44 | if i == 20: 45 | break 46 | 47 | 48 | if __name__ == '__main__': 49 | debug_2d_data('Insta', DEBUG=True) 50 | -------------------------------------------------------------------------------- /lib/utils/one_euro_filter.py: -------------------------------------------------------------------------------- 1 | import math 2 |
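# One Euro filter (Casiez et al., CHI 2012): a first-order low-pass filter whose cutoff frequency adapts to the signal's speed -- cutoff = min_cutoff + beta * |dx_hat| -- suppressing jitter at low speeds while keeping lag small during fast motion.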
import numpy as np 3 | 4 | 5 | def smoothing_factor(t_e, cutoff): 6 | r = 2 * math.pi * cutoff * t_e 7 | return r / (r + 1) 8 | 9 | 10 | def exponential_smoothing(a, x, x_prev): 11 | return a * x + (1 - a) * x_prev 12 | 13 | 14 | class OneEuroFilter: 15 | def __init__(self, t0, x0, dx0=0.0, min_cutoff=1.0, beta=0.0, 16 | d_cutoff=1.0): 17 | """Initialize the one euro filter.""" 18 | # The parameters. 19 | self.min_cutoff = float(min_cutoff) 20 | self.beta = float(beta) 21 | self.d_cutoff = float(d_cutoff) 22 | # Previous values. 23 | self.x_prev = x0 24 | self.dx_prev = dx0 25 | self.t_prev = t0 26 | 27 | def __call__(self, t, x): 28 | """Compute the filtered signal.""" 29 | t_e = t - self.t_prev 30 | 31 | # The filtered derivative of the signal. 32 | a_d = smoothing_factor(t_e, self.d_cutoff) 33 | dx = (x - self.x_prev) / t_e 34 | dx_hat = exponential_smoothing(a_d, dx, self.dx_prev) 35 | 36 | # The filtered signal. 37 | cutoff = self.min_cutoff + self.beta * np.abs(dx_hat) 38 | a = smoothing_factor(t_e, cutoff) 39 | x_hat = exponential_smoothing(a, x, self.x_prev) 40 | 41 | # Memorize the previous values. 42 | self.x_prev = x_hat 43 | self.dx_prev = dx_hat 44 | self.t_prev = t 45 | 46 | return x_hat 47 | -------------------------------------------------------------------------------- /lib/dataset/threedpw.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from lib.dataset import Dataset3D 18 | from lib.core.config import THREEDPW_DIR 19 | 20 | class ThreeDPW(Dataset3D): 21 | def __init__(self, set, seqlen, overlap=0.75, debug=False): 22 | db_name = '3dpw' 23 | 24 | # during testing we don't need data augmentation 25 | # but we can use it as an ensemble 26 | is_train = False 27 | overlap = overlap if is_train else 0. 28 | print('3DPW Dataset overlap ratio: ', overlap) 29 | super(ThreeDPW, self).__init__( 30 | set=set, 31 | folder=THREEDPW_DIR, 32 | seqlen=seqlen, 33 | overlap=overlap, 34 | dataset_name=db_name, 35 | debug=debug, 36 | ) 37 | print(f'{db_name} - number of dataset objects {self.__len__()}') -------------------------------------------------------------------------------- /lib/dataset/mpii3d.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 
10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from lib.dataset import Dataset3D 18 | from lib.core.config import MPII3D_DIR 19 | 20 | 21 | class MPII3D(Dataset3D): 22 | def __init__(self, set, seqlen, overlap=0, debug=False): 23 | db_name = 'mpii3d' 24 | 25 | # during testing we don't need data augmentation 26 | # but we can use it as an ensemble 27 | is_train = set == 'train' 28 | overlap = overlap if is_train else 0. 29 | print('MPII3D Dataset overlap ratio: ', overlap) 30 | super(MPII3D, self).__init__( 31 | set = set, 32 | folder=MPII3D_DIR, 33 | seqlen=seqlen, 34 | overlap=overlap, 35 | dataset_name=db_name, 36 | debug=debug, 37 | ) 38 | print(f'{db_name} - number of dataset objects {self.__len__()}') -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from lib.dataset import ThreeDPW 5 | from lib.models import VIBE 6 | from lib.core.evaluate import Evaluator 7 | from lib.core.config import parse_args 8 | from torch.utils.data import DataLoader 9 | 10 | 11 | def main(cfg): 12 | print('...Evaluating on 3DPW test set...') 13 | 14 | model = VIBE( 15 | n_layers=cfg.MODEL.TGRU.NUM_LAYERS, 16 | batch_size=cfg.TRAIN.BATCH_SIZE, 17 | seqlen=cfg.DATASET.SEQLEN, 18 | hidden_size=cfg.MODEL.TGRU.HIDDEN_SIZE, 19 | pretrained=cfg.TRAIN.PRETRAINED_REGRESSOR, 20 | add_linear=cfg.MODEL.TGRU.ADD_LINEAR, 21 | bidirectional=cfg.MODEL.TGRU.BIDIRECTIONAL, 22 | use_residual=cfg.MODEL.TGRU.RESIDUAL, 23 | ).to(cfg.DEVICE) 24 | 25 | if cfg.TRAIN.PRETRAINED != '' and os.path.isfile(cfg.TRAIN.PRETRAINED): 26 | checkpoint = torch.load(cfg.TRAIN.PRETRAINED) 27 | best_performance = checkpoint['performance'] 28 | model.load_state_dict(checkpoint['gen_state_dict']) 29 | print(f'==> Loaded pretrained model from {cfg.TRAIN.PRETRAINED}...') 30 | print(f'Performance on 3DPW test set {best_performance}') 31 | else: 32 | print(f'{cfg.TRAIN.PRETRAINED} is not a pretrained model!!!!') 33 | exit() 34 | 35 | test_db = ThreeDPW(set='test', seqlen=cfg.DATASET.SEQLEN, debug=cfg.DEBUG) 36 | 37 | test_loader = DataLoader( 38 | dataset=test_db, 39 | batch_size=cfg.TRAIN.BATCH_SIZE, 40 | shuffle=False, 41 | num_workers=cfg.NUM_WORKERS, 42 | ) 43 | 44 | Evaluator( 45 | model=model, 46 | device=cfg.DEVICE, 47 | test_loader=test_loader, 48 | ).run() 49 | 50 | 51 | if __name__ == '__main__': 52 | cfg, cfg_file = parse_args() 53 | 54 | main(cfg) 55 | -------------------------------------------------------------------------------- /tests/test_3d_datasets.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | import time 4 | from lib.dataset import * 5 | import matplotlib.pyplot as plt 6 | from torch.utils.data import DataLoader 7 | from lib.models.smpl import SMPL, SMPL_MODEL_DIR 8 | from lib.utils.vis import batch_draw_skeleton, batch_visualize_preds 9 | 10 | dataset = 'MPII3D' 11 | seqlen = 16 12 | DEBUG = True 13 | 14 | db = eval(dataset)(set='val', seqlen=seqlen, debug=DEBUG) 15 | 16 | dataloader = DataLoader( 17 | dataset=db, 18 | batch_size=4, 19 | shuffle=True, 20 | num_workers=1, 21 | ) 22 | 23 | smpl = SMPL(SMPL_MODEL_DIR) 24 | 25 | start = time.time() 26 | for i, target in 
enumerate(dataloader): 27 | data_time = time.time() - start 28 | start = time.time() 29 | print(f'Data loading time {data_time:.4f}') 30 | 31 | for k, v in target.items(): 32 | print(k, v.shape) 33 | 34 | if DEBUG: 35 | input = target['video'][0] 36 | single_target = {k: v[0] for k, v in target.items()} 37 | 38 | if dataset == 'MPII3D': 39 | images = batch_draw_skeleton(input, single_target, dataset='spin', max_images=4) 40 | plt.imshow(images) 41 | plt.show() 42 | else: 43 | theta = single_target['theta'] 44 | pose, shape = theta[:, 3:75], theta[:, 75:] 45 | 46 | # verts, j3d, smpl_j3d = smpl(pose, shape) 47 | 48 | pred_output = smpl(betas=shape, body_pose=pose[:, 3:], global_orient=pose[:, :3], pose2rot=True) 49 | 50 | single_target['verts'] = pred_output.vertices 51 | 52 | images = batch_visualize_preds(input, single_target, single_target, max_images=4, dataset='spin') 53 | # images = batch_draw_skeleton(input, single_target, dataset='common', max_images=10) 54 | plt.imshow(images) 55 | plt.show() 56 | 57 | if i == 100: 58 | break -------------------------------------------------------------------------------- /lib/dataset/amass.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import torch 18 | import joblib 19 | import numpy as np 20 | import os.path as osp 21 | from torch.utils.data import Dataset 22 | 23 | from lib.core.config import VIBE_DB_DIR 24 | from lib.data_utils.img_utils import split_into_chunks 25 | 26 | class AMASS(Dataset): 27 | def __init__(self, seqlen): 28 | self.seqlen = seqlen 29 | 30 | self.stride = seqlen 31 | 32 | self.db = self.load_db() 33 | self.vid_indices = split_into_chunks(self.db['vid_name'], self.seqlen, self.stride) 34 | del self.db['vid_name'] 35 | print(f'AMASS dataset number of videos: {len(self.vid_indices)}') 36 | 37 | def __len__(self): 38 | return len(self.vid_indices) 39 | 40 | def __getitem__(self, index): 41 | return self.get_single_item(index) 42 | 43 | def load_db(self): 44 | db_file = osp.join(VIBE_DB_DIR, 'amass_db.pt') 45 | db = joblib.load(db_file) 46 | return db 47 | 48 | def get_single_item(self, index): 49 | start_index, end_index = self.vid_indices[index] 50 | thetas = self.db['theta'][start_index:end_index+1] 51 | 52 | cam = np.array([1., 0., 0.])[None, ...] 
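# dummy weak-perspective camera (scale = 1, zero translation), tiled below to one entry per frame so the AMASS theta matches the (cam, pose, shape) layout used by the video datasets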
53 | cam = np.repeat(cam, thetas.shape[0], axis=0) 54 | theta = np.concatenate([cam, thetas], axis=-1) 55 | 56 | target = { 57 | 'theta': torch.from_numpy(theta).float(), # cam, pose and shape 58 | } 59 | return target 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /lib/utils/smooth_pose.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import torch 18 | import numpy as np 19 | 20 | from lib.models.smpl import SMPL, SMPL_MODEL_DIR 21 | from lib.utils.one_euro_filter import OneEuroFilter 22 | 23 | 24 | def smooth_pose(pred_pose, pred_betas, min_cutoff=0.004, beta=0.7): 25 | # min_cutoff: Decreasing the minimum cutoff frequency decreases slow speed jitter 26 | # beta: Increasing the speed coefficient(beta) decreases speed lag. 27 | 28 | one_euro_filter = OneEuroFilter( 29 | np.zeros_like(pred_pose[0]), 30 | pred_pose[0], 31 | min_cutoff=min_cutoff, 32 | beta=beta, 33 | ) 34 | 35 | smpl = SMPL(model_path=SMPL_MODEL_DIR) 36 | 37 | pred_pose_hat = np.zeros_like(pred_pose) 38 | 39 | # initialize 40 | pred_pose_hat[0] = pred_pose[0] 41 | 42 | pred_verts_hat = [] 43 | pred_joints3d_hat = [] 44 | 45 | smpl_output = smpl( 46 | betas=torch.from_numpy(pred_betas[0]).unsqueeze(0), 47 | body_pose=torch.from_numpy(pred_pose[0, 1:]).unsqueeze(0), 48 | global_orient=torch.from_numpy(pred_pose[0, 0:1]).unsqueeze(0), 49 | ) 50 | pred_verts_hat.append(smpl_output.vertices.detach().cpu().numpy()) 51 | pred_joints3d_hat.append(smpl_output.joints.detach().cpu().numpy()) 52 | 53 | for idx, pose in enumerate(pred_pose[1:]): 54 | idx += 1 55 | 56 | t = np.ones_like(pose) * idx 57 | pose = one_euro_filter(t, pose) 58 | pred_pose_hat[idx] = pose 59 | 60 | smpl_output = smpl( 61 | betas=torch.from_numpy(pred_betas[idx]).unsqueeze(0), 62 | body_pose=torch.from_numpy(pred_pose_hat[idx, 1:]).unsqueeze(0), 63 | global_orient=torch.from_numpy(pred_pose_hat[idx, 0:1]).unsqueeze(0), 64 | ) 65 | pred_verts_hat.append(smpl_output.vertices.detach().cpu().numpy()) 66 | pred_joints3d_hat.append(smpl_output.joints.detach().cpu().numpy()) 67 | 68 | return np.vstack(pred_verts_hat), pred_pose_hat, np.vstack(pred_joints3d_hat) -------------------------------------------------------------------------------- /lib/dataset/insta.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 
8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import h5py 18 | import torch 19 | import logging 20 | import numpy as np 21 | import os.path as osp 22 | 23 | from torch.utils.data import Dataset 24 | from lib.core.config import VIBE_DB_DIR 25 | from lib.data_utils.kp_utils import convert_kps 26 | from lib.data_utils.img_utils import normalize_2d_kp, split_into_chunks 27 | 28 | logger = logging.getLogger(__name__) 29 | 30 | class Insta(Dataset): 31 | def __init__(self, seqlen, overlap=0., debug=False): 32 | self.seqlen = seqlen 33 | self.stride = int(seqlen * (1-overlap)) 34 | 35 | self.h5_file = osp.join(VIBE_DB_DIR, 'insta_train_db.h5') 36 | 37 | with h5py.File(self.h5_file, 'r') as db: 38 | self.db = db 39 | self.vid_indices = split_into_chunks(self.db['vid_name'], self.seqlen, self.stride) 40 | 41 | print(f'InstaVariety number of dataset objects {self.__len__()}') 42 | 43 | def __len__(self): 44 | return len(self.vid_indices) 45 | 46 | def __getitem__(self, index): 47 | return self.get_single_item(index) 48 | 49 | def get_single_item(self, index): 50 | start_index, end_index = self.vid_indices[index] 51 | 52 | with h5py.File(self.h5_file, 'r') as db: 53 | self.db = db 54 | 55 | kp_2d = self.db['joints2D'][start_index:end_index + 1] 56 | kp_2d = convert_kps(kp_2d, src='insta', dst='spin') 57 | kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16) 58 | 59 | 60 | input = torch.from_numpy(self.db['features'][start_index:end_index+1]).float() 61 | 62 | vid_name = self.db['vid_name'][start_index:end_index + 1] 63 | frame_id = self.db['frame_id'][start_index:end_index + 1].astype(str) 64 | instance_id = np.array([v.decode('ascii') + f for v, f in zip(vid_name, frame_id)]) 65 | 66 | for idx in range(self.seqlen): 67 | kp_2d[idx,:,:2] = normalize_2d_kp(kp_2d[idx,:,:2], 224) 68 | kp_2d_tensor[idx] = kp_2d[idx] 69 | 70 | target = { 71 | 'features': input, 72 | 'kp_2d': torch.from_numpy(kp_2d_tensor).float(), # 2D keypoints transformed according to bbox cropping 73 | # 'instance_id': instance_id 74 | } 75 | 76 | return target -------------------------------------------------------------------------------- /lib/dataset/loaders.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from torch.utils.data import ConcatDataset, DataLoader 18 | 19 | from lib.dataset import * 20 | 21 | 22 | def get_data_loaders(cfg): 23 | def get_2d_datasets(dataset_names): 24 | datasets = [] 25 | for dataset_name in dataset_names: 26 | db = eval(dataset_name)(seqlen=cfg.DATASET.SEQLEN, debug=cfg.DEBUG) 27 | datasets.append(db) 28 | return ConcatDataset(datasets) 29 | 30 | def get_3d_datasets(dataset_names): 31 | datasets = [] 32 | for dataset_name in dataset_names: 33 | db = eval(dataset_name)(set='train', seqlen=cfg.DATASET.SEQLEN, debug=cfg.DEBUG) 34 | datasets.append(db) 35 | return ConcatDataset(datasets) 36 | 37 | # ===== 2D keypoint datasets ===== 38 | train_2d_dataset_names = cfg.TRAIN.DATASETS_2D 39 | train_2d_db = get_2d_datasets(train_2d_dataset_names) 40 | 41 | data_2d_batch_size = int(cfg.TRAIN.BATCH_SIZE * cfg.TRAIN.DATA_2D_RATIO) 42 | data_3d_batch_size = cfg.TRAIN.BATCH_SIZE - data_2d_batch_size 43 | 44 | train_2d_loader = DataLoader( 45 | dataset=train_2d_db, 46 | batch_size=data_2d_batch_size, 47 | shuffle=True, 48 | num_workers=cfg.NUM_WORKERS, 49 | ) 50 | 51 | # ===== 3D keypoint datasets ===== 52 | train_3d_dataset_names = cfg.TRAIN.DATASETS_3D 53 | train_3d_db = get_3d_datasets(train_3d_dataset_names) 54 | 55 | train_3d_loader = DataLoader( 56 | dataset=train_3d_db, 57 | batch_size=data_3d_batch_size, 58 | shuffle=True, 59 | num_workers=cfg.NUM_WORKERS, 60 | ) 61 | 62 | # ===== Motion Discriminator dataset ===== 63 | motion_disc_db = AMASS(seqlen=cfg.DATASET.SEQLEN) 64 | 65 | motion_disc_loader = DataLoader( 66 | dataset=motion_disc_db, 67 | batch_size=cfg.TRAIN.BATCH_SIZE, 68 | shuffle=True, 69 | num_workers=cfg.NUM_WORKERS, 70 | ) 71 | 72 | # ===== Evaluation dataset ===== 73 | valid_db = eval(cfg.TRAIN.DATASET_EVAL)(set='val', seqlen=cfg.DATASET.SEQLEN, debug=cfg.DEBUG) 74 | 75 | valid_loader = DataLoader( 76 | dataset=valid_db, 77 | batch_size=cfg.TRAIN.BATCH_SIZE, 78 | shuffle=False, 79 | num_workers=cfg.NUM_WORKERS, 80 | ) 81 | 82 | return train_2d_loader, train_3d_loader, motion_disc_loader, valid_loader -------------------------------------------------------------------------------- /lib/models/attention.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import torch 18 | from torch import nn 19 | 20 | def init_weights(m): 21 | if type(m) == nn.Linear: 22 | torch.nn.init.uniform_(m.weight, -0.1, 0.1) 23 | m.bias.data.fill_(0.01) 24 | 25 | class SelfAttention(nn.Module): 26 | def __init__(self, attention_size, 27 | batch_first=False, 28 | layers=1, 29 | dropout=.0, 30 | non_linearity="tanh"): 31 | super(SelfAttention, self).__init__() 32 | 33 | self.batch_first = batch_first 34 | 35 | if non_linearity == "relu": 36 | activation = nn.ReLU() 37 | else: 38 | activation = nn.Tanh() 39 | 40 | modules = [] 41 | for i in range(layers - 1): 42 | modules.append(nn.Linear(attention_size, attention_size)) 43 | modules.append(activation) 44 | modules.append(nn.Dropout(dropout)) 45 | 46 | # last attention layer must output 1 47 | modules.append(nn.Linear(attention_size, 1)) 48 | modules.append(activation) 49 | modules.append(nn.Dropout(dropout)) 50 | 51 | self.attention = nn.Sequential(*modules) 52 | self.attention.apply(init_weights) 53 | self.softmax = nn.Softmax(dim=-1) 54 | 55 | 56 | def forward(self, inputs): 57 | 58 | ################################################################## 59 | # STEP 1 - perform dot product 60 | # of the attention vector and each hidden state 61 | ################################################################## 62 | 63 | # inputs is a 3D Tensor: batch, len, hidden_size 64 | # scores is a 2D Tensor: batch, len 65 | scores = self.attention(inputs).squeeze() 66 | scores = self.softmax(scores) 67 | 68 | ################################################################## 69 | # Step 2 - Weighted sum of hidden states, by the attention scores 70 | ################################################################## 71 | 72 | # multiply each hidden state with the attention weights 73 | weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs)) 74 | 75 | # sum the hidden states 76 | # representations = weighted.sum(1).squeeze() 77 | representations = weighted.sum(1).squeeze() 78 | return representations, scores 79 | 80 | -------------------------------------------------------------------------------- /vibe_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "kJe1q2JFK4LZ" 17 | }, 18 | "source": [ 19 | "# Setup" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "id": "Tvd4cfPk5a0e" 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "# Clone the code from GitHub\n", 31 | "!git clone https://github.com/cedro3/VIBE.git\n", 32 | "%cd VIBE/\n", 33 | "\n", 34 | "# Install the required libraries\n", 35 | "!pip install torch==1.4.0 numpy==1.17.5\n", 36 | "!pip install git+https://github.com/giacaglia/pytube.git --upgrade\n", 37 | "!pip install -r requirements.txt\n", 38 | "\n", 39 | "# Download the pretrained weights and the SMPL data\n", 40 | "!source scripts/prepare_data.sh" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "id": "nflTgaAWLqsu" 47 | }, 48 | "source": [ 49 | "# Running the demo\n", 50 | "Adding the --sideview option at the end also renders the estimated body from a side view." 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "id": "qVNszfLQ7rC9" 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "# Run the demo\n", 62 | "!python demo.py --vid_file girl_dance.mp4 --output_folder output/ " 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "id": "j8zxBa_K-FJf" 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "# Play the generated video\n", 74 | "from IPython.display import HTML\n", 75 | "from base64 import b64encode\n", 76 | "\n", 77 | "def video(path):\n", 78 | " mp4 = open(path,'rb').read()\n", 79 | " data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n", 80 | " return HTML('<video width=400 controls><source src=\"%s\" type=\"video/mp4\"></video>' % data_url)\n", 81 | "\n", 82 | "video('output/girl_dance/girl_dance_vibe_result.mp4') " 83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "accelerator": "GPU", 88 | "colab": { 89 | "collapsed_sections": [], 90 | "include_colab_link": true, 91 | "name": "vibe_demo", 92 | "provenance": [], 93 | "toc_visible": true 94 | }, 95 | "kernelspec": { 96 | "display_name": "Python 3", 97 | "language": "python", 98 | "name": "python3" 99 | }, 100 | "language_info": { 101 | "codemirror_mode": { 102 | "name": "ipython", 103 | "version": 3 104 | }, 105 | "file_extension": ".py", 106 | "mimetype": "text/x-python", 107 | "name": "python", 108 | "nbconvert_exporter": "python", 109 | "pygments_lexer": "ipython3", 110 | "version": "3.7.9" 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 1 115 | } 116 | -------------------------------------------------------------------------------- /lib/utils/pose_tracker.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved.
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import os 18 | import json 19 | import shutil 20 | import subprocess 21 | import numpy as np 22 | import os.path as osp 23 | 24 | 25 | def run_openpose( 26 | video_file, 27 | output_folder, 28 | staf_folder, 29 | vis=False, 30 | ): 31 | pwd = os.getcwd() 32 | 33 | os.chdir(staf_folder) 34 | 35 | render = 1 if vis else 0 36 | display = 2 if vis else 0 37 | cmd = [ 38 | 'build/examples/openpose/openpose.bin', 39 | '--model_pose', 'BODY_21A', 40 | '--tracking', '1', 41 | '--render_pose', str(render), 42 | '--video', video_file, 43 | '--write_json', output_folder, 44 | '--display', str(display) 45 | ] 46 | 47 | print('Executing', ' '.join(cmd)) 48 | subprocess.call(cmd) 49 | os.chdir(pwd) 50 | 51 | 52 | def read_posetrack_keypoints(output_folder): 53 | 54 | people = dict() 55 | 56 | for idx, result_file in enumerate(sorted(os.listdir(output_folder))): 57 | json_file = osp.join(output_folder, result_file) 58 | data = json.load(open(json_file)) 59 | # print(idx, data) 60 | for person in data['people']: 61 | person_id = person['person_id'][0] 62 | joints2d = person['pose_keypoints_2d'] 63 | if person_id in people.keys(): 64 | people[person_id]['joints2d'].append(joints2d) 65 | people[person_id]['frames'].append(idx) 66 | else: 67 | people[person_id] = { 68 | 'joints2d': [], 69 | 'frames': [], 70 | } 71 | people[person_id]['joints2d'].append(joints2d) 72 | people[person_id]['frames'].append(idx) 73 | 74 | for k in people.keys(): 75 | people[k]['joints2d'] = np.array(people[k]['joints2d']).reshape((len(people[k]['joints2d']), -1, 3)) 76 | people[k]['frames'] = np.array(people[k]['frames']) 77 | 78 | return people 79 | 80 | 81 | def run_posetracker(video_file, staf_folder, posetrack_output_folder='/tmp', display=False): 82 | posetrack_output_folder = os.path.join( 83 | posetrack_output_folder, 84 | f'{os.path.basename(video_file)}_posetrack' 85 | ) 86 | 87 | # run posetrack on video 88 | run_openpose( 89 | video_file, 90 | posetrack_output_folder, 91 | vis=display, 92 | staf_folder=staf_folder 93 | ) 94 | 95 | people_dict = read_posetrack_keypoints(posetrack_output_folder) 96 | 97 | shutil.rmtree(posetrack_output_folder) 98 | 99 | return people_dict -------------------------------------------------------------------------------- /lib/models/motion_discriminator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import torch 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | from torch.nn.utils import spectral_norm 21 | from lib.models.attention import SelfAttention 22 | 23 | class MotionDiscriminator(nn.Module): 24 | 25 | def __init__(self, 26 | rnn_size, 27 | input_size, 28 | num_layers, 29 | output_size=2, 30 | feature_pool="concat", 31 | use_spectral_norm=False, 32 | attention_size=1024, 33 | attention_layers=1, 34 | attention_dropout=0.5): 35 | 36 | super(MotionDiscriminator, self).__init__() 37 | self.input_size = input_size 38 | self.rnn_size = rnn_size 39 | self.feature_pool = feature_pool 40 | self.num_layers = num_layers 41 | self.attention_size = attention_size 42 | self.attention_layers = attention_layers 43 | self.attention_dropout = attention_dropout 44 | 45 | self.gru = nn.GRU(self.input_size, self.rnn_size, num_layers=num_layers) 46 | 47 | linear_size = self.rnn_size if not feature_pool == "concat" else self.rnn_size * 2 48 | 49 | if feature_pool == "attention" : 50 | self.attention = SelfAttention(attention_size=self.attention_size, 51 | layers=self.attention_layers, 52 | dropout=self.attention_dropout) 53 | if use_spectral_norm: 54 | self.fc = spectral_norm(nn.Linear(linear_size, output_size)) 55 | else: 56 | self.fc = nn.Linear(linear_size, output_size) 57 | 58 | def forward(self, sequence): 59 | """ 60 | sequence: of shape [batch_size, seq_len, input_size] 61 | """ 62 | batchsize, seqlen, input_size = sequence.shape 63 | sequence = torch.transpose(sequence, 0, 1) 64 | 65 | outputs, state = self.gru(sequence) 66 | 67 | if self.feature_pool == "concat": 68 | outputs = F.relu(outputs) 69 | avg_pool = F.adaptive_avg_pool1d(outputs.permute(1, 2, 0), 1).view(batchsize, -1) 70 | max_pool = F.adaptive_max_pool1d(outputs.permute(1, 2, 0), 1).view(batchsize, -1) 71 | output = self.fc(torch.cat([avg_pool, max_pool], dim=1)) 72 | elif self.feature_pool == "attention": 73 | outputs = outputs.permute(1, 0, 2) 74 | y, attentions = self.attention(outputs) 75 | output = self.fc(y) 76 | else: 77 | output = self.fc(outputs[-1]) 78 | 79 | return output 80 | -------------------------------------------------------------------------------- /lib/dataset/inference.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import os 18 | import cv2 19 | import numpy as np 20 | import os.path as osp 21 | from torch.utils.data import Dataset 22 | from torchvision.transforms.functional import to_tensor 23 | 24 | from lib.utils.smooth_bbox import get_all_bbox_params 25 | from lib.data_utils.img_utils import get_single_image_crop_demo 26 | 27 | 28 | class Inference(Dataset): 29 | def __init__(self, image_folder, frames, bboxes=None, joints2d=None, scale=1.0, crop_size=224): 30 | self.image_file_names = [ 31 | osp.join(image_folder, x) 32 | for x in os.listdir(image_folder) 33 | if x.endswith('.png') or x.endswith('.jpg') 34 | ] 35 | self.image_file_names = sorted(self.image_file_names) 36 | self.image_file_names = np.array(self.image_file_names)[frames] 37 | self.bboxes = bboxes 38 | self.joints2d = joints2d 39 | self.scale = scale 40 | self.crop_size = crop_size 41 | self.frames = frames 42 | self.has_keypoints = joints2d is not None 43 | 44 | self.norm_joints2d = np.zeros_like(self.joints2d) 45 | 46 | if self.has_keypoints: 47 | bboxes, time_pt1, time_pt2 = get_all_bbox_params(joints2d, vis_thresh=0.3) 48 | bboxes[:, 2:] = 150. / bboxes[:, 2:] 49 | self.bboxes = np.stack([bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 2]]).T 50 | 51 | self.image_file_names = self.image_file_names[time_pt1:time_pt2] 52 | self.joints2d = joints2d[time_pt1:time_pt2] 53 | self.frames = frames[time_pt1:time_pt2] 54 | 55 | def __len__(self): 56 | return len(self.image_file_names) 57 | 58 | def __getitem__(self, idx): 59 | img = cv2.cvtColor(cv2.imread(self.image_file_names[idx]), cv2.COLOR_BGR2RGB) 60 | 61 | bbox = self.bboxes[idx] 62 | 63 | j2d = self.joints2d[idx] if self.has_keypoints else None 64 | 65 | norm_img, raw_img, kp_2d = get_single_image_crop_demo( 66 | img, 67 | bbox, 68 | kp_2d=j2d, 69 | scale=self.scale, 70 | crop_size=self.crop_size) 71 | if self.has_keypoints: 72 | return norm_img, kp_2d 73 | else: 74 | return norm_img 75 | 76 | 77 | class ImageFolder(Dataset): 78 | def __init__(self, image_folder): 79 | self.image_file_names = [ 80 | osp.join(image_folder, x) 81 | for x in os.listdir(image_folder) 82 | if x.endswith('.png') or x.endswith('.jpg') 83 | ] 84 | self.image_file_names = sorted(self.image_file_names) 85 | 86 | def __len__(self): 87 | return len(self.image_file_names) 88 | 89 | def __getitem__(self, idx): 90 | img = cv2.cvtColor(cv2.imread(self.image_file_names[idx]), cv2.COLOR_BGR2RGB) 91 | return to_tensor(img) 92 | -------------------------------------------------------------------------------- /doc/train.md: -------------------------------------------------------------------------------- 1 | # Training Instructions 2 | 3 | Throughout the documentation we refer to the VIBE root folder as `$ROOT`. 4 | 5 | ## Data Preparation 6 | During training, VIBE uses precomputed image features to reduce training time. Thus, we process the datasets into a 7 | standard format before using them for training. To obtain these standard training files, you need to run: 8 | 9 | ```shell script 10 | source scripts/prepare_training_data.sh 11 | ``` 12 | 13 | This script will first create a folder for the dataset files `$ROOT/data/vibe_db`, then process each dataset and save 14 | output files to this directory. Before proceeding, you need to download each of the datasets listed 15 | below, then modify the `--dir` argument in the script to point to the 16 | directory of each dataset.
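As a quick sanity check before preprocessing, a minimal sketch along these lines (not part of the repo) can confirm the expected layout; the path list mirrors the `--dir` arguments in `scripts/prepare_training_data.sh` and the defaults in `lib/core/config.py`:

```python
# hypothetical helper: check that the raw dataset folders referenced by
# scripts/prepare_training_data.sh exist before kicking off preprocessing
import os.path as osp

expected_dirs = {
    'AMASS': 'data/amass',
    'InstaVariety': 'data/insta_variety',
    '3DPW': 'data/3dpw',
    'MPI-INF-3DHP': 'data/mpi_inf_3dhp',
    'PoseTrack': 'data/posetrack',
    'PennAction': 'data/penn_action',
}

for name, path in expected_dirs.items():
    status = 'found' if osp.isdir(path) else 'MISSING'
    print(f'{name:13s} {status:7s} {path}')
```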
17 | 18 | 19 | 20 | ## Datasets 21 | 22 | - **AMASS** (https://amass.is.tue.mpg.de) 23 | 24 | Directory structure: 25 | 26 | ```shell script 27 | amass 28 | |-- ACCAD 29 | |-- BioMotionLab_NTroje 30 | |-- CMU 31 | |-- ... 32 | `-- Transitions_mocap 33 | ``` 34 | 35 | - **InstaVariety** 36 | 37 | For your convenience, we uploaded the preprocessed InstaVariety data 38 | [here](https://owncloud.tuebingen.mpg.de/index.php/s/MKLnHtPjwn24y9C) (size: 18 GB). 39 | After downloading the file, put it under 40 | `$ROOT/data/vibe_db`. Do not forget to verify the checksum as a sanity check: 41 | ``` 42 | md5sum : 8ec335d1d48bd54687ad5c9a6eeb2999 43 | sha256sum : 7eadff77043cd85b49cbba8bfc9111c4305792ca64da1b92fb40fa702689dfa9 44 | ``` 45 | 46 | You may also preprocess the dataset yourself by downloading the 47 | [preprocessed tfrecords](https://github.com/akanazawa/human_dynamics/blob/master/doc/insta_variety.md#pre-processed-tfrecords) 48 | provided by the authors of Temporal HMR. 49 | 50 | Directory structure: 51 | ```shell script 52 | insta_variety 53 | |-- train 54 | | |-- insta_variety_00_copy00_hmr_noS5.ckpt-642561.tfrecord 55 | | |-- insta_variety_01_copy00_hmr_noS5.ckpt-642561.tfrecord 56 | | `-- ... 57 | `-- test 58 | |-- insta_variety_00_copy00_hmr_noS5.ckpt-642561.tfrecord 59 | |-- insta_variety_01_copy00_hmr_noS5.ckpt-642561.tfrecord 60 | `-- ... 61 | ``` 62 | 63 | - **MPI-3D-HP** (http://gvv.mpi-inf.mpg.de/3dhp-dataset) 64 | 65 | Download the dataset using the bash script provided by the authors. We will be using standard cameras only, so wall and ceiling 66 | cameras aren't needed. Then, run this 67 | [script](https://gist.github.com/mkocabas/cc6fe78aac51f97859e45f46476882b6) to extract frames from the videos. 68 | 69 | Directory structure: 70 | ```shell script 71 | 72 | mpi_inf_3dhp 73 | |-- S1 74 | | |-- Seq1 75 | | |-- Seq2 76 | |-- S2 77 | | |-- Seq1 78 | | |-- Seq2 79 | |-- ... 80 | `-- util 81 | ``` 82 | 83 | - **3DPW** (https://virtualhumans.mpi-inf.mpg.de/3DPW) 84 | 85 | Directory structure: 86 | ```shell script 87 | 3dpw 88 | |-- imageFiles 89 | | |-- courtyard_arguing_00 90 | | |-- courtyard_backpack_00 91 | | |-- ... 92 | `-- sequenceFiles 93 | |-- test 94 | |-- train 95 | `-- validation 96 | ``` 97 | 98 | - **PennAction** (http://dreamdragon.github.io/PennAction/) 99 | 100 | Directory structure: 101 | ```shell script 102 | pennaction 103 | |-- frames 104 | | |-- 0000 105 | | |-- 0001 106 | | |-- ... 107 | `-- labels 108 | |-- 0000.mat 109 | |-- 0001.mat 110 | `-- ... 111 | ``` 112 | 113 | - **PoseTrack** (https://posetrack.net/) 114 | 115 | Directory structure: 116 | ```shell script 117 | posetrack 118 | |-- images 119 | | |-- train 120 | | |-- val 121 | | |-- test 122 | `-- posetrack_data 123 | `-- annotations 124 | |-- train 125 | |-- val 126 | `-- test 127 | ``` 128 | 129 | 130 | 131 | ## Training 132 | Run the command below to start training. 133 | 134 | ```shell script 135 | python train.py --cfg configs/config.yaml 136 | ``` 137 | 138 | See [`configs/config.yaml`](configs/config.yaml) or [`config.py`](lib/core/config.py) to 139 | play with different configurations.
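The config can also be loaded and tweaked programmatically. A minimal sketch using the helpers defined in [`config.py`](lib/core/config.py); the overridden values here are only examples:

```python
# load the defaults, merge configs/config.yaml on top, then override in code
from lib.core.config import update_cfg

cfg = update_cfg('configs/config.yaml')
cfg.TRAIN.BATCH_SIZE = 16   # e.g. shrink the batch to fit a smaller GPU
cfg.TRAIN.END_EPOCH = 10
print(cfg.TRAIN)
```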
140 | -------------------------------------------------------------------------------- /lib/models/smpl.py: -------------------------------------------------------------------------------- 1 | # This script is borrowed and extended from https://github.com/nkolot/SPIN/blob/master/models/hmr.py 2 | # Adhere to their licence to use this script 3 | 4 | import torch 5 | import numpy as np 6 | import os.path as osp 7 | from smplx import SMPL as _SMPL 8 | from smplx.body_models import ModelOutput 9 | from smplx.lbs import vertices2joints 10 | 11 | from lib.core.config import VIBE_DATA_DIR 12 | 13 | # Map joints to SMPL joints 14 | JOINT_MAP = { 15 | 'OP Nose': 24, 'OP Neck': 12, 'OP RShoulder': 17, 16 | 'OP RElbow': 19, 'OP RWrist': 21, 'OP LShoulder': 16, 17 | 'OP LElbow': 18, 'OP LWrist': 20, 'OP MidHip': 0, 18 | 'OP RHip': 2, 'OP RKnee': 5, 'OP RAnkle': 8, 19 | 'OP LHip': 1, 'OP LKnee': 4, 'OP LAnkle': 7, 20 | 'OP REye': 25, 'OP LEye': 26, 'OP REar': 27, 21 | 'OP LEar': 28, 'OP LBigToe': 29, 'OP LSmallToe': 30, 22 | 'OP LHeel': 31, 'OP RBigToe': 32, 'OP RSmallToe': 33, 'OP RHeel': 34, 23 | 'Right Ankle': 8, 'Right Knee': 5, 'Right Hip': 45, 24 | 'Left Hip': 46, 'Left Knee': 4, 'Left Ankle': 7, 25 | 'Right Wrist': 21, 'Right Elbow': 19, 'Right Shoulder': 17, 26 | 'Left Shoulder': 16, 'Left Elbow': 18, 'Left Wrist': 20, 27 | 'Neck (LSP)': 47, 'Top of Head (LSP)': 48, 28 | 'Pelvis (MPII)': 49, 'Thorax (MPII)': 50, 29 | 'Spine (H36M)': 51, 'Jaw (H36M)': 52, 30 | 'Head (H36M)': 53, 'Nose': 24, 'Left Eye': 26, 31 | 'Right Eye': 25, 'Left Ear': 28, 'Right Ear': 27 32 | } 33 | JOINT_NAMES = [ 34 | 'OP Nose', 'OP Neck', 'OP RShoulder', 35 | 'OP RElbow', 'OP RWrist', 'OP LShoulder', 36 | 'OP LElbow', 'OP LWrist', 'OP MidHip', 37 | 'OP RHip', 'OP RKnee', 'OP RAnkle', 38 | 'OP LHip', 'OP LKnee', 'OP LAnkle', 39 | 'OP REye', 'OP LEye', 'OP REar', 40 | 'OP LEar', 'OP LBigToe', 'OP LSmallToe', 41 | 'OP LHeel', 'OP RBigToe', 'OP RSmallToe', 'OP RHeel', 42 | 'Right Ankle', 'Right Knee', 'Right Hip', 43 | 'Left Hip', 'Left Knee', 'Left Ankle', 44 | 'Right Wrist', 'Right Elbow', 'Right Shoulder', 45 | 'Left Shoulder', 'Left Elbow', 'Left Wrist', 46 | 'Neck (LSP)', 'Top of Head (LSP)', 47 | 'Pelvis (MPII)', 'Thorax (MPII)', 48 | 'Spine (H36M)', 'Jaw (H36M)', 49 | 'Head (H36M)', 'Nose', 'Left Eye', 50 | 'Right Eye', 'Left Ear', 'Right Ear' 51 | ] 52 | 53 | JOINT_IDS = {JOINT_NAMES[i]: i for i in range(len(JOINT_NAMES))} 54 | JOINT_REGRESSOR_TRAIN_EXTRA = osp.join(VIBE_DATA_DIR, 'J_regressor_extra.npy') 55 | SMPL_MEAN_PARAMS = osp.join(VIBE_DATA_DIR, 'smpl_mean_params.npz') 56 | SMPL_MODEL_DIR = VIBE_DATA_DIR 57 | H36M_TO_J17 = [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10, 0, 7, 9] 58 | H36M_TO_J14 = H36M_TO_J17[:14] 59 | 60 | 61 | class SMPL(_SMPL): 62 | """ Extension of the official SMPL implementation to support more joints """ 63 | 64 | def __init__(self, *args, **kwargs): 65 | super(SMPL, self).__init__(*args, **kwargs) 66 | joints = [JOINT_MAP[i] for i in JOINT_NAMES] 67 | J_regressor_extra = np.load(JOINT_REGRESSOR_TRAIN_EXTRA) 68 | self.register_buffer('J_regressor_extra', torch.tensor(J_regressor_extra, dtype=torch.float32)) 69 | self.joint_map = torch.tensor(joints, dtype=torch.long) 70 | 71 | def forward(self, *args, **kwargs): 72 | kwargs['get_skin'] = True 73 | smpl_output = super(SMPL, self).forward(*args, **kwargs) 74 | extra_joints = vertices2joints(self.J_regressor_extra, smpl_output.vertices) 75 | joints = torch.cat([smpl_output.joints, extra_joints], dim=1) 76 | joints = joints[:, 
self.joint_map, :] 77 | output = ModelOutput(vertices=smpl_output.vertices, 78 | global_orient=smpl_output.global_orient, 79 | body_pose=smpl_output.body_pose, 80 | joints=joints, 81 | betas=smpl_output.betas, 82 | full_pose=smpl_output.full_pose) 83 | return output 84 | 85 | 86 | def get_smpl_faces(): 87 | smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False) 88 | return smpl.faces -------------------------------------------------------------------------------- /lib/core/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import argparse 18 | from yacs.config import CfgNode as CN 19 | 20 | # CONSTANTS 21 | # You may modify them at will 22 | VIBE_DB_DIR = 'data/vibe_db' 23 | AMASS_DIR = 'data/amass' 24 | INSTA_DIR = 'data/insta_variety' 25 | MPII3D_DIR = 'data/mpi_inf_3dhp' 26 | THREEDPW_DIR = 'data/3dpw' 27 | PENNACTION_DIR = 'data/penn_action' 28 | POSETRACK_DIR = 'data/posetrack' 29 | VIBE_DATA_DIR = 'data/vibe_data' 30 | 31 | # Configuration variables 32 | cfg = CN() 33 | 34 | cfg.OUTPUT_DIR = 'results' 35 | cfg.EXP_NAME = 'default' 36 | cfg.DEVICE = 'cuda' 37 | cfg.DEBUG = True 38 | cfg.LOGDIR = '' 39 | cfg.NUM_WORKERS = 8 40 | cfg.DEBUG_FREQ = 1000 41 | cfg.SEED_VALUE = -1 42 | 43 | cfg.CUDNN = CN() 44 | cfg.CUDNN.BENCHMARK = True 45 | cfg.CUDNN.DETERMINISTIC = False 46 | cfg.CUDNN.ENABLED = True 47 | 48 | cfg.TRAIN = CN() 49 | cfg.TRAIN.DATASETS_2D = ['Insta'] 50 | cfg.TRAIN.DATASETS_3D = ['MPII3D'] 51 | cfg.TRAIN.DATASET_EVAL = 'ThreeDPW' 52 | cfg.TRAIN.BATCH_SIZE = 32 53 | cfg.TRAIN.DATA_2D_RATIO = 0.5 54 | cfg.TRAIN.START_EPOCH = 0 55 | cfg.TRAIN.END_EPOCH = 5 56 | cfg.TRAIN.PRETRAINED_REGRESSOR = '' 57 | cfg.TRAIN.PRETRAINED = '' 58 | cfg.TRAIN.RESUME = '' 59 | cfg.TRAIN.NUM_ITERS_PER_EPOCH = 1000 60 | cfg.TRAIN.LR_PATIENCE = 5 61 | 62 | # <====== generator optimizer 63 | cfg.TRAIN.GEN_OPTIM = 'Adam' 64 | cfg.TRAIN.GEN_LR = 1e-4 65 | cfg.TRAIN.GEN_WD = 1e-4 66 | cfg.TRAIN.GEN_MOMENTUM = 0.9 67 | 68 | # <====== motion discriminator optimizer 69 | cfg.TRAIN.MOT_DISCR = CN() 70 | cfg.TRAIN.MOT_DISCR.OPTIM = 'SGD' 71 | cfg.TRAIN.MOT_DISCR.LR = 1e-2 72 | cfg.TRAIN.MOT_DISCR.WD = 1e-4 73 | cfg.TRAIN.MOT_DISCR.MOMENTUM = 0.9 74 | cfg.TRAIN.MOT_DISCR.UPDATE_STEPS = 1 75 | cfg.TRAIN.MOT_DISCR.FEATURE_POOL = 'concat' 76 | cfg.TRAIN.MOT_DISCR.HIDDEN_SIZE = 1024 77 | cfg.TRAIN.MOT_DISCR.NUM_LAYERS = 1 78 | cfg.TRAIN.MOT_DISCR.ATT = CN() 79 | cfg.TRAIN.MOT_DISCR.ATT.SIZE = 1024 80 | cfg.TRAIN.MOT_DISCR.ATT.LAYERS = 1 81 | cfg.TRAIN.MOT_DISCR.ATT.DROPOUT = 0.1 82 | 83 | cfg.DATASET = CN() 84 | cfg.DATASET.SEQLEN = 20 85 | cfg.DATASET.OVERLAP = 0.5 86 | 87 | cfg.LOSS = CN() 88 | cfg.LOSS.KP_2D_W = 60. 89 | cfg.LOSS.KP_3D_W = 30. 
90 | cfg.LOSS.SHAPE_W = 0.001 91 | cfg.LOSS.POSE_W = 1.0 92 | cfg.LOSS.D_MOTION_LOSS_W = 1. 93 | 94 | cfg.MODEL = CN() 95 | 96 | cfg.MODEL.TEMPORAL_TYPE = 'gru' 97 | 98 | # GRU model hyperparams 99 | cfg.MODEL.TGRU = CN() 100 | cfg.MODEL.TGRU.NUM_LAYERS = 1 101 | cfg.MODEL.TGRU.ADD_LINEAR = False 102 | cfg.MODEL.TGRU.RESIDUAL = False 103 | cfg.MODEL.TGRU.HIDDEN_SIZE = 2048 104 | cfg.MODEL.TGRU.BIDIRECTIONAL = False 105 | 106 | 107 | def get_cfg_defaults(): 108 | """Get a yacs CfgNode object with default values for my_project.""" 109 | # Return a clone so that the defaults will not be altered 110 | # This is for the "local variable" use pattern 111 | return cfg.clone() 112 | 113 | 114 | def update_cfg(cfg_file): 115 | cfg = get_cfg_defaults() 116 | cfg.merge_from_file(cfg_file) 117 | return cfg.clone() 118 | 119 | 120 | def parse_args(): 121 | parser = argparse.ArgumentParser() 122 | parser.add_argument('--cfg', type=str, help='cfg file path') 123 | 124 | args = parser.parse_args() 125 | print(args, end='\n\n') 126 | 127 | cfg_file = args.cfg 128 | if args.cfg is not None: 129 | cfg = update_cfg(args.cfg) 130 | else: 131 | cfg = get_cfg_defaults() 132 | 133 | return cfg, cfg_file 134 | -------------------------------------------------------------------------------- /lib/data_utils/feature_extractor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 | 
17 | import os
18 | import torch
19 | import torchvision
20 | import numpy as np
21 | import matplotlib.pyplot as plt
22 | 
23 | from lib.utils.vis import batch_visualize_preds
24 | from lib.data_utils.img_utils import get_single_image_crop, convert_cvimg_to_tensor
25 | 
26 | 
27 | def extract_features(model, video, bbox, debug=False, batch_size=200, kp_2d=None, dataset=None, scale=1.3):
28 |     '''
29 |     :param model: pretrained HMR model, use lib/models/hmr.py:get_pretrained_hmr()
30 |     :param video: video filename or torch.Tensor in shape (num_frames,W,H,C)
31 |     :param bbox: bbox array in shape (num_frames,4)
32 |     :param debug: boolean, true if you want to debug HMR predictions
33 |     :param batch_size: batch size for HMR input
34 |     :return: features: resnet50 features np.ndarray -> shape (num_frames, 2048)
35 |     '''
36 |     device = 'cuda'
37 | 
38 |     if isinstance(video, torch.Tensor) or isinstance(video, np.ndarray):
39 |         pass  # already a sequence of frames
40 |     elif isinstance(video, str):
41 |         if os.path.isfile(video):
42 |             video, _, _ = torchvision.io.read_video(video)
43 |         else:
44 |             raise ValueError(f'{video} is not a valid file.')
45 |     else:
46 |         raise ValueError(f'Unknown type {type(video)} for video object')
47 | 
48 |     # For debugging ground truth 2d keypoints
49 |     if debug and kp_2d is not None:
50 |         import cv2
51 |         if isinstance(video[0], np.str_):
52 |             print(video[0])
53 |             frame = cv2.cvtColor(cv2.imread(video[0]), cv2.COLOR_BGR2RGB)
54 |         elif isinstance(video[0], np.ndarray):
55 |             frame = video[0]
56 |         else:
57 |             frame = video[0].numpy()
58 |         for i in range(kp_2d.shape[1]):
59 |             frame = cv2.circle(
60 |                 frame.copy(),
61 |                 (int(kp_2d[0,i,0]), int(kp_2d[0,i,1])),
62 |                 thickness=3,
63 |                 color=(255,0,0),
64 |                 radius=3,
65 |             )
66 | 
67 |         plt.imshow(frame)
68 |         plt.show()
69 | 
70 |     if dataset == 'insta':
71 |         video = torch.cat(
72 |             [convert_cvimg_to_tensor(image).unsqueeze(0) for image in video], dim=0
73 |         ).to(device)
74 |     else:
75 |         # crop bbox locations
76 |         video = torch.cat(
77 |             [get_single_image_crop(image, bbox, scale=scale).unsqueeze(0) for image, bbox in zip(video, bbox)], dim=0
78 |         ).to(device)
79 | 
80 |     features = []
81 | 
82 |     # split video into batches of frames
83 |     frames = torch.split(video, batch_size)
84 | 
85 |     with torch.no_grad():
86 |         for images in frames:
87 | 
88 |             if not debug:
89 |                 pred = model.feature_extractor(images)
90 |                 features.append(pred.cpu())
91 |                 del pred, images
92 |             else:
93 |                 preds = model(images)
94 |                 dataset = 'spin'  # dataset if dataset else 'common'
95 |                 result_image = batch_visualize_preds(
96 |                     images,
97 |                     preds[-1],
98 |                     target_exists=False,
99 |                     max_images=4,
100 |                     dataset=dataset,
101 |                 )
102 | 
103 |                 plt.figure(figsize=(19.2, 10.8))
104 |                 plt.axis('off')
105 |                 plt.imshow(result_image)
106 |                 plt.show()
107 | 
108 |                 del preds, images
109 |                 return 0
110 | 
111 |     features = torch.cat(features, dim=0)
112 | 
113 |     return features.numpy()
114 | 
--------------------------------------------------------------------------------
/lib/data_utils/penn_action_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import sys 18 | sys.path.append('.') 19 | 20 | import glob 21 | import torch 22 | import joblib 23 | import argparse 24 | from tqdm import tqdm 25 | import os.path as osp 26 | from skimage import io 27 | from scipy.io import loadmat 28 | 29 | from lib.models import spin 30 | from lib.data_utils.kp_utils import * 31 | from lib.core.config import VIBE_DB_DIR 32 | from lib.data_utils.img_utils import get_bbox_from_kp2d 33 | from lib.data_utils.feature_extractor import extract_features 34 | 35 | 36 | def calc_kpt_bound(kp_2d): 37 | MAX_COORD = 10000 38 | x = kp_2d[:, 0] 39 | y = kp_2d[:, 1] 40 | z = kp_2d[:, 2] 41 | u = MAX_COORD 42 | d = -1 43 | l = MAX_COORD 44 | r = -1 45 | for idx, vis in enumerate(z): 46 | if vis == 0: # skip invisible joint 47 | continue 48 | u = min(u, y[idx]) 49 | d = max(d, y[idx]) 50 | l = min(l, x[idx]) 51 | r = max(r, x[idx]) 52 | return u, d, l, r 53 | 54 | 55 | def load_mat(path): 56 | mat = loadmat(path) 57 | del mat['pose'], mat['__header__'], mat['__globals__'], mat['__version__'], mat['train'], mat['action'] 58 | mat['nframes'] = mat['nframes'][0][0] 59 | 60 | return mat 61 | 62 | 63 | def read_data(folder): 64 | dataset = { 65 | 'img_name' : [], 66 | 'joints2D': [], 67 | 'bbox': [], 68 | 'vid_name': [], 69 | 'features': [], 70 | } 71 | 72 | model = spin.get_pretrained_hmr() 73 | 74 | file_names = sorted(glob.glob(folder + '/labels/'+'*.mat')) 75 | 76 | for fname in tqdm(file_names): 77 | vid_dict=load_mat(fname) 78 | imgs = sorted(glob.glob(folder + '/frames/'+ fname.strip().split('/')[-1].split('.')[0]+'/*.jpg')) 79 | kp_2d = np.zeros((vid_dict['nframes'], 13, 3)) 80 | perm_idxs = get_perm_idxs('pennaction', 'common') 81 | 82 | kp_2d[:, :, 0] = vid_dict['x'] 83 | kp_2d[:, :, 1] = vid_dict['y'] 84 | kp_2d[:, :, 2] = vid_dict['visibility'] 85 | kp_2d = kp_2d[:, perm_idxs, :] 86 | 87 | # fix inconsistency 88 | n_kp_2d = np.zeros((kp_2d.shape[0], 14, 3)) 89 | n_kp_2d[:, :12, :] = kp_2d[:, :-1, :] 90 | n_kp_2d[:, 13, :] = kp_2d[:, 12, :] 91 | kp_2d = n_kp_2d 92 | 93 | bbox = np.zeros((vid_dict['nframes'], 4)) 94 | 95 | for fr_id, fr in enumerate(kp_2d): 96 | u, d, l, r = calc_kpt_bound(fr) 97 | center = np.array([(l + r) * 0.5, (u + d) * 0.5], dtype=np.float32) 98 | c_x, c_y = center[0], center[1] 99 | w, h = r - l, d - u 100 | w = h = np.where(w / h > 1, w, h) 101 | 102 | bbox[fr_id,:] = np.array([c_x, c_y, w, h]) 103 | 104 | dataset['vid_name'].append(np.array([f'{fname}']* vid_dict['nframes'])) 105 | dataset['img_name'].append(np.array(imgs)) 106 | dataset['joints2D'].append(kp_2d) 107 | dataset['bbox'].append(bbox) 108 | 109 | features = extract_features(model, np.array(imgs) , bbox, dataset='pennaction', debug=False) 110 | dataset['features'].append(features) 111 | 112 | for k in dataset.keys(): 113 | dataset[k] = np.array(dataset[k]) 114 | for k in dataset.keys(): 115 | dataset[k] = np.concatenate(dataset[k]) 116 | 117 | return dataset 118 | 119 | 120 | if __name__ == '__main__': 121 | parser = argparse.ArgumentParser() 122 | parser.add_argument('--dir', type=str, help='dataset directory', default='data/pennaction') 123 | args = parser.parse_args() 124 | 125 | dataset = read_data(args.dir) 
126 | joblib.dump(dataset, osp.join(VIBE_DB_DIR, 'pennaction_train_db.pt')) 127 | 128 | -------------------------------------------------------------------------------- /lib/utils/smooth_bbox.py: -------------------------------------------------------------------------------- 1 | # This script is borrowed from https://github.com/akanazawa/human_dynamics/blob/master/src/util/smooth_bbox.py 2 | # Adhere to their licence to use this script 3 | 4 | import numpy as np 5 | import scipy.signal as signal 6 | from scipy.ndimage.filters import gaussian_filter1d 7 | 8 | 9 | def get_smooth_bbox_params(kps, vis_thresh=2, kernel_size=11, sigma=3): 10 | """ 11 | Computes smooth bounding box parameters from keypoints: 12 | 1. Computes bbox by rescaling the person to be around 150 px. 13 | 2. Linearly interpolates bbox params for missing annotations. 14 | 3. Median filtering 15 | 4. Gaussian filtering. 16 | 17 | Recommended thresholds: 18 | * detect-and-track: 0 19 | * 3DPW: 0.1 20 | 21 | Args: 22 | kps (list): List of kps (Nx3) or None. 23 | vis_thresh (float): Threshold for visibility. 24 | kernel_size (int): Kernel size for median filtering (must be odd). 25 | sigma (float): Sigma for gaussian smoothing. 26 | 27 | Returns: 28 | Smooth bbox params [cx, cy, scale], start index, end index 29 | """ 30 | bbox_params, start, end = get_all_bbox_params(kps, vis_thresh) 31 | smoothed = smooth_bbox_params(bbox_params, kernel_size, sigma) 32 | smoothed = np.vstack((np.zeros((start, 3)), smoothed)) 33 | return smoothed, start, end 34 | 35 | 36 | def kp_to_bbox_param(kp, vis_thresh): 37 | """ 38 | Finds the bounding box parameters from the 2D keypoints. 39 | 40 | Args: 41 | kp (Kx3): 2D Keypoints. 42 | vis_thresh (float): Threshold for visibility. 43 | 44 | Returns: 45 | [center_x, center_y, scale] 46 | """ 47 | if kp is None: 48 | return 49 | vis = kp[:, 2] > vis_thresh 50 | if not np.any(vis): 51 | return 52 | min_pt = np.min(kp[vis, :2], axis=0) 53 | max_pt = np.max(kp[vis, :2], axis=0) 54 | person_height = np.linalg.norm(max_pt - min_pt) 55 | if person_height < 0.5: 56 | return 57 | center = (min_pt + max_pt) / 2. 58 | scale = 150. / person_height 59 | return np.append(center, scale) 60 | 61 | 62 | def get_all_bbox_params(kps, vis_thresh=2): 63 | """ 64 | Finds bounding box parameters for all keypoints. 65 | 66 | Look for sequences in the middle with no predictions and linearly 67 | interpolate the bbox params for those 68 | 69 | Args: 70 | kps (list): List of kps (Kx3) or None. 71 | vis_thresh (float): Threshold for visibility. 72 | 73 | Returns: 74 | bbox_params, start_index (incl), end_index (excl) 75 | """ 76 | # keeps track of how many indices in a row with no prediction 77 | num_to_interpolate = 0 78 | start_index = -1 79 | bbox_params = np.empty(shape=(0, 3), dtype=np.float32) 80 | 81 | for i, kp in enumerate(kps): 82 | bbox_param = kp_to_bbox_param(kp, vis_thresh=vis_thresh) 83 | if bbox_param is None: 84 | num_to_interpolate += 1 85 | continue 86 | 87 | if start_index == -1: 88 | # Found the first index with a prediction! 89 | start_index = i 90 | num_to_interpolate = 0 91 | 92 | if num_to_interpolate > 0: 93 | # Linearly interpolate each param. 
94 | previous = bbox_params[-1] 95 | # This will be 3x(n+2) 96 | interpolated = np.array( 97 | [np.linspace(prev, curr, num_to_interpolate + 2) 98 | for prev, curr in zip(previous, bbox_param)]) 99 | bbox_params = np.vstack((bbox_params, interpolated.T[1:-1])) 100 | num_to_interpolate = 0 101 | bbox_params = np.vstack((bbox_params, bbox_param)) 102 | 103 | return bbox_params, start_index, i - num_to_interpolate + 1 104 | 105 | 106 | def smooth_bbox_params(bbox_params, kernel_size=11, sigma=8): 107 | """ 108 | Applies median filtering and then gaussian filtering to bounding box 109 | parameters. 110 | 111 | Args: 112 | bbox_params (Nx3): [cx, cy, scale]. 113 | kernel_size (int): Kernel size for median filtering (must be odd). 114 | sigma (float): Sigma for gaussian smoothing. 115 | 116 | Returns: 117 | Smoothed bounding box parameters (Nx3). 118 | """ 119 | smoothed = np.array([signal.medfilt(param, kernel_size) 120 | for param in bbox_params.T]).T 121 | return np.array([gaussian_filter1d(traj, sigma) for traj in smoothed.T]).T 122 | -------------------------------------------------------------------------------- /lib/utils/renderer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import math 18 | import trimesh 19 | import pyrender 20 | import numpy as np 21 | from pyrender.constants import RenderFlags 22 | from lib.models.smpl import get_smpl_faces 23 | 24 | 25 | class WeakPerspectiveCamera(pyrender.Camera): 26 | def __init__(self, 27 | scale, 28 | translation, 29 | znear=pyrender.camera.DEFAULT_Z_NEAR, 30 | zfar=None, 31 | name=None): 32 | super(WeakPerspectiveCamera, self).__init__( 33 | znear=znear, 34 | zfar=zfar, 35 | name=name, 36 | ) 37 | self.scale = scale 38 | self.translation = translation 39 | 40 | def get_projection_matrix(self, width=None, height=None): 41 | P = np.eye(4) 42 | P[0, 0] = self.scale[0] 43 | P[1, 1] = self.scale[1] 44 | P[0, 3] = self.translation[0] * self.scale[0] 45 | P[1, 3] = -self.translation[1] * self.scale[1] 46 | P[2, 2] = -1 47 | return P 48 | 49 | 50 | class Renderer: 51 | def __init__(self, resolution=(224,224), orig_img=False, wireframe=False): 52 | self.resolution = resolution 53 | 54 | self.faces = get_smpl_faces() 55 | self.orig_img = orig_img 56 | self.wireframe = wireframe 57 | self.renderer = pyrender.OffscreenRenderer( 58 | viewport_width=self.resolution[0], 59 | viewport_height=self.resolution[1], 60 | point_size=1.0 61 | ) 62 | 63 | # set the scene 64 | self.scene = pyrender.Scene(bg_color=[0.0, 0.0, 0.0, 0.0], ambient_light=(0.3, 0.3, 0.3)) 65 | 66 | light = pyrender.PointLight(color=[1.0, 1.0, 1.0], intensity=1) 67 | 68 | light_pose = np.eye(4) 69 | light_pose[:3, 3] = [0, -1, 1] 70 | self.scene.add(light, pose=light_pose) 71 | 72 | light_pose[:3, 3] = [0, 1, 1] 73 | self.scene.add(light, pose=light_pose) 74 | 75 | light_pose[:3, 3] = [1, 1, 2] 76 | self.scene.add(light, pose=light_pose) 77 | 78 | def render(self, img, verts, cam, angle=None, axis=None, mesh_filename=None, color=[1.0, 1.0, 0.9]): 79 | 80 | mesh = trimesh.Trimesh(vertices=verts, faces=self.faces, process=False) 81 | 82 | Rx = trimesh.transformations.rotation_matrix(math.radians(180), [1, 0, 0]) 83 | mesh.apply_transform(Rx) 84 | 85 | if mesh_filename is not None: 86 | mesh.export(mesh_filename) 87 | 88 | if angle and axis: 89 | R = trimesh.transformations.rotation_matrix(math.radians(angle), axis) 90 | mesh.apply_transform(R) 91 | 92 | sx, sy, tx, ty = cam 93 | 94 | camera = WeakPerspectiveCamera( 95 | scale=[sx, sy], 96 | translation=[tx, ty], 97 | zfar=1000. 
98 | ) 99 | 100 | material = pyrender.MetallicRoughnessMaterial( 101 | metallicFactor=0.0, 102 | alphaMode='OPAQUE', 103 | baseColorFactor=(color[0], color[1], color[2], 1.0) 104 | ) 105 | 106 | mesh = pyrender.Mesh.from_trimesh(mesh, material=material) 107 | 108 | mesh_node = self.scene.add(mesh, 'mesh') 109 | 110 | camera_pose = np.eye(4) 111 | cam_node = self.scene.add(camera, pose=camera_pose) 112 | 113 | if self.wireframe: 114 | render_flags = RenderFlags.RGBA | RenderFlags.ALL_WIREFRAME 115 | else: 116 | render_flags = RenderFlags.RGBA 117 | 118 | rgb, _ = self.renderer.render(self.scene, flags=render_flags) 119 | valid_mask = (rgb[:, :, -1] > 0)[:, :, np.newaxis] 120 | output_img = rgb[:, :, :-1] * valid_mask + (1 - valid_mask) * img 121 | image = output_img.astype(np.uint8) 122 | 123 | self.scene.remove_node(mesh_node) 124 | self.scene.remove_node(cam_node) 125 | 126 | return image 127 | -------------------------------------------------------------------------------- /lib/data_utils/amass_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import os 18 | import joblib 19 | import argparse 20 | import numpy as np 21 | import os.path as osp 22 | from tqdm import tqdm 23 | 24 | from lib.core.config import VIBE_DB_DIR 25 | 26 | dict_keys = ['betas', 'dmpls', 'gender', 'mocap_framerate', 'poses', 'trans'] 27 | 28 | # extract SMPL joints from SMPL-H model 29 | joints_to_use = np.array([ 30 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 31 | 11, 12, 13, 14, 15, 16, 17, 18, 19, 32 | 20, 21, 22, 37 33 | ]) 34 | joints_to_use = np.arange(0,156).reshape((-1,3))[joints_to_use].reshape(-1) 35 | 36 | all_sequences = [ 37 | 'ACCAD', 38 | 'BioMotionLab_NTroje', 39 | 'CMU', 40 | 'EKUT', 41 | 'Eyes_Japan_Dataset', 42 | 'HumanEva', 43 | 'KIT', 44 | 'MPI_HDM05', 45 | 'MPI_Limits', 46 | 'MPI_mosh', 47 | 'SFU', 48 | 'SSM_synced', 49 | 'TCD_handMocap', 50 | 'TotalCapture', 51 | 'Transitions_mocap', 52 | ] 53 | 54 | def read_data(folder, sequences): 55 | # sequences = [osp.join(folder, x) for x in sorted(os.listdir(folder)) if osp.isdir(osp.join(folder, x))] 56 | 57 | if sequences == 'all': 58 | sequences = all_sequences 59 | 60 | db = { 61 | 'theta': [], 62 | 'vid_name': [], 63 | } 64 | 65 | for seq_name in sequences: 66 | print(f'Reading {seq_name} sequence...') 67 | seq_folder = osp.join(folder, seq_name) 68 | 69 | thetas, vid_names = read_single_sequence(seq_folder, seq_name) 70 | seq_name_list = np.array([seq_name]*thetas.shape[0]) 71 | print(seq_name, 'number of videos', thetas.shape[0]) 72 | db['theta'].append(thetas) 73 | db['vid_name'].append(vid_names) 74 | 75 | db['theta'] = np.concatenate(db['theta'], axis=0) 76 | db['vid_name'] = np.concatenate(db['vid_name'], axis=0) 77 | 78 | return db 79 | 80 | 81 | 82 | def read_single_sequence(folder, seq_name): 83 | subjects = os.listdir(folder) 84 | 85 | thetas = [] 86 | vid_names = [] 87 | 88 | for subject in tqdm(subjects): 89 | actions = [x for x in os.listdir(osp.join(folder, subject)) if x.endswith('.npz')] 90 | 91 | for action in actions: 92 | fname = osp.join(folder, subject, action) 93 | 94 | if fname.endswith('shape.npz'): 95 | continue 96 | 97 | data = np.load(fname) 98 | 99 | pose = data['poses'][:, joints_to_use] 100 | 101 | if pose.shape[0] < 60: 102 | continue 103 | 104 | shape = np.repeat(data['betas'][:10][np.newaxis], pose.shape[0], axis=0) 105 | theta = np.concatenate([pose,shape], axis=1) 106 | vid_name = np.array([f'{seq_name}_{subject}_{action[:-4]}']*pose.shape[0]) 107 | 108 | vid_names.append(vid_name) 109 | thetas.append(theta) 110 | 111 | return np.concatenate(thetas, axis=0), np.concatenate(vid_names, axis=0) 112 | 113 | 114 | def read_seq_data(folder, nsubjects, fps): 115 | subjects = os.listdir(folder) 116 | sequences = {} 117 | 118 | assert nsubjects < len(subjects), 'nsubjects should be less than len(subjects)' 119 | 120 | for subject in subjects[:nsubjects]: 121 | actions = os.listdir(osp.join(folder, subject)) 122 | 123 | for action in actions: 124 | data = np.load(osp.join(folder, subject, action)) 125 | mocap_framerate = int(data['mocap_framerate']) 126 | sampling_freq = mocap_framerate // fps 127 | sequences[(subject, action)] = data['poses'][0::sampling_freq, joints_to_use] 128 | 129 | train_set = {} 130 | test_set = {} 131 | 132 | for i, (k,v) in enumerate(sequences.items()): 133 | if i < len(sequences.keys()) - len(sequences.keys()) // 4: 134 | train_set[k] = v 135 | else: 136 | test_set[k] = v 137 | 138 | return train_set, test_set 139 | 140 | if __name__ == '__main__': 141 | parser = 
argparse.ArgumentParser()
142 |     parser.add_argument('--dir', type=str, help='dataset directory', default='data/amass')
143 |     args = parser.parse_args()
144 | 
145 |     db = read_data(args.dir, sequences=all_sequences)
146 |     db_file = osp.join(VIBE_DB_DIR, 'amass_db.pt')
147 |     print(f'Saving AMASS dataset to {db_file}')
148 |     joblib.dump(db, db_file)
149 | 
--------------------------------------------------------------------------------
/lib/dataset/dataset_2d.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 | 
17 | import os
18 | import torch
19 | import random
20 | import logging
21 | import numpy as np
22 | import os.path as osp
23 | import joblib
24 | 
25 | from torch.utils.data import Dataset
26 | 
27 | from lib.core.config import VIBE_DB_DIR
28 | from lib.data_utils.kp_utils import convert_kps
29 | from lib.data_utils.img_utils import normalize_2d_kp, transfrom_keypoints, split_into_chunks
30 | 
31 | logger = logging.getLogger(__name__)
32 | 
33 | class Dataset2D(Dataset):
34 |     def __init__(self, seqlen, overlap=0.,
35 |                  folder=None, dataset_name=None, debug=False):
36 | 
37 |         self.folder = folder
38 |         self.dataset_name = dataset_name
39 |         self.seqlen = seqlen
40 |         self.stride = int(seqlen * (1-overlap))
41 |         self.debug = debug
42 |         self.db = self.load_db()
43 |         self.vid_indices = split_into_chunks(self.db['vid_name'], self.seqlen, self.stride)
44 | 
45 | 
46 |     def __len__(self):
47 |         return len(self.vid_indices)
48 | 
49 |     def __getitem__(self, index):
50 |         return self.get_single_item(index)
51 | 
52 |     def load_db(self):
53 |         split = 'train'
54 | 
55 |         db_file = osp.join(VIBE_DB_DIR, f'{self.dataset_name}_{split}_db.pt')
56 | 
57 |         if osp.isfile(db_file):
58 |             db = joblib.load(db_file)
59 |         else:
60 |             raise ValueError(f'{db_file} does not exist')
61 | 
62 |         print(f'Loaded {self.dataset_name} dataset from {db_file}')
63 |         return db
64 | 
65 |     def get_single_item(self, index):
66 |         start_index, end_index = self.vid_indices[index]
67 | 
68 |         kp_2d = self.db['joints2D'][start_index:end_index+1]
69 |         if self.dataset_name != 'posetrack':
70 |             kp_2d = convert_kps(kp_2d, src=self.dataset_name, dst='spin')
71 |         kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)
72 | 
73 |         bbox = self.db['bbox'][start_index:end_index+1]
74 | 
75 |         input = torch.from_numpy(self.db['features'][start_index:end_index+1]).float()
76 | 
77 | 
78 |         for idx in range(self.seqlen):
79 |             # crop image and transform 2d keypoints
80 |             kp_2d[idx,:,:2], trans = transfrom_keypoints(
81 |                 kp_2d=kp_2d[idx,:,:2],
82 |                 center_x=bbox[idx,0],
83 |                 center_y=bbox[idx,1],
84 |                 width=bbox[idx,2],
85 |                 height=bbox[idx,3],
86 |                 patch_width=224,
87 |                 patch_height=224,
88 |                 do_augment=False,
89 |             )
90 | 
91 |             kp_2d[idx,:,:2] = normalize_2d_kp(kp_2d[idx,:,:2], 224)
92 |             kp_2d_tensor[idx] 
= kp_2d[idx] 93 | 94 | vid_name = self.db['vid_name'][start_index:end_index+1] 95 | frame_id = self.db['img_name'][start_index:end_index+1].astype(str) 96 | instance_id = np.array([v+f for v,f in zip(vid_name, frame_id)]) 97 | 98 | target = { 99 | 'features': input, 100 | 'kp_2d': torch.from_numpy(kp_2d_tensor).float(), # 2D keypoints transformed according to bbox cropping 101 | # 'instance_id': instance_id, 102 | } 103 | 104 | if self.debug: 105 | from lib.data_utils.img_utils import get_single_image_crop 106 | 107 | vid_name = self.db['vid_name'][start_index] 108 | 109 | if self.dataset_name == 'pennaction': 110 | vid_folder = "frames" 111 | vid_name = vid_name.split('/')[-1].split('.')[0] 112 | img_id = "img_name" 113 | elif self.dataset_name == 'posetrack': 114 | vid_folder = osp.join('images', vid_name.split('/')[-2]) 115 | vid_name = vid_name.split('/')[-1].split('.')[0] 116 | img_id = "img_name" 117 | else: 118 | vid_name = '_'.join(vid_name.split('_')[:-1]) 119 | vid_folder = 'imageFiles' 120 | img_id= 'frame_id' 121 | f = osp.join(self.folder, vid_folder, vid_name) 122 | video_file_list = [osp.join(f, x) for x in sorted(os.listdir(f)) if x.endswith('.jpg')] 123 | frame_idxs = self.db[img_id][start_index:end_index + 1] 124 | if self.dataset_name == 'pennaction' or self.dataset_name == 'posetrack': 125 | video = frame_idxs 126 | else: 127 | video = [video_file_list[i] for i in frame_idxs] 128 | 129 | video = torch.cat( 130 | [get_single_image_crop(image, bbox).unsqueeze(0) for image, bbox in zip(video, bbox)], dim=0 131 | ) 132 | 133 | target['video'] = video 134 | 135 | return target 136 | 137 | 138 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import os 18 | import yaml 19 | import time 20 | import torch 21 | import shutil 22 | import logging 23 | import operator 24 | from tqdm import tqdm 25 | from os import path as osp 26 | from functools import reduce 27 | from typing import List, Union 28 | 29 | 30 | def move_dict_to_device(dict, device, tensor2float=False): 31 | for k,v in dict.items(): 32 | if isinstance(v, torch.Tensor): 33 | if tensor2float: 34 | dict[k] = v.float().to(device) 35 | else: 36 | dict[k] = v.to(device) 37 | 38 | 39 | def get_from_dict(dict, keys): 40 | return reduce(operator.getitem, keys, dict) 41 | 42 | 43 | def tqdm_enumerate(iter): 44 | i = 0 45 | for y in tqdm(iter): 46 | yield i, y 47 | i += 1 48 | 49 | 50 | def iterdict(d): 51 | for k,v in d.items(): 52 | if isinstance(v, dict): 53 | d[k] = dict(v) 54 | iterdict(v) 55 | return d 56 | 57 | 58 | def accuracy(output, target): 59 | _, pred = output.topk(1) 60 | pred = pred.view(-1) 61 | 62 | correct = pred.eq(target).sum() 63 | 64 | return correct.item(), target.size(0) - correct.item() 65 | 66 | 67 | def lr_decay(optimizer, step, lr, decay_step, gamma): 68 | lr = lr * gamma ** (step/decay_step) 69 | for param_group in optimizer.param_groups: 70 | param_group['lr'] = lr 71 | return lr 72 | 73 | 74 | def step_decay(optimizer, step, lr, decay_step, gamma): 75 | lr = lr * gamma ** (step / decay_step) 76 | for param_group in optimizer.param_groups: 77 | param_group['lr'] = lr 78 | return lr 79 | 80 | 81 | def read_yaml(filename): 82 | return yaml.load(open(filename, 'r')) 83 | 84 | 85 | def write_yaml(filename, object): 86 | with open(filename, 'w') as f: 87 | yaml.dump(object, f) 88 | 89 | 90 | def save_dict_to_yaml(obj, filename, mode='w'): 91 | with open(filename, mode) as f: 92 | yaml.dump(obj, f, default_flow_style=False) 93 | 94 | 95 | def save_to_file(obj, filename, mode='w'): 96 | with open(filename, mode) as f: 97 | f.write(obj) 98 | 99 | 100 | def concatenate_dicts(dict_list, dim=0): 101 | rdict = dict.fromkeys(dict_list[0].keys()) 102 | for k in rdict.keys(): 103 | rdict[k] = torch.cat([d[k] for d in dict_list], dim=dim) 104 | return rdict 105 | 106 | 107 | def bool_to_string(x: Union[List[bool],bool]) -> Union[List[str],str]: 108 | """ 109 | boolean to string conversion 110 | :param x: list or bool to be converted 111 | :return: string converted thing 112 | """ 113 | if isinstance(x, bool): 114 | return [str(x)] 115 | for i, j in enumerate(x): 116 | x[i]=str(j) 117 | return x 118 | 119 | 120 | def checkpoint2model(checkpoint, key='gen_state_dict'): 121 | state_dict = checkpoint[key] 122 | print(f'Performance of loaded model on 3DPW is {checkpoint["performance"]:.2f}mm') 123 | # del state_dict['regressor.mean_theta'] 124 | return state_dict 125 | 126 | 127 | def get_optimizer(model, optim_type, lr, weight_decay, momentum): 128 | if optim_type in ['sgd', 'SGD']: 129 | opt = torch.optim.SGD(lr=lr, params=model.parameters(), momentum=momentum) 130 | elif optim_type in ['Adam', 'adam', 'ADAM']: 131 | opt = torch.optim.Adam(lr=lr, params=model.parameters(), weight_decay=weight_decay) 132 | else: 133 | raise ModuleNotFoundError 134 | return opt 135 | 136 | 137 | def create_logger(logdir, phase='train'): 138 | os.makedirs(logdir, exist_ok=True) 139 | 140 | log_file = osp.join(logdir, f'{phase}_log.txt') 141 | 142 | head = '%(asctime)-15s %(message)s' 143 | logging.basicConfig(filename=log_file, 144 | format=head) 145 | logger = logging.getLogger() 146 | logger.setLevel(logging.INFO) 147 | 
console = logging.StreamHandler() 148 | logging.getLogger('').addHandler(console) 149 | 150 | return logger 151 | 152 | 153 | class AverageMeter(object): 154 | def __init__(self): 155 | self.val = 0 156 | self.avg = 0 157 | self.sum = 0 158 | self.count = 0 159 | 160 | def update(self, val, n=1): 161 | self.val = val 162 | self.sum += val * n 163 | self.count += n 164 | self.avg = self.sum / self.count 165 | 166 | 167 | def prepare_output_dir(cfg, cfg_file): 168 | 169 | # ==== create logdir 170 | logtime = time.strftime('%d-%m-%Y_%H-%M-%S') 171 | logdir = f'{logtime}_{cfg.EXP_NAME}' 172 | 173 | logdir = osp.join(cfg.OUTPUT_DIR, logdir) 174 | os.makedirs(logdir, exist_ok=True) 175 | shutil.copy(src=cfg_file, dst=osp.join(cfg.OUTPUT_DIR, 'config.yaml')) 176 | 177 | cfg.LOGDIR = logdir 178 | 179 | # save config 180 | save_dict_to_yaml(cfg, osp.join(cfg.LOGDIR, 'config.yaml')) 181 | 182 | return cfg 183 | -------------------------------------------------------------------------------- /lib/core/evaluate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 | 
17 | import time
18 | import torch
19 | import shutil
20 | import logging
21 | import numpy as np
22 | import os.path as osp
23 | from progress.bar import Bar
24 | 
25 | from lib.core.config import VIBE_DATA_DIR
26 | from lib.utils.utils import move_dict_to_device, AverageMeter
27 | 
28 | from lib.utils.eval_utils import (
29 |     compute_accel,
30 |     compute_error_accel,
31 |     compute_error_verts,
32 |     batch_compute_similarity_transform_torch,
33 | )
34 | 
35 | logger = logging.getLogger(__name__)
36 | 
37 | class Evaluator():
38 |     def __init__(
39 |             self,
40 |             test_loader,
41 |             model,
42 |             device=None,
43 |     ):
44 |         self.test_loader = test_loader
45 |         self.model = model
46 |         self.device = device
47 | 
48 |         self.evaluation_accumulators = dict.fromkeys(['pred_j3d', 'target_j3d', 'target_theta', 'pred_verts'])
49 | 
50 |         if self.device is None:
51 |             self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
52 | 
53 |     def validate(self):
54 |         self.model.eval()
55 | 
56 |         start = time.time()
57 | 
58 |         summary_string = ''
59 | 
60 |         bar = Bar('Validation', fill='#', max=len(self.test_loader))
61 | 
62 |         if self.evaluation_accumulators is not None:
63 |             for k,v in self.evaluation_accumulators.items():
64 |                 self.evaluation_accumulators[k] = []
65 | 
66 |         J_regressor = torch.from_numpy(np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float()
67 | 
68 |         for i, target in enumerate(self.test_loader):
69 | 
70 |             # video = video.to(self.device)
71 |             move_dict_to_device(target, self.device)
72 | 
73 |             # <=============
74 |             with torch.no_grad():
75 |                 inp = target['features']
76 | 
77 |                 preds = self.model(inp, J_regressor=J_regressor)
78 | 
79 |                 # convert to 14 keypoint format for evaluation
80 |                 # if self.use_spin:
81 |                 n_kp = preds[-1]['kp_3d'].shape[-2]
82 |                 pred_j3d = preds[-1]['kp_3d'].view(-1, n_kp, 3).cpu().numpy()
83 |                 target_j3d = target['kp_3d'].view(-1, n_kp, 3).cpu().numpy()
84 |                 pred_verts = preds[-1]['verts'].view(-1, 6890, 3).cpu().numpy()
85 |                 target_theta = target['theta'].view(-1, 85).cpu().numpy()
86 | 
87 | 
88 |                 self.evaluation_accumulators['pred_verts'].append(pred_verts)
89 |                 self.evaluation_accumulators['target_theta'].append(target_theta)
90 | 
91 |                 self.evaluation_accumulators['pred_j3d'].append(pred_j3d)
92 |                 self.evaluation_accumulators['target_j3d'].append(target_j3d)
93 |             # =============>
94 | 
95 |             batch_time, start = time.time() - start, time.time()  # per-batch wall time, then reset the timer
96 | 
97 |             summary_string = f'({i + 1}/{len(self.test_loader)}) | batch: {batch_time * 1000.0:.4}ms | ' \
98 |                              f'Total: {bar.elapsed_td} | ETA: {bar.eta_td:}'
99 | 
100 |             bar.suffix = summary_string
101 |             bar.next()
102 | 
103 |         bar.finish()
104 | 
105 |         logger.info(summary_string)
106 | 
107 |     def evaluate(self):
108 | 
109 |         for k, v in self.evaluation_accumulators.items():
110 |             self.evaluation_accumulators[k] = np.vstack(v)
111 | 
112 |         pred_j3ds = self.evaluation_accumulators['pred_j3d']
113 |         target_j3ds = self.evaluation_accumulators['target_j3d']
114 | 
115 |         pred_j3ds = torch.from_numpy(pred_j3ds).float()
116 |         target_j3ds = torch.from_numpy(target_j3ds).float()
117 | 
118 |         print(f'Evaluating on {pred_j3ds.shape[0]} poses...')
119 |         pred_pelvis = (pred_j3ds[:,[2],:] + pred_j3ds[:,[3],:]) / 2.0
120 |         target_pelvis = (target_j3ds[:,[2],:] + target_j3ds[:,[3],:]) / 2.0
121 | 
122 | 
123 |         pred_j3ds -= pred_pelvis
124 |         target_j3ds -= target_pelvis
125 | 
126 |         # Absolute error (MPJPE)
127 |         errors = torch.sqrt(((pred_j3ds - target_j3ds) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
128 |         S1_hat = batch_compute_similarity_transform_torch(pred_j3ds, target_j3ds)
129 |         errors_pa = torch.sqrt(((S1_hat - target_j3ds) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
130 |         pred_verts = self.evaluation_accumulators['pred_verts']
131 |         target_theta = self.evaluation_accumulators['target_theta']
132 | 
133 |         m2mm = 1000
134 | 
135 |         pve = np.mean(compute_error_verts(target_theta=target_theta, pred_verts=pred_verts)) * m2mm
136 |         accel = np.mean(compute_accel(pred_j3ds)) * m2mm
137 |         accel_err = np.mean(compute_error_accel(joints_pred=pred_j3ds, joints_gt=target_j3ds)) * m2mm
138 |         mpjpe = np.mean(errors) * m2mm
139 |         pa_mpjpe = np.mean(errors_pa) * m2mm
140 | 
141 |         eval_dict = {
142 |             'mpjpe': mpjpe,
143 |             'pa-mpjpe': pa_mpjpe,
144 |             'pve': pve,
145 |             'accel': accel,
146 |             'accel_err': accel_err
147 |         }
148 | 
149 |         log_str = ' '.join([f'{k.upper()}: {v:.4f},' for k, v in eval_dict.items()])
150 |         print(log_str)
151 | 
152 |     def run(self):
153 |         self.validate()
154 |         self.evaluate()
--------------------------------------------------------------------------------
/doc/demo.md:
--------------------------------------------------------------------------------
1 | # Demo
2 | 
3 | ## Flags
4 | 
5 | - `--vid_file (str)`: Path to the input video file or a YouTube link. If you provide a YouTube link, it will be downloaded
6 | to a temporary folder and then processed.
7 | 
8 | - `--output_folder (str)`: Path to the folder where the VIBE predictions and output renderings are stored.
9 | 
10 | - `--tracking_method (str), default=bbox`: Defines the tracking method used to compute bboxes and tracklets of people in the input video.
11 | Available options are `bbox` or `pose`. `bbox` tracking is available [here](https://github.com/mkocabas/multi-person-tracker)
12 | as a standalone python package. For `pose` tracking, you need to install
13 | [STAF](https://github.com/soulslicer/openpose/tree/staf), an extension of OpenPose to
14 | multi-person pose tracking, introduced in [1](#references).
15 | 
16 | - `--detector (str), default=yolo`: Defines the type of detector used by the `bbox` tracking method if enabled. Available options are
17 | `maskrcnn` and `yolo`. `maskrcnn` is more accurate but slower than `yolo`. Refer to the [speed comparison](demo.md#runtime-performance) for further information.
18 | 
19 | - `--yolo_img_size (int), default=416`: Input image size of the YOLO detector.
20 | 
21 | - `--tracker_batch_size (int), default=12`: Batch size of the bbox tracker. If you get a memory error, reduce it.
22 | 
23 | - `--staf_dir (str)`: Path to the folder where the STAF pose tracker is installed. This path should point to the main directory of STAF.
24 | 
25 | - `--vibe_batch_size (int), default=450`: Batch size of the VIBE model.
26 | 
27 | - `--display`: Enable this flag if you want to visualize the output of tracking and pose & shape estimation interactively.
28 | 
29 | - `--run_smplify`: Enable this flag if you want to refine the results of VIBE using the Temporal SMPLify algorithm.
30 | For this option, you have to set `--tracking_method` to `pose`.
31 | 
32 | - `--no_render`: This flag disables the final rendering of VIBE results. Useful if you only want to get the VIBE predictions.
33 | 
34 | - `--wireframe`: Enable this if you would like to render wireframe meshes in the final rendering.
35 | 
36 | - `--sideview`: Render the output meshes from an alternate viewpoint. The default alternate viewpoint is -90 degrees about the y-axis.
37 | Note that this option doubles the rendering time.
38 | 
39 | - `--save_obj`: Save output meshes as .obj files.
40 | 
41 | ## Examples
42 | - Run VIBE on a video file using the bbox tracker and visualize the results with wireframe meshes:
43 | ```bash
44 | python demo.py --vid_file sample_video.mp4 --output_folder output/ --tracking_method bbox --detector maskrcnn --display --wireframe
45 | ```
46 | 
47 | - Run VIBE using the pose tracker and run Temporal SMPLify to further refine the predictions:
48 | ```bash
49 | python demo.py --vid_file sample_video.mp4 --output_folder output/ --tracking_method pose --display --run_smplify
50 | ```
51 | 
52 | - Change the default batch sizes to avoid possible memory errors:
53 | ```bash
54 | python demo.py --vid_file sample_video.mp4 --output_folder output/ --tracker_batch_size 2 --vibe_batch_size 64
55 | ```
56 | 
57 | ## Output Format
58 | 
59 | If the demo finishes successfully, it creates a file named `vibe_output.pkl` in the `--output_folder`.
60 | We can inspect the contents of this file as follows:
61 | 
62 | ```python
63 | >>> import joblib # you may use native pickle here as well
64 | 
65 | >>> output = joblib.load('output/group_dance/vibe_output.pkl')
66 | 
67 | >>> print(output.keys())
68 | 
69 | dict_keys([1, 2, 3, 4]) # these are the track ids for each subject appearing in the video
70 | 
71 | >>> for k,v in output[1].items(): print(k,v.shape)
72 | 
73 | pred_cam (n_frames, 3) # weak perspective camera parameters in cropped image space (s,tx,ty)
74 | orig_cam (n_frames, 4) # weak perspective camera parameters in original image space (sx,sy,tx,ty)
75 | verts (n_frames, 6890, 3) # SMPL mesh vertices
76 | pose (n_frames, 72) # SMPL pose parameters
77 | betas (n_frames, 10) # SMPL body shape parameters
78 | joints3d (n_frames, 49, 3) # SMPL 3D joints
79 | joints2d (n_frames, 21, 3) # 2D keypoint detections by STAF if pose tracking is enabled, otherwise None
80 | bboxes (n_frames, 4) # bbox detections (cx,cy,w,h)
81 | frame_ids (n_frames,) # frame ids in which the subject with tracking id #1 appears
82 | 
83 | ```
84 | You can find the names & order of the 3D joints [here](https://github.com/mkocabas/VIBE/blob/master/lib/data_utils/kp_utils.py#L212) and of the 2D joints [here](https://github.com/mkocabas/VIBE/blob/master/lib/data_utils/kp_utils.py#L187). A short sketch of aligning these per-track arrays with absolute frame indices is given after the runtime table below.
85 | 
86 | ## Runtime Performance
87 | Here is the breakdown of runtime speed per step, namely tracking and VIBE. These results are obtained by running VIBE
88 | on a [video](https://www.youtube.com/watch?v=Opry3F6aB1I) containing 5 people.
89 | 
90 | ```bash
91 | python demo.py --vid_file https://www.youtube.com/watch?v=Opry3F6aB1I --output_folder output/ --vibe_batch_size 32 --no_render
92 | ```
93 | 
94 | | Tracker | GPU | Tracking Time (ms/img) | Tracking FPS | VIBE Time (ms/img) | VIBE FPS | Total FPS |
95 | |-----------------|:---------:|:----------------------:|:------------:|:------------------:|:--------:|:---------:|
96 | | STAF-pose | RTX2080Ti | 23.2 | 43 | 16.1 | 61 | 21 |
97 | | MaskRCNN-bbox | RTX2080Ti | 68.0 | 15 | 16.1 | 61 | 11 |
98 | | YOLOv3-416-bbox | RTX2080Ti | 12.7 | 79 | 16.1 | 61 | 29 |
99 | | YOLOv3-608-bbox | RTX2080Ti | 22.2 | 45 | 16.1 | 61 | 23 |
100 | 
101 | **Note**: The table above does not include the time spent rendering the final output.
102 | We use pyrender with GPU acceleration, and rendering runs at only 2-3 FPS. Please let us know if you know of a faster alternative.
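## Aligning Predictions with Video Frames

Since a subject usually appears only in a subset of frames, `frame_ids` is what ties each per-track array back to the source video. Below is a minimal sketch of doing that alignment; the file path and track id follow the inspection example above, and only the dictionary keys documented in the output format are assumed:

```python
import joblib
import numpy as np

output = joblib.load('output/group_dance/vibe_output.pkl')  # path from the example above
track = output[1]  # predictions for the subject with track id 1

# joints3d[i] belongs to video frame frame_ids[i], not to frame i itself
frame_to_joints3d = {int(f): j for f, j in zip(track['frame_ids'], track['joints3d'])}

# e.g. count in how many frames of the full video this subject is visible
n_total = int(track['frame_ids'].max()) + 1
visible = np.zeros(n_total, dtype=bool)
visible[track['frame_ids']] = True
print(f"track 1 visible in {visible.sum()}/{n_total} frames")
```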
103 | 104 | ## References 105 | [1] Pose tracker is from [STAF implementation](https://github.com/soulslicer/openpose/tree/staf) 106 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import os 18 | os.environ['PYOPENGL_PLATFORM'] = 'egl' 19 | 20 | import torch 21 | import pprint 22 | import random 23 | import numpy as np 24 | import torch.backends.cudnn as cudnn 25 | from torch.utils.tensorboard import SummaryWriter 26 | 27 | from lib.core.loss import VIBELoss 28 | from lib.core.trainer import Trainer 29 | from lib.core.config import parse_args 30 | from lib.utils.utils import prepare_output_dir 31 | from lib.models import VIBE, MotionDiscriminator 32 | from lib.dataset.loaders import get_data_loaders 33 | from lib.utils.utils import create_logger, get_optimizer 34 | 35 | 36 | def main(cfg): 37 | if cfg.SEED_VALUE >= 0: 38 | print(f'Seed value for the experiment {cfg.SEED_VALUE}') 39 | os.environ['PYTHONHASHSEED'] = str(cfg.SEED_VALUE) 40 | random.seed(cfg.SEED_VALUE) 41 | torch.manual_seed(cfg.SEED_VALUE) 42 | np.random.seed(cfg.SEED_VALUE) 43 | 44 | logger = create_logger(cfg.LOGDIR, phase='train') 45 | 46 | logger.info(f'GPU name -> {torch.cuda.get_device_name()}') 47 | logger.info(f'GPU feat -> {torch.cuda.get_device_properties("cuda")}') 48 | 49 | logger.info(pprint.pformat(cfg)) 50 | 51 | # cudnn related setting 52 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 53 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 54 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 55 | 56 | writer = SummaryWriter(log_dir=cfg.LOGDIR) 57 | writer.add_text('config', pprint.pformat(cfg), 0) 58 | 59 | # ========= Dataloaders ========= # 60 | data_loaders = get_data_loaders(cfg) 61 | 62 | # ========= Compile Loss ========= # 63 | loss = VIBELoss( 64 | e_loss_weight=cfg.LOSS.KP_2D_W, 65 | e_3d_loss_weight=cfg.LOSS.KP_3D_W, 66 | e_pose_loss_weight=cfg.LOSS.POSE_W, 67 | e_shape_loss_weight=cfg.LOSS.SHAPE_W, 68 | d_motion_loss_weight=cfg.LOSS.D_MOTION_LOSS_W, 69 | ) 70 | 71 | # ========= Initialize networks, optimizers and lr_schedulers ========= # 72 | generator = VIBE( 73 | n_layers=cfg.MODEL.TGRU.NUM_LAYERS, 74 | batch_size=cfg.TRAIN.BATCH_SIZE, 75 | seqlen=cfg.DATASET.SEQLEN, 76 | hidden_size=cfg.MODEL.TGRU.HIDDEN_SIZE, 77 | pretrained=cfg.TRAIN.PRETRAINED_REGRESSOR, 78 | add_linear=cfg.MODEL.TGRU.ADD_LINEAR, 79 | bidirectional=cfg.MODEL.TGRU.BIDIRECTIONAL, 80 | use_residual=cfg.MODEL.TGRU.RESIDUAL, 81 | ).to(cfg.DEVICE) 82 | 83 | if cfg.TRAIN.PRETRAINED != '' and os.path.isfile(cfg.TRAIN.PRETRAINED): 84 | checkpoint = torch.load(cfg.TRAIN.PRETRAINED) 85 | best_performance = checkpoint['performance'] 86 | 
generator.load_state_dict(checkpoint['gen_state_dict']) 87 | print(f'==> Loaded pretrained model from {cfg.TRAIN.PRETRAINED}...') 88 | print(f'Performance on 3DPW test set {best_performance}') 89 | else: 90 | print(f'{cfg.TRAIN.PRETRAINED} is not a pretrained model!!!!') 91 | 92 | gen_optimizer = get_optimizer( 93 | model=generator, 94 | optim_type=cfg.TRAIN.GEN_OPTIM, 95 | lr=cfg.TRAIN.GEN_LR, 96 | weight_decay=cfg.TRAIN.GEN_WD, 97 | momentum=cfg.TRAIN.GEN_MOMENTUM, 98 | ) 99 | 100 | motion_discriminator = MotionDiscriminator( 101 | rnn_size=cfg.TRAIN.MOT_DISCR.HIDDEN_SIZE, 102 | input_size=69, 103 | num_layers=cfg.TRAIN.MOT_DISCR.NUM_LAYERS, 104 | output_size=1, 105 | feature_pool=cfg.TRAIN.MOT_DISCR.FEATURE_POOL, 106 | attention_size=None if cfg.TRAIN.MOT_DISCR.FEATURE_POOL !='attention' else cfg.TRAIN.MOT_DISCR.ATT.SIZE, 107 | attention_layers=None if cfg.TRAIN.MOT_DISCR.FEATURE_POOL !='attention' else cfg.TRAIN.MOT_DISCR.ATT.LAYERS, 108 | attention_dropout=None if cfg.TRAIN.MOT_DISCR.FEATURE_POOL !='attention' else cfg.TRAIN.MOT_DISCR.ATT.DROPOUT 109 | ).to(cfg.DEVICE) 110 | 111 | dis_motion_optimizer = get_optimizer( 112 | model=motion_discriminator, 113 | optim_type=cfg.TRAIN.MOT_DISCR.OPTIM, 114 | lr=cfg.TRAIN.MOT_DISCR.LR, 115 | weight_decay=cfg.TRAIN.MOT_DISCR.WD, 116 | momentum=cfg.TRAIN.MOT_DISCR.MOMENTUM 117 | ) 118 | 119 | motion_lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( 120 | dis_motion_optimizer, 121 | mode='min', 122 | factor=0.1, 123 | patience=cfg.TRAIN.LR_PATIENCE, 124 | verbose=True, 125 | ) 126 | 127 | lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( 128 | gen_optimizer, 129 | mode='min', 130 | factor=0.1, 131 | patience=cfg.TRAIN.LR_PATIENCE, 132 | verbose=True, 133 | ) 134 | 135 | # ========= Start Training ========= # 136 | Trainer( 137 | data_loaders=data_loaders, 138 | generator=generator, 139 | motion_discriminator=motion_discriminator, 140 | criterion=loss, 141 | dis_motion_optimizer=dis_motion_optimizer, 142 | dis_motion_update_steps=cfg.TRAIN.MOT_DISCR.UPDATE_STEPS, 143 | gen_optimizer=gen_optimizer, 144 | start_epoch=cfg.TRAIN.START_EPOCH, 145 | end_epoch=cfg.TRAIN.END_EPOCH, 146 | device=cfg.DEVICE, 147 | writer=writer, 148 | debug=cfg.DEBUG, 149 | logdir=cfg.LOGDIR, 150 | lr_scheduler=lr_scheduler, 151 | motion_lr_scheduler=motion_lr_scheduler, 152 | resume=cfg.TRAIN.RESUME, 153 | num_iters_per_epoch=cfg.TRAIN.NUM_ITERS_PER_EPOCH, 154 | debug_freq=cfg.DEBUG_FREQ, 155 | ).fit() 156 | 157 | 158 | if __name__ == '__main__': 159 | cfg, cfg_file = parse_args() 160 | cfg = prepare_output_dir(cfg, cfg_file) 161 | 162 | main(cfg) 163 | -------------------------------------------------------------------------------- /lib/models/vibe.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import os 18 | import torch 19 | import os.path as osp 20 | import torch.nn as nn 21 | import torch.nn.functional as F 22 | 23 | from lib.core.config import VIBE_DATA_DIR 24 | from lib.models.spin import Regressor, hmr 25 | 26 | 27 | class TemporalEncoder(nn.Module): 28 | def __init__( 29 | self, 30 | n_layers=1, 31 | hidden_size=2048, 32 | add_linear=False, 33 | bidirectional=False, 34 | use_residual=True 35 | ): 36 | super(TemporalEncoder, self).__init__() 37 | 38 | self.gru = nn.GRU( 39 | input_size=2048, 40 | hidden_size=hidden_size, 41 | bidirectional=bidirectional, 42 | num_layers=n_layers 43 | ) 44 | 45 | self.linear = None 46 | if bidirectional: 47 | self.linear = nn.Linear(hidden_size*2, 2048) 48 | elif add_linear: 49 | self.linear = nn.Linear(hidden_size, 2048) 50 | self.use_residual = use_residual 51 | 52 | def forward(self, x): 53 | n,t,f = x.shape 54 | x = x.permute(1,0,2) # NTF -> TNF 55 | y, _ = self.gru(x) 56 | if self.linear: 57 | y = F.relu(y) 58 | y = self.linear(y.view(-1, y.size(-1))) 59 | y = y.view(t,n,f) 60 | if self.use_residual and y.shape[-1] == 2048: 61 | y = y + x 62 | y = y.permute(1,0,2) # TNF -> NTF 63 | return y 64 | 65 | 66 | class VIBE(nn.Module): 67 | def __init__( 68 | self, 69 | seqlen, 70 | batch_size=64, 71 | n_layers=1, 72 | hidden_size=2048, 73 | add_linear=False, 74 | bidirectional=False, 75 | use_residual=True, 76 | pretrained=osp.join(VIBE_DATA_DIR, 'spin_model_checkpoint.pth.tar'), 77 | ): 78 | 79 | super(VIBE, self).__init__() 80 | 81 | self.seqlen = seqlen 82 | self.batch_size = batch_size 83 | 84 | self.encoder = TemporalEncoder( 85 | n_layers=n_layers, 86 | hidden_size=hidden_size, 87 | bidirectional=bidirectional, 88 | add_linear=add_linear, 89 | use_residual=use_residual, 90 | ) 91 | 92 | # regressor can predict cam, pose and shape params in an iterative way 93 | self.regressor = Regressor() 94 | 95 | if pretrained and os.path.isfile(pretrained): 96 | pretrained_dict = torch.load(pretrained)['model'] 97 | 98 | self.regressor.load_state_dict(pretrained_dict, strict=False) 99 | print(f'=> loaded pretrained model from \'{pretrained}\'') 100 | 101 | 102 | def forward(self, input, J_regressor=None): 103 | # input size NTF 104 | batch_size, seqlen = input.shape[:2] 105 | 106 | feature = self.encoder(input) 107 | feature = feature.reshape(-1, feature.size(-1)) 108 | 109 | smpl_output = self.regressor(feature, J_regressor=J_regressor) 110 | for s in smpl_output: 111 | s['theta'] = s['theta'].reshape(batch_size, seqlen, -1) 112 | s['verts'] = s['verts'].reshape(batch_size, seqlen, -1, 3) 113 | s['kp_2d'] = s['kp_2d'].reshape(batch_size, seqlen, -1, 2) 114 | s['kp_3d'] = s['kp_3d'].reshape(batch_size, seqlen, -1, 3) 115 | s['rotmat'] = s['rotmat'].reshape(batch_size, seqlen, -1, 3, 3) 116 | 117 | return smpl_output 118 | 119 | 120 | class VIBE_Demo(nn.Module): 121 | def __init__( 122 | self, 123 | seqlen, 124 | batch_size=64, 125 | n_layers=1, 126 | hidden_size=2048, 127 | add_linear=False, 128 | bidirectional=False, 129 | use_residual=True, 130 | pretrained=osp.join(VIBE_DATA_DIR, 'spin_model_checkpoint.pth.tar'), 131 | ): 132 | 133 | super(VIBE_Demo, self).__init__() 134 | 135 | self.seqlen = seqlen 136 | self.batch_size = batch_size 137 | 138 | self.encoder = TemporalEncoder( 139 | n_layers=n_layers, 140 | hidden_size=hidden_size, 141 | bidirectional=bidirectional, 142 | add_linear=add_linear, 143 | use_residual=use_residual, 144 | ) 145 | 146 | self.hmr = hmr() 147 | checkpoint = 
torch.load(pretrained) 148 | self.hmr.load_state_dict(checkpoint['model'], strict=False) 149 | 150 | # regressor can predict cam, pose and shape params in an iterative way 151 | self.regressor = Regressor() 152 | 153 | if pretrained and os.path.isfile(pretrained): 154 | pretrained_dict = torch.load(pretrained)['model'] 155 | 156 | self.regressor.load_state_dict(pretrained_dict, strict=False) 157 | print(f'=> loaded pretrained model from \'{pretrained}\'') 158 | 159 | 160 | def forward(self, input, J_regressor=None): 161 | # input size NTF 162 | batch_size, seqlen, nc, h, w = input.shape 163 | 164 | feature = self.hmr.feature_extractor(input.reshape(-1, nc, h, w)) 165 | 166 | feature = feature.reshape(batch_size, seqlen, -1) 167 | feature = self.encoder(feature) 168 | feature = feature.reshape(-1, feature.size(-1)) 169 | 170 | smpl_output = self.regressor(feature, J_regressor=J_regressor) 171 | 172 | for s in smpl_output: 173 | s['theta'] = s['theta'].reshape(batch_size, seqlen, -1) 174 | s['verts'] = s['verts'].reshape(batch_size, seqlen, -1, 3) 175 | s['kp_2d'] = s['kp_2d'].reshape(batch_size, seqlen, -1, 2) 176 | s['kp_3d'] = s['kp_3d'].reshape(batch_size, seqlen, -1, 3) 177 | s['rotmat'] = s['rotmat'].reshape(batch_size, seqlen, -1, 3, 3) 178 | 179 | return smpl_output 180 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | License 2 | 3 | Software Copyright License for non-commercial scientific research purposes 4 | Please read carefully the following terms and conditions and any accompanying documentation before you download 5 | and/or use the VIBE model, data and software, (the "Model & Software"), including 3D meshes, software, and scripts. 6 | By downloading and/or using the Model & Software (including downloading, cloning, installing, and any other use 7 | of this github repository), you acknowledge that you have read these terms and conditions, understand them, and 8 | agree to be bound by them. If you do not agree with these terms and conditions, you must not download and/or use 9 | the Model & Software. Any infringement of the terms of this agreement will automatically terminate your rights 10 | under this License 11 | 12 | Ownership / Licensees 13 | The Software and the associated materials has been developed at the 14 | 15 | Max Planck Institute for Intelligent Systems (hereinafter "MPI"). 16 | 17 | Any copyright or patent right is owned by and proprietary material of the 18 | 19 | Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (hereinafter “MPG”; MPI and MPG hereinafter 20 | collectively “Max-Planck”) 21 | 22 | hereinafter the “Licensor”. 23 | 24 | This software includes the SMPL Body Model. By downloading this software, you are agreeing to be bound by the terms of the SMPL Model License 25 | 26 | https://smpl.is.tue.mpg.de/modellicense 27 | 28 | which is necessary to create SMPL body models. 
29 | 30 | SMPL bodies that are generated with VIBE can be distributed freely under the SMPL Body License 31 | 32 | https://smpl.is.tue.mpg.de/bodylicense 33 | 34 | License Grant 35 | Licensor grants you (Licensee) personally a single-user, non-exclusive, non-transferable, free of charge right: 36 | 37 | To install the Model & Software on computers owned, leased or otherwise controlled by you and/or your organization; 38 | To use the Model & Software for the sole purpose of performing non-commercial scientific research, non-commercial 39 | education, or non-commercial artistic projects; 40 | Any other use, in particular any use for commercial purposes, is prohibited. This includes, without limitation, 41 | incorporation in a commercial product, use in a commercial service, or production of other artifacts for 42 | commercial purposes. The Model & Software may not be reproduced, modified and/or made available in any form to 43 | any third party without Max-Planck’s prior written permission. 44 | 45 | The Model & Software may not be used for pornographic purposes or to generate pornographic material whether 46 | commercial or not. This license also prohibits the use of the Model & Software to train methods/algorithms/neural 47 | networks/etc. for commercial use of any kind. By downloading the Model & Software, 48 | you agree not to reverse engineer it. 49 | 50 | No Distribution 51 | The Model & Software and the license herein granted shall not be copied, shared, distributed, re-sold, offered 52 | for re-sale, transferred or sub-licensed in whole or in part except that you may make one copy for archive 53 | purposes only. 54 | 55 | Disclaimer of Representations and Warranties 56 | You expressly acknowledge and agree that the Model & Software results from basic research, is provided “AS IS”, 57 | may contain errors, and that any use of the Model & Software is at your sole risk. LICENSOR MAKES NO REPRESENTATIONS 58 | OR WARRANTIES OF ANY KIND CONCERNING THE MODEL & SOFTWARE, NEITHER EXPRESS NOR IMPLIED, AND THE ABSENCE OF ANY 59 | LEGAL OR ACTUAL DEFECTS, WHETHER DISCOVERABLE OR NOT. Specifically, and not to limit the foregoing, licensor 60 | makes no representations or warranties (i) regarding the merchantability or fitness for a particular purpose of 61 | the Model & Software, (ii) that the use of the Model & Software will not infringe any patents, copyrights or other 62 | intellectual property rights of a third party, and (iii) that the use of the Model & Software will not cause any 63 | damage of any kind to you or a third party. 64 | 65 | Limitation of Liability 66 | Because this Model & Software License Agreement qualifies as a donation, according to Section 521 of the German 67 | Civil Code (Bürgerliches Gesetzbuch – BGB) Licensor as a donor is liable for intent and gross negligence only. 68 | If the Licensor fraudulently conceals a legal or material defect, they are obliged to compensate the Licensee 69 | for the resulting damage. 70 | 71 | Licensor shall be liable for loss of data only up to the amount of typical recovery costs which would have 72 | arisen had proper and regular data backup measures been taken. For the avoidance of doubt Licensor shall be 73 | liable in accordance with the German Product Liability Act in the event of product liability. The foregoing 74 | applies also to Licensor’s legal representatives or assistants in performance. Any further liability shall be excluded. 
75 | Patent claims generated through the usage of the Model & Software cannot be directed towards the copyright holders. 76 | The Model & Software is provided in the state of development the licensor defines. If modified or extended by 77 | Licensee, the Licensor makes no claims about the fitness of the Model & Software and is not responsible 78 | for any problems such modifications cause. 79 | 80 | No Maintenance Services 81 | You understand and agree that Licensor is under no obligation to provide either maintenance services, 82 | update services, notices of latent defects, or corrections of defects with regard to the Model & Software. 83 | Licensor nevertheless reserves the right to update, modify, or discontinue the Model & Software at any time. 84 | 85 | Defects of the Model & Software must be notified in writing to the Licensor with a comprehensible description 86 | of the error symptoms. The notification of the defect should enable the reproduction of the error. 87 | The Licensee is encouraged to communicate any use, results, modification or publication. 88 | 89 | Publications using the Model & Software 90 | You acknowledge that the Model & Software is a valuable scientific resource and agree to appropriately reference 91 | the following paper in any publication making use of the Model & Software. 92 | 93 | Citation: 94 | 95 | @inproceedings{VIBE:CVPR:2020, 96 | title = {{VIBE}: Video Inference for Human Body Pose and Shape Estimation}, 97 | author = {Kocabas, Muhammed and Athanasiou, Nikos and Black, Michael J.}, 98 | booktitle = {Computer Vision and Pattern Recognition (CVPR)}, 99 | month = jun, 100 | year = {2020}, 101 | month_numeric = {6} 102 | } 103 | 104 | Commercial licensing opportunities 105 | For commercial uses of the Software, please send email to ps-license@tue.mpg.de 106 | 107 | This Agreement shall be governed by the laws of the Federal Republic of Germany except for the UN Sales Convention. 108 | -------------------------------------------------------------------------------- /lib/data_utils/posetrack_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import sys 18 | sys.path.append('.') 19 | 20 | import glob 21 | import joblib 22 | import argparse 23 | import numpy as np 24 | import json 25 | import os.path as osp 26 | 27 | from lib.models import spin 28 | from lib.core.config import VIBE_DB_DIR 29 | from lib.utils.utils import tqdm_enumerate 30 | from lib.data_utils.feature_extractor import extract_features 31 | from lib.data_utils.kp_utils import get_posetrack_original_kp_names, convert_kps 32 | 33 | def read_data(folder, set): 34 | dataset = { 35 | 'img_name' : [] , 36 | 'joints2D': [], 37 | 'bbox': [], 38 | 'vid_name': [], 39 | 'features': [], 40 | } 41 | 42 | model = spin.get_pretrained_hmr() 43 | 44 | file_names = glob.glob(osp.join(folder, 'posetrack_data/annotations/', f'{set}/*.json')) 45 | file_names = sorted(file_names) 46 | nn_corrupted = 0 47 | tot_frames = 0 48 | min_frame_number = 8 49 | 50 | for fid,fname in tqdm_enumerate(file_names): 51 | if fname == osp.join(folder, 'annotations/train/021133_mpii_train.json'): 52 | continue 53 | 54 | with open(fname, 'r') as entry: 55 | anns = json.load(entry) 56 | # num_frames = anns['images'][0]['nframes'] 57 | anns['images'] = [item for item in anns['images'] if item['is_labeled'] ] 58 | num_frames = len(anns['images']) 59 | frame2imgname = dict() 60 | for el in anns['images']: 61 | frame2imgname[el['frame_id']] = el['file_name'] 62 | 63 | num_people = -1 64 | for x in anns['annotations']: 65 | if num_people < x['track_id']: 66 | num_people = x['track_id'] 67 | num_people += 1 68 | posetrack_joints = get_posetrack_original_kp_names() 69 | idxs = [anns['categories'][0]['keypoints'].index(h) for h in posetrack_joints if h in anns['categories'][0]['keypoints']] 70 | for x in anns['annotations']: 71 | kps = np.array(x['keypoints']).reshape((17,3)) 72 | kps = kps[idxs,:] 73 | x['keypoints'] = list(kps.flatten()) 74 | 75 | tot_frames += num_people * num_frames 76 | for p_id in range(num_people): 77 | 78 | annot_pid = [(item['keypoints'], item['bbox'], item['image_id']) 79 | for item in anns['annotations'] 80 | if item['track_id'] == p_id and not(np.count_nonzero(item['keypoints']) == 0) ] 81 | 82 | if len(annot_pid) < min_frame_number: 83 | nn_corrupted += len(annot_pid) 84 | continue 85 | 86 | bbox = np.zeros((len(annot_pid),4)) 87 | # perm_idxs = get_perm_idxs('posetrack', 'common') 88 | kp_2d = np.zeros((len(annot_pid), len(annot_pid[0][0])//3 ,3)) 89 | img_paths = np.zeros((len(annot_pid))) 90 | 91 | for i, (key2djnts, bbox_p, image_id) in enumerate(annot_pid): 92 | 93 | if (bbox_p[2]==0 or bbox_p[3]==0) : 94 | nn_corrupted +=1 95 | continue 96 | 97 | img_paths[i] = image_id 98 | key2djnts[2::3] = len(key2djnts[2::3])*[1] 99 | 100 | kp_2d[i,:] = np.array(key2djnts).reshape(int(len(key2djnts)/3),3) # [perm_idxs, :] 101 | for kp_loc in kp_2d[i,:]: 102 | if kp_loc[0] == 0 and kp_loc[1] == 0: 103 | kp_loc[2] = 0 104 | 105 | 106 | x_tl = bbox_p[0] 107 | y_tl = bbox_p[1] 108 | w = bbox_p[2] 109 | h = bbox_p[3] 110 | bbox_p[0] = x_tl + w / 2 111 | bbox_p[1] = y_tl + h / 2 112 | # 113 | 114 | w = h = np.where(w / h > 1, w, h) 115 | w = h = h * 0.8 116 | bbox_p[2] = w 117 | bbox_p[3] = h 118 | bbox[i, :] = bbox_p 119 | 120 | img_paths = list(img_paths) 121 | img_paths = [osp.join(folder, frame2imgname[item]) if item != 0 else 0 for item in img_paths ] 122 | 123 | bbx_idxs = [] 124 | for bbx_id, bbx in enumerate(bbox): 125 | if np.count_nonzero(bbx) == 0: 126 | bbx_idxs += [bbx_id] 127 | 128 | kp_2d = np.delete(kp_2d, bbx_idxs, 0) 
129 | img_paths = np.delete(np.array(img_paths), bbx_idxs, 0) 130 | bbox = np.delete(bbox, np.where(~bbox.any(axis=1))[0], axis=0) 131 | 132 | # Convert to common 2d keypoint format 133 | if bbox.size == 0 or bbox.shape[0] < min_frame_number: 134 | nn_corrupted += 1 135 | continue 136 | 137 | kp_2d = convert_kps(kp_2d, src='posetrack', dst='spin') 138 | 139 | dataset['vid_name'].append(np.array([f'{fname}_{p_id}']*img_paths.shape[0])) 140 | dataset['img_name'].append(np.array(img_paths)) 141 | dataset['joints2D'].append(kp_2d) 142 | dataset['bbox'].append(np.array(bbox)) 143 | 144 | # compute_features 145 | features = extract_features( 146 | model, 147 | np.array(img_paths), 148 | bbox, 149 | kp_2d=kp_2d, 150 | dataset='spin', 151 | debug=False, 152 | ) 153 | 154 | assert kp_2d.shape[0] == img_paths.shape[0] == bbox.shape[0] 155 | 156 | dataset['features'].append(features) 157 | 158 | 159 | print(nn_corrupted, tot_frames) 160 | for k in dataset.keys(): 161 | dataset[k] = np.array(dataset[k]) 162 | 163 | for k in dataset.keys(): 164 | dataset[k] = np.concatenate(dataset[k]) 165 | 166 | for k,v in dataset.items(): 167 | print(k, v.shape) 168 | 169 | return dataset 170 | 171 | 172 | if __name__ == '__main__': 173 | parser = argparse.ArgumentParser() 174 | parser.add_argument('--dir', type=str, help='dataset directory', default='data/posetrack') 175 | args = parser.parse_args() 176 | 177 | dataset_train = read_data(args.dir, 'train') 178 | joblib.dump(dataset_train, osp.join(VIBE_DB_DIR, 'posetrack_train_db.pt')) 179 | -------------------------------------------------------------------------------- /lib/data_utils/threedpw_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import sys 18 | sys.path.append('.') 19 | 20 | import os 21 | import cv2 22 | import torch 23 | import joblib 24 | import argparse 25 | import numpy as np 26 | import pickle as pkl 27 | import os.path as osp 28 | from tqdm import tqdm 29 | 30 | from lib.models import spin 31 | from lib.data_utils.kp_utils import * 32 | from lib.core.config import VIBE_DB_DIR, VIBE_DATA_DIR 33 | from lib.utils.smooth_bbox import get_smooth_bbox_params 34 | from lib.models.smpl import SMPL, SMPL_MODEL_DIR, H36M_TO_J14 35 | from lib.data_utils.feature_extractor import extract_features 36 | from lib.utils.geometry import batch_rodrigues, rotation_matrix_to_angle_axis 37 | 38 | NUM_JOINTS = 24 39 | VIS_THRESH = 0.3 40 | MIN_KP = 6 41 | 42 | def read_data(folder, set, debug=False): 43 | 44 | dataset = { 45 | 'vid_name': [], 46 | 'frame_id': [], 47 | 'joints3D': [], 48 | 'joints2D': [], 49 | 'shape': [], 50 | 'pose': [], 51 | 'bbox': [], 52 | 'img_name': [], 53 | 'features': [], 54 | 'valid': [], 55 | } 56 | 57 | model = spin.get_pretrained_hmr() 58 | 59 | sequences = [x.split('.')[0] for x in os.listdir(osp.join(folder, 'sequenceFiles', set))] 60 | 61 | J_regressor = None 62 | 63 | smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False) 64 | if set == 'test' or set == 'validation': 65 | J_regressor = torch.from_numpy(np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float() 66 | 67 | for i, seq in tqdm(enumerate(sequences)): 68 | 69 | data_file = osp.join(folder, 'sequenceFiles', set, seq + '.pkl') 70 | 71 | data = pkl.load(open(data_file, 'rb'), encoding='latin1') 72 | 73 | img_dir = osp.join(folder, 'imageFiles', seq) 74 | 75 | num_people = len(data['poses']) 76 | num_frames = len(data['img_frame_ids']) 77 | assert (data['poses2d'][0].shape[0] == num_frames) 78 | 79 | for p_id in range(num_people): 80 | pose = torch.from_numpy(data['poses'][p_id]).float() 81 | shape = torch.from_numpy(data['betas'][p_id][:10]).float().repeat(pose.size(0), 1) 82 | trans = torch.from_numpy(data['trans'][p_id]).float() 83 | j2d = data['poses2d'][p_id].transpose(0,2,1) 84 | cam_pose = data['cam_poses'] 85 | campose_valid = data['campose_valid'][p_id] 86 | 87 | # ======== Align the mesh params ======== # 88 | rot = pose[:, :3] 89 | rot_mat = batch_rodrigues(rot) 90 | 91 | Rc = torch.from_numpy(cam_pose[:, :3, :3]).float() 92 | Rs = torch.bmm(Rc, rot_mat.reshape(-1, 3, 3)) 93 | rot = rotation_matrix_to_angle_axis(Rs) 94 | pose[:, :3] = rot 95 | # ======== Align the mesh params ======== # 96 | 97 | output = smpl(betas=shape, body_pose=pose[:,3:], global_orient=pose[:,:3], transl=trans) 98 | # verts = output.vertices 99 | j3d = output.joints 100 | 101 | if J_regressor is not None: 102 | vertices = output.vertices 103 | J_regressor_batch = J_regressor[None, :].expand(vertices.shape[0], -1, -1).to(vertices.device) 104 | j3d = torch.matmul(J_regressor_batch, vertices) 105 | j3d = j3d[:, H36M_TO_J14, :] 106 | 107 | img_paths = [] 108 | for i_frame in range(num_frames): 109 | img_path = os.path.join(img_dir + '/image_{:05d}.jpg'.format(i_frame)) 110 | img_paths.append(img_path) 111 | 112 | bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(j2d, vis_thresh=VIS_THRESH, sigma=8) 113 | 114 | # process bbox_params 115 | c_x = bbox_params[:,0] 116 | c_y = bbox_params[:,1] 117 | scale = bbox_params[:,2] 118 | w = h = 150. 
/ scale 119 | w = h = h * 1.1 120 | bbox = np.vstack([c_x,c_y,w,h]).T 121 | 122 | # process keypoints 123 | j2d[:, :, 2] = j2d[:, :, 2] > 0.3 # set the visibility flags 124 | # Convert to common 2d keypoint format 125 | perm_idxs = get_perm_idxs('3dpw', 'common') 126 | perm_idxs += [0, 0] # no neck, top head 127 | j2d = j2d[:, perm_idxs] 128 | j2d[:, 12:, 2] = 0.0 129 | 130 | # print('j2d', j2d[time_pt1:time_pt2].shape) 131 | # print('campose', campose_valid[time_pt1:time_pt2].shape) 132 | 133 | img_paths_array = np.array(img_paths)[time_pt1:time_pt2] 134 | dataset['vid_name'].append(np.array([f'{seq}_{p_id}']*num_frames)[time_pt1:time_pt2]) 135 | dataset['frame_id'].append(np.arange(0, num_frames)[time_pt1:time_pt2]) 136 | dataset['img_name'].append(img_paths_array) 137 | dataset['joints3D'].append(j3d.numpy()[time_pt1:time_pt2]) 138 | dataset['joints2D'].append(j2d[time_pt1:time_pt2]) 139 | dataset['shape'].append(shape.numpy()[time_pt1:time_pt2]) 140 | dataset['pose'].append(pose.numpy()[time_pt1:time_pt2]) 141 | dataset['bbox'].append(bbox) 142 | dataset['valid'].append(campose_valid[time_pt1:time_pt2]) 143 | 144 | features = extract_features(model, img_paths_array, bbox, 145 | kp_2d=j2d[time_pt1:time_pt2], debug=debug, dataset='3dpw', scale=1.2) 146 | dataset['features'].append(features) 147 | 148 | for k in dataset.keys(): 149 | dataset[k] = np.concatenate(dataset[k]) 150 | print(k, dataset[k].shape) 151 | 152 | # Filter out keypoints 153 | indices_to_use = np.where((dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0] 154 | for k in dataset.keys(): 155 | dataset[k] = dataset[k][indices_to_use] 156 | 157 | return dataset 158 | 159 | 160 | if __name__ == '__main__': 161 | parser = argparse.ArgumentParser() 162 | parser.add_argument('--dir', type=str, help='dataset directory', default='data/3dpw') 163 | args = parser.parse_args() 164 | 165 | debug = False 166 | 167 | dataset = read_data(args.dir, 'validation', debug=debug) 168 | joblib.dump(dataset, osp.join(VIBE_DB_DIR, '3dpw_val_db.pt')) 169 | 170 | dataset = read_data(args.dir, 'test', debug=debug) 171 | joblib.dump(dataset, osp.join(VIBE_DB_DIR, '3dpw_test_db.pt')) 172 | -------------------------------------------------------------------------------- /lib/dataset/dataset_3d.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import os 18 | import torch 19 | import random 20 | import logging 21 | import numpy as np 22 | import os.path as osp 23 | import joblib 24 | 25 | from torch.utils.data import Dataset 26 | from lib.core.config import VIBE_DB_DIR 27 | from lib.data_utils.kp_utils import convert_kps 28 | from lib.data_utils.img_utils import normalize_2d_kp, transfrom_keypoints, split_into_chunks 29 | 30 | logger = logging.getLogger(__name__) 31 | 32 | class Dataset3D(Dataset): 33 | def __init__(self, set, seqlen, overlap=0., folder=None, dataset_name=None, debug=False): 34 | 35 | self.folder = folder 36 | self.set = set 37 | self.dataset_name = dataset_name 38 | self.seqlen = seqlen 39 | self.stride = int(seqlen * (1-overlap)) 40 | self.debug = debug 41 | self.db = self.load_db() 42 | self.vid_indices = split_into_chunks(self.db['vid_name'], self.seqlen, self.stride) 43 | 44 | def __len__(self): 45 | return len(self.vid_indices) 46 | 47 | def __getitem__(self, index): 48 | return self.get_single_item(index) 49 | 50 | def load_db(self): 51 | db_file = osp.join(VIBE_DB_DIR, f'{self.dataset_name}_{self.set}_db.pt') 52 | 53 | if osp.isfile(db_file): 54 | db = joblib.load(db_file) 55 | else: 56 | raise ValueError(f'{db_file} do not exists') 57 | 58 | print(f'Loaded {self.dataset_name} dataset from {db_file}') 59 | return db 60 | 61 | def get_single_item(self, index): 62 | start_index, end_index = self.vid_indices[index] 63 | 64 | is_train = self.set == 'train' 65 | 66 | if self.dataset_name == '3dpw': 67 | kp_2d = convert_kps(self.db['joints2D'][start_index:end_index + 1], src='common', dst='spin') 68 | kp_3d = self.db['joints3D'][start_index:end_index + 1] 69 | elif self.dataset_name == 'mpii3d': 70 | kp_2d = self.db['joints2D'][start_index:end_index + 1] 71 | if is_train: 72 | kp_3d = self.db['joints3D'][start_index:end_index + 1] 73 | else: 74 | kp_3d = convert_kps(self.db['joints3D'][start_index:end_index + 1], src='spin', dst='common') 75 | elif self.dataset_name == 'h36m': 76 | kp_2d = self.db['joints2D'][start_index:end_index + 1] 77 | if is_train: 78 | kp_3d = self.db['joints3D'][start_index:end_index + 1] 79 | else: 80 | kp_3d = convert_kps(self.db['joints3D'][start_index:end_index + 1], src='spin', dst='common') 81 | 82 | kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16) 83 | nj = 14 if not is_train else 49 84 | kp_3d_tensor = np.zeros((self.seqlen, nj, 3), dtype=np.float16) 85 | 86 | 87 | if self.dataset_name == '3dpw': 88 | pose = self.db['pose'][start_index:end_index+1] 89 | shape = self.db['shape'][start_index:end_index+1] 90 | w_smpl = torch.ones(self.seqlen).float() 91 | w_3d = torch.ones(self.seqlen).float() 92 | elif self.dataset_name == 'h36m': 93 | if not is_train: 94 | pose = np.zeros((kp_2d.shape[0], 72)) 95 | shape = np.zeros((kp_2d.shape[0], 10)) 96 | w_smpl = torch.zeros(self.seqlen).float() 97 | w_3d = torch.ones(self.seqlen).float() 98 | else: 99 | pose = self.db['pose'][start_index:end_index + 1] 100 | shape = self.db['shape'][start_index:end_index + 1] 101 | w_smpl = torch.ones(self.seqlen).float() 102 | w_3d = torch.ones(self.seqlen).float() 103 | elif self.dataset_name == 'mpii3d': 104 | pose = np.zeros((kp_2d.shape[0], 72)) 105 | shape = np.zeros((kp_2d.shape[0], 10)) 106 | w_smpl = torch.zeros(self.seqlen).float() 107 | w_3d = torch.ones(self.seqlen).float() 108 | 109 | bbox = self.db['bbox'][start_index:end_index + 1] 110 | input = torch.from_numpy(self.db['features'][start_index:end_index+1]).float() 111 | 
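# Note for the block below: each 85-D theta vector packs [camera (3) | SMPL pose (72) | SMPL shape (10)];
# the camera entries are initialized to the fixed weak-perspective template (1., 0., 0.) for every frame,
# as can be seen where theta is concatenated a few lines further down.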
112 | theta_tensor = np.zeros((self.seqlen, 85), dtype=np.float16) 113 | 114 | for idx in range(self.seqlen): 115 | # crop image and transform 2d keypoints 116 | kp_2d[idx,:,:2], trans = transfrom_keypoints( 117 | kp_2d=kp_2d[idx,:,:2], 118 | center_x=bbox[idx,0], 119 | center_y=bbox[idx,1], 120 | width=bbox[idx,2], 121 | height=bbox[idx,3], 122 | patch_width=224, 123 | patch_height=224, 124 | do_augment=False, 125 | ) 126 | 127 | kp_2d[idx,:,:2] = normalize_2d_kp(kp_2d[idx,:,:2], 224) 128 | 129 | # theta shape (85,) 130 | theta = np.concatenate((np.array([1., 0., 0.]), pose[idx], shape[idx]), axis=0) 131 | 132 | kp_2d_tensor[idx] = kp_2d[idx] 133 | theta_tensor[idx] = theta 134 | kp_3d_tensor[idx] = kp_3d[idx] 135 | 136 | target = { 137 | 'features': input, 138 | 'theta': torch.from_numpy(theta_tensor).float(), # camera, pose and shape 139 | 'kp_2d': torch.from_numpy(kp_2d_tensor).float(), # 2D keypoints transformed according to bbox cropping 140 | 'kp_3d': torch.from_numpy(kp_3d_tensor).float(), # 3D keypoints 141 | 'w_smpl': w_smpl, 142 | 'w_3d': w_3d, 143 | } 144 | 145 | if self.dataset_name == 'mpii3d' and not is_train: 146 | target['valid'] = self.db['valid_i'][start_index:end_index+1] 147 | 148 | if self.dataset_name == '3dpw' and not is_train: 149 | vn = self.db['vid_name'][start_index:end_index + 1] 150 | fi = self.db['frame_id'][start_index:end_index + 1] 151 | target['instance_id'] = [f'{v}/{f}'for v,f in zip(vn,fi)] 152 | 153 | 154 | 155 | # if self.dataset_name == '3dpw' and not self.is_train: 156 | # target['imgname'] = self.db['img_name'][start_index:end_index+1].tolist() 157 | # target['imgname'] = np.array(target['imgname']) 158 | # print(target['imgname'].dtype) 159 | # target['center'] = self.db['bbox'][start_index:end_index+1, :2] 160 | # target['valid'] = torch.from_numpy(self.db['valid'][start_index:end_index+1]) 161 | 162 | if self.debug: 163 | from lib.data_utils.img_utils import get_single_image_crop 164 | 165 | if self.dataset_name == 'mpii3d': 166 | video = self.db['img_name'][start_index:end_index+1] 167 | # print(video) 168 | elif self.dataset_name == 'h36m': 169 | video = self.db['img_name'][start_index:end_index + 1] 170 | else: 171 | vid_name = self.db['vid_name'][start_index] 172 | vid_name = '_'.join(vid_name.split('_')[:-1]) 173 | f = osp.join(self.folder, 'imageFiles', vid_name) 174 | video_file_list = [osp.join(f, x) for x in sorted(os.listdir(f)) if x.endswith('.jpg')] 175 | frame_idxs = self.db['frame_id'][start_index:end_index + 1] 176 | # print(f, frame_idxs) 177 | video = [video_file_list[i] for i in frame_idxs] 178 | 179 | video = torch.cat( 180 | [get_single_image_crop(image, bbox).unsqueeze(0) for image, bbox in zip(video, bbox)], dim=0 181 | ) 182 | 183 | target['video'] = video 184 | 185 | return target 186 | 187 | 188 | 189 | 190 | 191 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VIBE: Video Inference for Human Body Pose and Shape Estimation [CVPR-2020] 2 | [![report](https://img.shields.io/badge/arxiv-report-red)](https://arxiv.org/abs/1912.05656) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1dFfwxZ52MN86FA6uFNypMEdFShd2euQA) 
[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/vibe-video-inference-for-human-body-pose-and/3d-human-pose-estimation-on-3dpw)](https://paperswithcode.com/sota/3d-human-pose-estimation-on-3dpw?p=vibe-video-inference-for-human-body-pose-and) 3 | 4 |
5 | 6 | 7 |
8 | 9 | Check our YouTube videos below for more details. 10 | 11 | | Paper Video | Qualitative Results | 12 | |------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------| 13 | | [![PaperVideo](https://img.youtube.com/vi/rIr-nX63dUA/0.jpg)](https://www.youtube.com/watch?v=rIr-nX63dUA) | [![QualitativeResults](https://img.youtube.com/vi/fW0sIZfQcIs/0.jpg)](https://www.youtube.com/watch?v=fW0sIZfQcIs) | 14 | 15 | 17 | 18 | > [**VIBE: Video Inference for Human Body Pose and Shape Estimation**](https://arxiv.org/abs/1912.05656), 19 | > [Muhammed Kocabas](https://ps.is.tuebingen.mpg.de/person/mkocabas), [Nikos Athanasiou](https://ps.is.tuebingen.mpg.de/person/nathanasiou), 20 | [Michael J. Black](https://ps.is.tuebingen.mpg.de/person/black), 21 | > *IEEE Computer Vision and Pattern Recognition, 2020* 22 | 23 | ## Features 24 | 25 | _**V**ideo **I**nference for **B**ody Pose and Shape **E**stimation_ (VIBE) is a video pose and shape estimation method. 26 | It predicts the parameters of the SMPL body model for each frame of an input video (see the loading snippet below for how to inspect these per-frame outputs). Please refer to our [arXiv report](https://arxiv.org/abs/1912.05656) for further details. 27 | 28 | This implementation: 29 | 30 | - has the demo and training code for VIBE implemented purely in PyTorch, 31 | - can work on arbitrary videos with multiple people, 32 | - supports both CPU and GPU inference (though GPU is much faster), 33 | - is fast, up to 30 FPS on an RTX 2080Ti (see [this table](doc/demo.md#runtime-performance)), 34 | - achieves SOTA results on the 3DPW and MPI-INF-3DHP datasets, 35 | - includes a Temporal SMPLify implementation, 36 | - includes the training code and detailed instructions on how to train it from scratch, 37 | - can create FBX/glTF output to be used with major 3D graphics software. 38 | 39 |
40 | 41 | 42 |
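As referenced in the feature list above, VIBE's per-frame predictions can be inspected directly from the demo output. A minimal sketch, assuming the demo has been run on `sample_video.mp4` and that the output dictionary uses the keys `pose` and `betas` (exact key names may differ between versions):

```python
import joblib

# The demo saves one entry per tracked person id
output = joblib.load('output/sample_video/vibe_output.pkl')

for person_id, pred in output.items():
    # Assumed layout: 'pose' is (num_frames, 72) axis-angle SMPL pose,
    # 'betas' is (num_frames, 10) SMPL shape parameters
    print(person_id, pred['pose'].shape, pred['betas'].shape)
```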
43 | 44 | ## Updates 45 | 46 | - 06/10/2020: Added OneEuroFilter smoothing support. 47 | - 14/09/2020: FBX/glTF conversion script is released. 48 | 49 | ## Getting Started 50 | VIBE has been implemented and tested on Ubuntu 18.04 with Python >= 3.7. It supports both GPU and CPU inference. 51 | If you don't have a suitable device, try running our Colab demo. 52 | 53 | Clone the repo: 54 | ```bash 55 | git clone https://github.com/mkocabas/VIBE.git 56 | ``` 57 | 58 | Install the requirements using `virtualenv` or `conda`: 59 | ```bash 60 | # pip 61 | source scripts/install_pip.sh 62 | 63 | # conda 64 | source scripts/install_conda.sh 65 | ``` 66 | 67 | ## Running the Demo 68 | 69 | We have prepared demo code to run VIBE on arbitrary videos. 70 | First, you need to download the required data (i.e., our trained model and the SMPL model parameters). To do this, you can simply run: 71 | 72 | ```bash 73 | source scripts/prepare_data.sh 74 | ``` 75 | 76 | Then, running the demo is as simple as: 77 | 78 | ```bash 79 | # Run on a local video 80 | python demo.py --vid_file sample_video.mp4 --output_folder output/ --display 81 | 82 | # Run on a YouTube video 83 | python demo.py --vid_file https://www.youtube.com/watch?v=wPZP8Bwxplo --output_folder output/ --display 84 | ``` 85 | 86 | Refer to [`doc/demo.md`](doc/demo.md) for more details about the demo code. 87 | 88 | Sample demo output with the `--sideview` flag: 89 | 90 |
91 | 92 |
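The sample `--sideview` output referenced above can be reproduced by adding that flag to the demo commands shown earlier; a minimal example (see [`doc/demo.md`](doc/demo.md) for the full set of rendering options):

```bash
# Render the estimated body mesh together with a rotated side view
python demo.py --vid_file sample_video.mp4 --output_folder output/ --sideview
```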
93 | 94 | ### FBX and glTF output (New Feature!) 95 | We provide a script to convert VIBE output to standalone FBX/glTF files to be used in 3D graphics tools such as 96 | Blender, Unity, etc. Follow the steps below to run the conversion script. 97 | 98 | - Download the FBX files for the SMPL body model: 99 | - Go to the [SMPL website](https://smpl.is.tue.mpg.de/) and create an account. 100 | - Download the Unity-compatible FBX file through the [link](https://psfiles.is.tuebingen.mpg.de/downloads/smpl/SMPL_unity_v-1-0-0-zip). 101 | - Unzip the contents and place them in `data/SMPL_unity_v.1.0.0`. 102 | - Install the Blender Python API. 103 | - Note that we tested our script with Blender v2.8.0 and v2.8.3. 104 | - Run the command below to convert VIBE output to FBX: 105 | ``` 106 | python lib/utils/fbx_output.py \ 107 | --input output/sample_video/vibe_output.pkl \ 108 | --output output/sample_video/fbx_output.fbx \ # specify the file extension as *.glb for glTF 109 | --fps_source 30 \ 110 | --fps_target 30 \ 111 | --gender <male or female> \ 112 | --person_id <tracklet id> 113 | 114 | ``` 115 | 116 | ## Google Colab 117 | If you do not have a suitable environment to run this project, then you could give Google Colab a try. 118 | It allows you to run the project in the cloud, free of charge. You may try our Colab demo using the notebook we have prepared: 119 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1dFfwxZ52MN86FA6uFNypMEdFShd2euQA) 120 | 121 | 122 | ## Training 123 | Run the commands below to start training: 124 | 125 | ```shell script 126 | source scripts/prepare_training_data.sh 127 | python train.py --cfg configs/config.yaml 128 | ``` 129 | 130 | Note that the training datasets should be downloaded and prepared before running the data processing script. 131 | Please see [`doc/train.md`](doc/train.md) for details on how to prepare them. 132 | 133 | ## Evaluation 134 | 135 | Here we compare VIBE with recent state-of-the-art methods on 3D pose estimation datasets. The evaluation metric is 136 | Procrustes-Aligned Mean Per Joint Position Error (PA-MPJPE) in mm. 137 | 138 | | Models | 3DPW ↓ | MPI-INF-3DHP ↓ | H36M ↓ | 139 | |----------------|:----:|:------------:|:----:| 140 | | SPIN | 59.2 | 67.5 | **41.1** | 141 | | Temporal HMR | 76.7 | 89.8 | 56.8 | 142 | | VIBE | 56.5 | **63.4** | 41.5 | 143 | 144 | See [`doc/eval.md`](doc/eval.md) to reproduce the results in this table or to 145 | evaluate a pretrained model. 146 | 147 | **Correction**: Due to a mistake in dataset preprocessing, the results of VIBE trained with 3DPW in Table 1 of the original paper are not correct. 148 | Moreover, even though training with 3DPW yields better quantitative performance, it does not give good 149 | qualitative results. The arXiv version will be updated with the corrected results. 150 | 151 | ## Citation 152 | 153 | ```bibtex 154 | @inproceedings{kocabas2019vibe, 155 | title={VIBE: Video Inference for Human Body Pose and Shape Estimation}, 156 | author={Kocabas, Muhammed and Athanasiou, Nikos and Black, Michael J.}, 157 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 158 | month = {June}, 159 | year = {2020} 160 | } 161 | ``` 162 | 163 | ## License 164 | This code is available for **non-commercial scientific research purposes** as defined in the [LICENSE file](LICENSE). By downloading and using this code you agree to the terms in the [LICENSE](LICENSE). Third-party datasets and software are subject to their respective licenses.
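As a pointer for the evaluation metrics in the table above: `lib/utils/eval_utils.py` (reproduced below) exposes `compute_errors`, which returns both the pelvis-aligned MPJPE and the Procrustes-aligned PA-MPJPE over the 14 common joints. A minimal sketch with random arrays standing in for real joint predictions (inputs are assumed to be in meters):

```python
import numpy as np
from lib.utils.eval_utils import compute_errors

gt3ds = np.random.randn(100, 14, 3)   # ground-truth joints: N x 14 x 3
preds = np.random.randn(100, 14, 3)   # predicted joints:    N x 14 x 3

errors, errors_pa = compute_errors(gt3ds, preds)  # per-frame errors in meters
print(f'MPJPE: {np.mean(errors) * 1000:.2f} mm, PA-MPJPE: {np.mean(errors_pa) * 1000:.2f} mm')
```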
165 | 166 | 167 | ## References 168 | We indicate if a function or script is borrowed externally inside each file. Here are some great resources we 169 | benefited from: 170 | 171 | - Pretrained HMR and some functions are borrowed from [SPIN](https://github.com/nkolot/SPIN). 172 | - The SMPL model and layers are from the [SMPL-X model](https://github.com/vchoutas/smplx). 173 | - Some functions are borrowed from [Temporal HMR](https://github.com/akanazawa/human_dynamics). 174 | - Some functions are borrowed from [HMR-pytorch](https://github.com/MandyMo/pytorch_HMR). 175 | - Some functions are borrowed from [Kornia](https://github.com/kornia/kornia). 176 | - The pose tracker is from [STAF](https://github.com/soulslicer/openpose/tree/staf). 177 | 178 | -------------------------------------------------------------------------------- /lib/utils/eval_utils.py: -------------------------------------------------------------------------------- 1 | # Some functions are borrowed from https://github.com/akanazawa/human_dynamics/blob/master/src/evaluation/eval_util.py 2 | # Adhere to their license when using these functions 3 | 4 | import torch 5 | import numpy as np 6 | 7 | 8 | def compute_accel(joints): 9 | """ 10 | Computes acceleration of 3D joints. 11 | Args: 12 | joints (Nx25x3). 13 | Returns: 14 | Accelerations (N-2). 15 | """ 16 | velocities = joints[1:] - joints[:-1] 17 | acceleration = velocities[1:] - velocities[:-1] 18 | acceleration_normed = np.linalg.norm(acceleration, axis=2) 19 | return np.mean(acceleration_normed, axis=1) 20 | 21 | 22 | def compute_error_accel(joints_gt, joints_pred, vis=None): 23 | """ 24 | Computes acceleration error: 25 | 1/(n-2) \sum_{i=1}^{n-1} || x_{i-1} - 2 x_i + x_{i+1} ||_2 26 | Note that for each frame that is not visible, three entries in the 27 | acceleration error should be zeroed out. 28 | Args: 29 | joints_gt (Nx14x3). 30 | joints_pred (Nx14x3). 31 | vis (N). 32 | Returns: 33 | error_accel (N-2). 34 | """ 35 | # (N-2)x14x3 36 | accel_gt = joints_gt[:-2] - 2 * joints_gt[1:-1] + joints_gt[2:] 37 | accel_pred = joints_pred[:-2] - 2 * joints_pred[1:-1] + joints_pred[2:] 38 | 39 | normed = np.linalg.norm(accel_pred - accel_gt, axis=2) 40 | 41 | if vis is None: 42 | new_vis = np.ones(len(normed), dtype=bool) 43 | else: 44 | invis = np.logical_not(vis) 45 | invis1 = np.roll(invis, -1) 46 | invis2 = np.roll(invis, -2) 47 | new_invis = np.logical_or(invis, np.logical_or(invis1, invis2))[:-2] 48 | new_vis = np.logical_not(new_invis) 49 | 50 | return np.mean(normed[new_vis], axis=1) 51 | 52 | 53 | def compute_error_verts(pred_verts, target_verts=None, target_theta=None): 54 | """ 55 | Computes mean per-vertex position error (PVE) over the 6890 SMPL surface vertices. 56 | Args: 57 | pred_verts (Nx6890x3). 58 | target_verts (Nx6890x3), or target_theta (Nx85) from which target vertices are regressed. 59 | Returns: 60 | error_verts (N).
61 | """ 62 | 63 | if target_verts is None: 64 | from lib.models.smpl import SMPL_MODEL_DIR 65 | from lib.models.smpl import SMPL 66 | device = 'cpu' 67 | smpl = SMPL( 68 | SMPL_MODEL_DIR, 69 | batch_size=1, # target_theta.shape[0], 70 | ).to(device) 71 | 72 | betas = torch.from_numpy(target_theta[:,75:]).to(device) 73 | pose = torch.from_numpy(target_theta[:,3:75]).to(device) 74 | 75 | target_verts = [] 76 | b_ = torch.split(betas, 5000) 77 | p_ = torch.split(pose, 5000) 78 | 79 | for b,p in zip(b_,p_): 80 | output = smpl(betas=b, body_pose=p[:, 3:], global_orient=p[:, :3], pose2rot=True) 81 | target_verts.append(output.vertices.detach().cpu().numpy()) 82 | 83 | target_verts = np.concatenate(target_verts, axis=0) 84 | 85 | assert len(pred_verts) == len(target_verts) 86 | error_per_vert = np.sqrt(np.sum((target_verts - pred_verts) ** 2, axis=2)) 87 | return np.mean(error_per_vert, axis=1) 88 | 89 | 90 | def compute_similarity_transform(S1, S2): 91 | ''' 92 | Computes a similarity transform (sR, t) that takes 93 | a set of 3D points S1 (3 x N) closest to a set of 3D points S2, 94 | where R is an 3x3 rotation matrix, t 3x1 translation, s scale. 95 | i.e. solves the orthogonal Procrutes problem. 96 | ''' 97 | transposed = False 98 | if S1.shape[0] != 3 and S1.shape[0] != 2: 99 | S1 = S1.T 100 | S2 = S2.T 101 | transposed = True 102 | assert(S2.shape[1] == S1.shape[1]) 103 | 104 | # 1. Remove mean. 105 | mu1 = S1.mean(axis=1, keepdims=True) 106 | mu2 = S2.mean(axis=1, keepdims=True) 107 | X1 = S1 - mu1 108 | X2 = S2 - mu2 109 | 110 | # 2. Compute variance of X1 used for scale. 111 | var1 = np.sum(X1**2) 112 | 113 | # 3. The outer product of X1 and X2. 114 | K = X1.dot(X2.T) 115 | 116 | # 4. Solution that Maximizes trace(R'K) is R=U*V', where U, V are 117 | # singular vectors of K. 118 | U, s, Vh = np.linalg.svd(K) 119 | V = Vh.T 120 | # Construct Z that fixes the orientation of R to get det(R)=1. 121 | Z = np.eye(U.shape[0]) 122 | Z[-1, -1] *= np.sign(np.linalg.det(U.dot(V.T))) 123 | # Construct R. 124 | R = V.dot(Z.dot(U.T)) 125 | 126 | # 5. Recover scale. 127 | scale = np.trace(R.dot(K)) / var1 128 | 129 | # 6. Recover translation. 130 | t = mu2 - scale*(R.dot(mu1)) 131 | 132 | # 7. Error: 133 | S1_hat = scale*R.dot(S1) + t 134 | 135 | if transposed: 136 | S1_hat = S1_hat.T 137 | 138 | return S1_hat 139 | 140 | 141 | def compute_similarity_transform_torch(S1, S2): 142 | ''' 143 | Computes a similarity transform (sR, t) that takes 144 | a set of 3D points S1 (3 x N) closest to a set of 3D points S2, 145 | where R is an 3x3 rotation matrix, t 3x1 translation, s scale. 146 | i.e. solves the orthogonal Procrutes problem. 147 | ''' 148 | transposed = False 149 | if S1.shape[0] != 3 and S1.shape[0] != 2: 150 | S1 = S1.T 151 | S2 = S2.T 152 | transposed = True 153 | assert (S2.shape[1] == S1.shape[1]) 154 | 155 | # 1. Remove mean. 156 | mu1 = S1.mean(axis=1, keepdims=True) 157 | mu2 = S2.mean(axis=1, keepdims=True) 158 | X1 = S1 - mu1 159 | X2 = S2 - mu2 160 | 161 | # print('X1', X1.shape) 162 | 163 | # 2. Compute variance of X1 used for scale. 164 | var1 = torch.sum(X1 ** 2) 165 | 166 | # print('var', var1.shape) 167 | 168 | # 3. The outer product of X1 and X2. 169 | K = X1.mm(X2.T) 170 | 171 | # 4. Solution that Maximizes trace(R'K) is R=U*V', where U, V are 172 | # singular vectors of K. 173 | U, s, V = torch.svd(K) 174 | # V = Vh.T 175 | # Construct Z that fixes the orientation of R to get det(R)=1. 
176 | Z = torch.eye(U.shape[0], device=S1.device) 177 | Z[-1, -1] *= torch.sign(torch.det(U @ V.T)) 178 | # Construct R. 179 | R = V.mm(Z.mm(U.T)) 180 | 181 | # print('R', X1.shape) 182 | 183 | # 5. Recover scale. 184 | scale = torch.trace(R.mm(K)) / var1 185 | # print(R.shape, mu1.shape) 186 | # 6. Recover translation. 187 | t = mu2 - scale * (R.mm(mu1)) 188 | # print(t.shape) 189 | 190 | # 7. Error: 191 | S1_hat = scale * R.mm(S1) + t 192 | 193 | if transposed: 194 | S1_hat = S1_hat.T 195 | 196 | return S1_hat 197 | 198 | 199 | def batch_compute_similarity_transform_torch(S1, S2): 200 | ''' 201 | Computes a similarity transform (sR, t) that maps 202 | a set of 3D points S1 (3 x N) as closely as possible onto a set of 3D points S2, 203 | where R is a 3x3 rotation matrix, t a 3x1 translation, and s a scale factor, 204 | i.e. it solves the orthogonal Procrustes problem. 205 | ''' 206 | transposed = False 207 | if S1.shape[0] != 3 and S1.shape[0] != 2: 208 | S1 = S1.permute(0,2,1) 209 | S2 = S2.permute(0,2,1) 210 | transposed = True 211 | assert(S2.shape[1] == S1.shape[1]) 212 | 213 | # 1. Remove mean. 214 | mu1 = S1.mean(axis=-1, keepdims=True) 215 | mu2 = S2.mean(axis=-1, keepdims=True) 216 | 217 | X1 = S1 - mu1 218 | X2 = S2 - mu2 219 | 220 | # 2. Compute variance of X1 used for scale. 221 | var1 = torch.sum(X1**2, dim=1).sum(dim=1) 222 | 223 | # 3. The outer product of X1 and X2. 224 | K = X1.bmm(X2.permute(0,2,1)) 225 | 226 | # 4. The solution that maximizes trace(R'K) is R=U*V', where U, V are 227 | # singular vectors of K. 228 | U, s, V = torch.svd(K) 229 | 230 | # Construct Z that fixes the orientation of R to get det(R)=1. 231 | Z = torch.eye(U.shape[1], device=S1.device).unsqueeze(0) 232 | Z = Z.repeat(U.shape[0],1,1) 233 | Z[:,-1, -1] *= torch.sign(torch.det(U.bmm(V.permute(0,2,1)))) 234 | 235 | # Construct R. 236 | R = V.bmm(Z.bmm(U.permute(0,2,1))) 237 | 238 | # 5. Recover scale. 239 | scale = torch.cat([torch.trace(x).unsqueeze(0) for x in R.bmm(K)]) / var1 240 | 241 | # 6. Recover translation. 242 | t = mu2 - (scale.unsqueeze(-1).unsqueeze(-1) * (R.bmm(mu1))) 243 | 244 | # 7. Error: 245 | S1_hat = scale.unsqueeze(-1).unsqueeze(-1) * R.bmm(S1) + t 246 | 247 | if transposed: 248 | S1_hat = S1_hat.permute(0,2,1) 249 | 250 | return S1_hat 251 | 252 | 253 | def align_by_pelvis(joints): 254 | """ 255 | Assumes joints is 14 x 3 in LSP order. 256 | Then hips are: [3, 2] 257 | Takes the midpoint of these points, then subtracts it. 258 | """ 259 | 260 | left_id = 2 261 | right_id = 3 262 | 263 | pelvis = (joints[left_id, :] + joints[right_id, :]) / 2.0 264 | return joints - np.expand_dims(pelvis, axis=0) 265 | 266 | 267 | def compute_errors(gt3ds, preds): 268 | """ 269 | Gets MPJPE after pelvis alignment + MPJPE after Procrustes. 270 | Evaluates on the 14 common joints. 271 | Inputs: 272 | - gt3ds: N x 14 x 3 273 | - preds: N x 14 x 3 274 | """ 275 | errors, errors_pa = [], [] 276 | for i, (gt3d, pred) in enumerate(zip(gt3ds, preds)): 277 | gt3d = gt3d.reshape(-1, 3) 278 | # Root align. 279 | gt3d = align_by_pelvis(gt3d) 280 | pred3d = align_by_pelvis(pred) 281 | 282 | joint_error = np.sqrt(np.sum((gt3d - pred3d)**2, axis=1)) 283 | errors.append(np.mean(joint_error)) 284 | 285 | # Get PA error.
286 | pred3d_sym = compute_similarity_transform(pred3d, gt3d) 287 | pa_error = np.sqrt(np.sum((gt3d - pred3d_sym)**2, axis=1)) 288 | errors_pa.append(np.mean(pa_error)) 289 | 290 | return errors, errors_pa 291 | -------------------------------------------------------------------------------- /lib/smplify/prior.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from __future__ import absolute_import 18 | from __future__ import print_function 19 | from __future__ import division 20 | 21 | import sys 22 | import os 23 | 24 | import time 25 | import pickle 26 | 27 | import numpy as np 28 | 29 | import torch 30 | import torch.nn as nn 31 | 32 | DEFAULT_DTYPE = torch.float32 33 | 34 | 35 | def create_prior(prior_type, **kwargs): 36 | if prior_type == 'gmm': 37 | prior = MaxMixturePrior(**kwargs) 38 | elif prior_type == 'l2': 39 | return L2Prior(**kwargs) 40 | elif prior_type == 'angle': 41 | return SMPLifyAnglePrior(**kwargs) 42 | elif prior_type == 'none' or prior_type is None: 43 | # Don't use any pose prior 44 | def no_prior(*args, **kwargs): 45 | return 0.0 46 | prior = no_prior 47 | else: 48 | raise ValueError('Prior {}'.format(prior_type) + ' is not implemented') 49 | return prior 50 | 51 | 52 | class SMPLifyAnglePrior(nn.Module): 53 | def __init__(self, dtype=torch.float32, **kwargs): 54 | super(SMPLifyAnglePrior, self).__init__() 55 | 56 | # Indices for the rotation angle of 57 | # 55: left elbow, 90deg bend at -np.pi/2 58 | # 58: right elbow, 90deg bend at np.pi/2 59 | # 12: left knee, 90deg bend at np.pi/2 60 | # 15: right knee, 90deg bend at np.pi/2 61 | angle_prior_idxs = np.array([55, 58, 12, 15], dtype=np.int64) 62 | angle_prior_idxs = torch.tensor(angle_prior_idxs, dtype=torch.long) 63 | self.register_buffer('angle_prior_idxs', angle_prior_idxs) 64 | 65 | angle_prior_signs = np.array([1, -1, -1, -1], 66 | dtype=np.float32 if dtype == torch.float32 67 | else np.float64) 68 | angle_prior_signs = torch.tensor(angle_prior_signs, 69 | dtype=dtype) 70 | self.register_buffer('angle_prior_signs', angle_prior_signs) 71 | 72 | def forward(self, pose, with_global_pose=False): 73 | ''' Returns the angle prior loss for the given pose 74 | 75 | Args: 76 | pose: (Bx[23 + 1] * 3) torch tensor with the axis-angle 77 | representation of the rotations of the joints of the SMPL model. 78 | Kwargs: 79 | with_global_pose: Whether the pose vector also contains the global 80 | orientation of the SMPL model. If not then the indices must be 81 | corrected. 82 | Returns: 83 | A size (B) tensor containing the angle prior loss for each element 84 | in the batch.
85 | ''' 86 | angle_prior_idxs = self.angle_prior_idxs - (not with_global_pose) * 3 87 | return torch.exp(pose[:, angle_prior_idxs] * 88 | self.angle_prior_signs).pow(2) 89 | 90 | 91 | class L2Prior(nn.Module): 92 | def __init__(self, dtype=DEFAULT_DTYPE, reduction='sum', **kwargs): 93 | super(L2Prior, self).__init__() 94 | 95 | def forward(self, module_input, *args): 96 | return torch.sum(module_input.pow(2)) 97 | 98 | 99 | class MaxMixturePrior(nn.Module): 100 | 101 | def __init__(self, prior_folder='prior', 102 | num_gaussians=6, dtype=DEFAULT_DTYPE, epsilon=1e-16, 103 | use_merged=True, 104 | **kwargs): 105 | super(MaxMixturePrior, self).__init__() 106 | 107 | if dtype == DEFAULT_DTYPE: 108 | np_dtype = np.float32 109 | elif dtype == torch.float64: 110 | np_dtype = np.float64 111 | else: 112 | print('Unknown float type {}, exiting!'.format(dtype)) 113 | sys.exit(-1) 114 | 115 | self.num_gaussians = num_gaussians 116 | self.epsilon = epsilon 117 | self.use_merged = use_merged 118 | gmm_fn = 'gmm_{:02d}.pkl'.format(num_gaussians) 119 | 120 | full_gmm_fn = os.path.join(prior_folder, gmm_fn) 121 | if not os.path.exists(full_gmm_fn): 122 | print('The path to the mixture prior "{}"'.format(full_gmm_fn) + 123 | ' does not exist, exiting!') 124 | sys.exit(-1) 125 | 126 | with open(full_gmm_fn, 'rb') as f: 127 | gmm = pickle.load(f, encoding='latin1') 128 | 129 | if type(gmm) == dict: 130 | means = gmm['means'].astype(np_dtype) 131 | covs = gmm['covars'].astype(np_dtype) 132 | weights = gmm['weights'].astype(np_dtype) 133 | elif 'sklearn.mixture.gmm.GMM' in str(type(gmm)): 134 | means = gmm.means_.astype(np_dtype) 135 | covs = gmm.covars_.astype(np_dtype) 136 | weights = gmm.weights_.astype(np_dtype) 137 | else: 138 | print('Unknown type for the prior: {}, exiting!'.format(type(gmm))) 139 | sys.exit(-1) 140 | 141 | self.register_buffer('means', torch.tensor(means, dtype=dtype)) 142 | 143 | self.register_buffer('covs', torch.tensor(covs, dtype=dtype)) 144 | 145 | precisions = [np.linalg.inv(cov) for cov in covs] 146 | precisions = np.stack(precisions).astype(np_dtype) 147 | 148 | self.register_buffer('precisions', 149 | torch.tensor(precisions, dtype=dtype)) 150 | 151 | # The constant term: 152 | sqrdets = np.array([(np.sqrt(np.linalg.det(c))) 153 | for c in gmm['covars']]) 154 | const = (2 * np.pi)**(69 / 2.) 
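# ((2 * pi)^(69 / 2) is the Gaussian normalization constant for the 69-D body pose,
# i.e. 23 body joints x 3 axis-angle components; it enters the NLL weights computed next.)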
155 | 156 | nll_weights = np.asarray(gmm['weights'] / (const * 157 | (sqrdets / sqrdets.min()))) 158 | nll_weights = torch.tensor(nll_weights, dtype=dtype).unsqueeze(dim=0) 159 | self.register_buffer('nll_weights', nll_weights) 160 | 161 | weights = torch.tensor(gmm['weights'], dtype=dtype).unsqueeze(dim=0) 162 | self.register_buffer('weights', weights) 163 | 164 | self.register_buffer('pi_term', 165 | torch.log(torch.tensor(2 * np.pi, dtype=dtype))) 166 | 167 | cov_dets = [np.log(np.linalg.det(cov.astype(np_dtype)) + epsilon) 168 | for cov in covs] 169 | self.register_buffer('cov_dets', 170 | torch.tensor(cov_dets, dtype=dtype)) 171 | 172 | # The dimensionality of the random variable 173 | self.random_var_dim = self.means.shape[1] 174 | 175 | def get_mean(self): 176 | ''' Returns the mean of the mixture ''' 177 | mean_pose = torch.matmul(self.weights, self.means) 178 | return mean_pose 179 | 180 | def merged_log_likelihood(self, pose, betas): 181 | diff_from_mean = pose.unsqueeze(dim=1) - self.means 182 | 183 | prec_diff_prod = torch.einsum('mij,bmj->bmi', 184 | [self.precisions, diff_from_mean]) 185 | diff_prec_quadratic = (prec_diff_prod * diff_from_mean).sum(dim=-1) 186 | 187 | curr_loglikelihood = 0.5 * diff_prec_quadratic - \ 188 | torch.log(self.nll_weights) 189 | # curr_loglikelihood = 0.5 * (self.cov_dets.unsqueeze(dim=0) + 190 | # self.random_var_dim * self.pi_term + 191 | # diff_prec_quadratic 192 | # ) - torch.log(self.weights) 193 | 194 | min_likelihood, _ = torch.min(curr_loglikelihood, dim=1) 195 | return min_likelihood 196 | 197 | def log_likelihood(self, pose, betas, *args, **kwargs): 198 | ''' Create graph operation for negative log-likelihood calculation 199 | ''' 200 | likelihoods = [] 201 | 202 | for idx in range(self.num_gaussians): 203 | mean = self.means[idx] 204 | prec = self.precisions[idx] 205 | cov = self.covs[idx] 206 | diff_from_mean = pose - mean 207 | 208 | curr_loglikelihood = torch.einsum('bj,ji->bi', 209 | [diff_from_mean, prec]) 210 | curr_loglikelihood = torch.einsum('bi,bi->b', 211 | [curr_loglikelihood, 212 | diff_from_mean]) 213 | cov_term = torch.log(torch.det(cov) + self.epsilon) 214 | curr_loglikelihood += 0.5 * (cov_term + 215 | self.random_var_dim * 216 | self.pi_term) 217 | likelihoods.append(curr_loglikelihood) 218 | 219 | log_likelihoods = torch.stack(likelihoods, dim=1) 220 | min_idx = torch.argmin(log_likelihoods, dim=1) 221 | weight_component = self.nll_weights[:, min_idx] 222 | weight_component = -torch.log(weight_component) 223 | 224 | return weight_component + log_likelihoods[:, min_idx] 225 | 226 | def forward(self, pose, betas): 227 | if self.use_merged: 228 | return self.merged_log_likelihood(pose, betas) 229 | else: 230 | return self.log_likelihood(pose, betas) 231 | -------------------------------------------------------------------------------- /lib/smplify/losses.py: -------------------------------------------------------------------------------- 1 | # This script is the extended version of https://github.com/nkolot/SPIN/blob/master/smplify/losses.py to deal with 2 | # sequences inputs. 
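# Background for the robust term defined below: gmof() implements the Geman-McClure
# penalty rho(x) = (sigma^2 * x^2) / (sigma^2 + x^2), which behaves like a squared error
# for residuals much smaller than sigma but saturates at sigma^2, so outlier 2D joint
# detections cannot dominate the fitting objective.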
3 | 4 | import torch 5 | from lib.models.spin import perspective_projection 6 | from lib.models.smpl import JOINT_IDS 7 | 8 | 9 | def gmof(x, sigma): 10 | """ 11 | Geman-McClure error function 12 | """ 13 | x_squared = x ** 2 14 | sigma_squared = sigma ** 2 15 | return (sigma_squared * x_squared) / (sigma_squared + x_squared) 16 | 17 | 18 | def angle_prior(pose): 19 | """ 20 | Angle prior that penalizes unnatural bending of the knees and elbows 21 | """ 22 | # We subtract 3 because pose does not include the global rotation of the model 23 | return torch.exp( 24 | pose[:, [55 - 3, 58 - 3, 12 - 3, 15 - 3]] * torch.tensor([1., -1., -1, -1.], device=pose.device)) ** 2 25 | 26 | 27 | def body_fitting_loss(body_pose, betas, model_joints, camera_t, camera_center, 28 | joints_2d, joints_conf, pose_prior, 29 | focal_length=5000, sigma=100, pose_prior_weight=4.78, 30 | shape_prior_weight=5, angle_prior_weight=15.2, 31 | output='sum'): 32 | """ 33 | Loss function for body fitting 34 | """ 35 | # pose_prior_weight = 1. 36 | # shape_prior_weight = 1. 37 | # angle_prior_weight = 1. 38 | # sigma = 10. 39 | 40 | batch_size = body_pose.shape[0] 41 | rotation = torch.eye(3, device=body_pose.device).unsqueeze(0).expand(batch_size, -1, -1) 42 | projected_joints = perspective_projection(model_joints, rotation, camera_t, 43 | focal_length, camera_center) 44 | 45 | # Weighted robust reprojection error 46 | reprojection_error = gmof(projected_joints - joints_2d, sigma) 47 | reprojection_loss = (joints_conf ** 2) * reprojection_error.sum(dim=-1) 48 | 49 | # Pose prior loss 50 | pose_prior_loss = (pose_prior_weight ** 2) * pose_prior(body_pose, betas) 51 | 52 | # Angle prior for knees and elbows 53 | angle_prior_loss = (angle_prior_weight ** 2) * angle_prior(body_pose).sum(dim=-1) 54 | 55 | # Regularizer to prevent betas from taking large values 56 | shape_prior_loss = (shape_prior_weight ** 2) * (betas ** 2).sum(dim=-1) 57 | 58 | total_loss = reprojection_loss.sum(dim=-1) + pose_prior_loss + angle_prior_loss + shape_prior_loss 59 | print(f'joints: {reprojection_loss[0].sum().item():.2f}, ' 60 | f'pose_prior: {pose_prior_loss[0].item():.2f}, ' 61 | f'angle_prior: {angle_prior_loss[0].item():.2f}, ' 62 | f'shape_prior: {shape_prior_loss[0].item():.2f}') 63 | 64 | if output == 'sum': 65 | return total_loss.sum() 66 | elif output == 'reprojection': 67 | return reprojection_loss 68 | 69 | 70 | def camera_fitting_loss(model_joints, camera_t, camera_t_est, camera_center, joints_2d, joints_conf, 71 | focal_length=5000, depth_loss_weight=100): 72 | """ 73 | Loss function for camera optimization. 
74 | """ 75 | 76 | # Project model joints 77 | batch_size = model_joints.shape[0] 78 | rotation = torch.eye(3, device=model_joints.device).unsqueeze(0).expand(batch_size, -1, -1) 79 | projected_joints = perspective_projection(model_joints, rotation, camera_t, 80 | focal_length, camera_center) 81 | 82 | op_joints = ['OP RHip', 'OP LHip', 'OP RShoulder', 'OP LShoulder'] 83 | op_joints_ind = [JOINT_IDS[joint] for joint in op_joints] 84 | gt_joints = ['Right Hip', 'Left Hip', 'Right Shoulder', 'Left Shoulder'] 85 | gt_joints_ind = [JOINT_IDS[joint] for joint in gt_joints] 86 | reprojection_error_op = (joints_2d[:, op_joints_ind] - 87 | projected_joints[:, op_joints_ind]) ** 2 88 | reprojection_error_gt = (joints_2d[:, gt_joints_ind] - 89 | projected_joints[:, gt_joints_ind]) ** 2 90 | 91 | # Check if for each example in the batch all 4 OpenPose detections are valid, otherwise use the GT detections 92 | # OpenPose joints are more reliable for this task, so we prefer to use them if possible 93 | is_valid = (joints_conf[:, op_joints_ind].min(dim=-1)[0][:, None, None] > 0).float() 94 | reprojection_loss = (is_valid * reprojection_error_op + (1 - is_valid) * reprojection_error_gt).sum(dim=(1, 2)) 95 | 96 | # Loss that penalizes deviation from depth estimate 97 | depth_loss = (depth_loss_weight ** 2) * (camera_t[:, 2] - camera_t_est[:, 2]) ** 2 98 | 99 | total_loss = reprojection_loss + depth_loss 100 | return total_loss.sum() 101 | 102 | 103 | def temporal_body_fitting_loss(body_pose, betas, model_joints, camera_t, camera_center, 104 | joints_2d, joints_conf, pose_prior, 105 | focal_length=5000, sigma=100, pose_prior_weight=4.78, 106 | shape_prior_weight=5, angle_prior_weight=15.2, 107 | smooth_2d_weight=0.01, smooth_3d_weight=1.0, 108 | output='sum'): 109 | """ 110 | Loss function for body fitting 111 | """ 112 | # pose_prior_weight = 1. 113 | # shape_prior_weight = 1. 114 | # angle_prior_weight = 1. 115 | # sigma = 10. 116 | 117 | batch_size = body_pose.shape[0] 118 | rotation = torch.eye(3, device=body_pose.device).unsqueeze(0).expand(batch_size, -1, -1) 119 | projected_joints = perspective_projection(model_joints, rotation, camera_t, 120 | focal_length, camera_center) 121 | 122 | # Weighted robust reprojection error 123 | reprojection_error = gmof(projected_joints - joints_2d, sigma) 124 | reprojection_loss = (joints_conf ** 2) * reprojection_error.sum(dim=-1) 125 | 126 | # Pose prior loss 127 | pose_prior_loss = (pose_prior_weight ** 2) * pose_prior(body_pose, betas) 128 | 129 | # Angle prior for knees and elbows 130 | angle_prior_loss = (angle_prior_weight ** 2) * angle_prior(body_pose).sum(dim=-1) 131 | 132 | # Regularizer to prevent betas from taking large values 133 | shape_prior_loss = (shape_prior_weight ** 2) * (betas ** 2).sum(dim=-1) 134 | 135 | total_loss = reprojection_loss.sum(dim=-1) + pose_prior_loss + angle_prior_loss + shape_prior_loss 136 | 137 | # Smooth 2d joint loss 138 | joint_conf_diff = joints_conf[1:] 139 | joints_2d_diff = projected_joints[1:] - projected_joints[:-1] 140 | smooth_j2d_loss = (joint_conf_diff ** 2) * joints_2d_diff.abs().sum(dim=-1) 141 | smooth_j2d_loss = torch.cat( 142 | [torch.zeros(1, smooth_j2d_loss.shape[1], device=body_pose.device), smooth_j2d_loss] 143 | ).sum(dim=-1) 144 | smooth_j2d_loss = (smooth_2d_weight ** 2) * smooth_j2d_loss 145 | 146 | # Smooth 3d joint loss 147 | joints_3d_diff = model_joints[1:] - model_joints[:-1] 148 | # joints_3d_diff = joints_3d_diff * 100. 
149 |     smooth_j3d_loss = (joint_conf_diff ** 2) * joints_3d_diff.abs().sum(dim=-1)
150 |     smooth_j3d_loss = torch.cat(
151 |         [torch.zeros(1, smooth_j3d_loss.shape[1], device=body_pose.device), smooth_j3d_loss]
152 |     ).sum(dim=-1)
153 |     smooth_j3d_loss = (smooth_3d_weight ** 2) * smooth_j3d_loss
154 |
155 |     total_loss += smooth_j2d_loss + smooth_j3d_loss
156 |
157 |     # print(f'joints: {reprojection_loss[0].sum().item():.2f}, '
158 |     #       f'pose_prior: {pose_prior_loss[0].item():.2f}, '
159 |     #       f'angle_prior: {angle_prior_loss[0].item():.2f}, '
160 |     #       f'shape_prior: {shape_prior_loss[0].item():.2f}, '
161 |     #       f'smooth_j2d: {smooth_j2d_loss.sum().item()}, '
162 |     #       f'smooth_j3d: {smooth_j3d_loss.sum().item()}')
163 |
164 |     if output == 'sum':
165 |         return total_loss.sum()
166 |     elif output == 'reprojection':
167 |         return reprojection_loss
168 |
169 |
170 | def temporal_camera_fitting_loss(model_joints, camera_t, camera_t_est, camera_center, joints_2d, joints_conf,
171 |                                  focal_length=5000, depth_loss_weight=100):
172 |     """
173 |     Loss function for camera optimization.
174 |     """
175 |
176 |     # Project model joints
177 |     batch_size = model_joints.shape[0]
178 |     rotation = torch.eye(3, device=model_joints.device).unsqueeze(0).expand(batch_size, -1, -1)
179 |     projected_joints = perspective_projection(model_joints, rotation, camera_t,
180 |                                               focal_length, camera_center)
181 |
182 |     op_joints = ['OP RHip', 'OP LHip', 'OP RShoulder', 'OP LShoulder']
183 |     op_joints_ind = [JOINT_IDS[joint] for joint in op_joints]
184 |     # gt_joints = ['Right Hip', 'Left Hip', 'Right Shoulder', 'Left Shoulder']
185 |     # gt_joints_ind = [constants.JOINT_IDS[joint] for joint in gt_joints]
186 |     reprojection_error_op = (joints_2d[:, op_joints_ind] -
187 |                              projected_joints[:, op_joints_ind]) ** 2
188 |     # reprojection_error_gt = (joints_2d[:, gt_joints_ind] -
189 |     #                          projected_joints[:, gt_joints_ind]) ** 2
190 |
191 |     # Only the 4 OpenPose torso joints are used here; examples where any of them is invalid
192 |     # are masked out (the GT fallback used in camera_fitting_loss above is disabled)
193 |     is_valid = (joints_conf[:, op_joints_ind].min(dim=-1)[0][:, None, None] > 0).float()
194 |     reprojection_loss = (is_valid * reprojection_error_op).sum(dim=(1, 2))
195 |
196 |     # Loss that penalizes deviation from depth estimate
197 |     depth_loss = (depth_loss_weight ** 2) * (camera_t[:, 2] - camera_t_est[:, 2]) ** 2
198 |
199 |     total_loss = reprojection_loss + depth_loss
200 |     return total_loss.sum()
201 |
--------------------------------------------------------------------------------
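Editor's note: the two smoothness terms in temporal_body_fitting_loss are plain first-difference penalties over the frame dimension; a padded zero row keeps one loss value per frame. A standalone sketch of the pattern (illustrative shapes, not the repo's API):

```python
import torch

T, J = 8, 49                   # frames, joints (shapes are illustrative)
joints = torch.randn(T, J, 3)  # per-frame 3D joints
conf = torch.rand(T, J)        # per-joint confidences

diff = joints[1:] - joints[:-1]                        # (T-1, J, 3) frame-to-frame motion
per_joint = (conf[1:] ** 2) * diff.abs().sum(dim=-1)   # weight by confidence of the later frame
smooth_loss = torch.cat([torch.zeros(1, J), per_joint]).sum(dim=-1)  # pad frame 0, reduce joints
print(smooth_loss.shape)       # torch.Size([8]) -> one smoothness value per frame
```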
/lib/core/loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import torch
18 | import torch.nn as nn
19 |
20 | from lib.utils.geometry import batch_rodrigues
21 |
22 | class VIBELoss(nn.Module):
23 |     def __init__(
24 |             self,
25 |             e_loss_weight=60.,
26 |             e_3d_loss_weight=30.,
27 |             e_pose_loss_weight=1.,
28 |             e_shape_loss_weight=0.001,
29 |             d_motion_loss_weight=1.,
30 |             device='cuda',
31 |     ):
32 |         super(VIBELoss, self).__init__()
33 |         self.e_loss_weight = e_loss_weight
34 |         self.e_3d_loss_weight = e_3d_loss_weight
35 |         self.e_pose_loss_weight = e_pose_loss_weight
36 |         self.e_shape_loss_weight = e_shape_loss_weight
37 |         self.d_motion_loss_weight = d_motion_loss_weight
38 |
39 |         self.device = device
40 |         self.criterion_shape = nn.L1Loss().to(self.device)
41 |         self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
42 |         self.criterion_regr = nn.MSELoss().to(self.device)
43 |
44 |         self.enc_loss = batch_encoder_disc_l2_loss
45 |         self.dec_loss = batch_adv_disc_l2_loss
46 |
47 |     def forward(
48 |             self,
49 |             generator_outputs,
50 |             data_2d,
51 |             data_3d,
52 |             data_body_mosh=None,
53 |             data_motion_mosh=None,
54 |             body_discriminator=None,
55 |             motion_discriminator=None,
56 |     ):
57 |         # merge the batch and time dimensions: (N, T, ...) -> (N*T, ...)
58 |         reduce = lambda x: x.reshape((x.shape[0] * x.shape[1],) + x.shape[2:])
59 |         # flatten weight vectors into 1D masks
60 |         flatten = lambda x: x.reshape(-1)
61 |         # accumulate the predicted thetas from every IEF (iterative error feedback) step
62 |         accumulate_thetas = lambda x: torch.cat([output['theta'] for output in x], 0)
63 |
64 |         if data_2d:
65 |             sample_2d_count = data_2d['kp_2d'].shape[0]
66 |             real_2d = torch.cat((data_2d['kp_2d'], data_3d['kp_2d']), 0)
67 |         else:
68 |             sample_2d_count = 0
69 |             real_2d = data_3d['kp_2d']
70 |
71 |         real_2d = reduce(real_2d)
72 |
73 |         real_3d = reduce(data_3d['kp_3d'])
74 |         data_3d_theta = reduce(data_3d['theta'])
75 |
76 |         w_3d = data_3d['w_3d'].type(torch.bool)
77 |         w_smpl = data_3d['w_smpl'].type(torch.bool)
78 |
79 |         total_predict_thetas = accumulate_thetas(generator_outputs)
80 |
81 |         preds = generator_outputs[-1]
82 |
83 |         pred_j3d = preds['kp_3d'][sample_2d_count:]
84 |         pred_theta = preds['theta'][sample_2d_count:]
85 |
86 |         theta_size = pred_theta.shape[:2]
87 |
88 |         pred_theta = reduce(pred_theta)
89 |         pred_j2d = reduce(preds['kp_2d'])
90 |         pred_j3d = reduce(pred_j3d)
91 |
92 |         w_3d = flatten(w_3d)
93 |         w_smpl = flatten(w_smpl)
94 |
95 |         pred_theta = pred_theta[w_smpl]
96 |         pred_j3d = pred_j3d[w_3d]
97 |         data_3d_theta = data_3d_theta[w_smpl]
98 |         real_3d = real_3d[w_3d]
99 |
100 |         # <======== Generator Loss
101 |         loss_kp_2d = self.keypoint_loss(pred_j2d, real_2d, openpose_weight=1., gt_weight=1.) * self.e_loss_weight
102 |
103 |         loss_kp_3d = self.keypoint_3d_loss(pred_j3d, real_3d)
104 |         loss_kp_3d = loss_kp_3d * self.e_3d_loss_weight
105 |
106 |         real_shape, pred_shape = data_3d_theta[:, 75:], pred_theta[:, 75:]
107 |         real_pose, pred_pose = data_3d_theta[:, 3:75], pred_theta[:, 3:75]
108 |
109 |         loss_dict = {
110 |             'loss_kp_2d': loss_kp_2d,
111 |             'loss_kp_3d': loss_kp_3d,
112 |         }
113 |         if pred_theta.shape[0] > 0:
114 |             loss_pose, loss_shape = self.smpl_losses(pred_pose, pred_shape, real_pose, real_shape)
115 |             loss_shape = loss_shape * self.e_shape_loss_weight
116 |             loss_pose = loss_pose * self.e_pose_loss_weight
117 |             loss_dict['loss_shape'] = loss_shape
118 |             loss_dict['loss_pose'] = loss_pose
119 |
120 |         gen_loss = torch.stack(list(loss_dict.values())).sum()
121 |
122 |         # <======== Motion Discriminator Loss
123 |         end_idx = 75
124 |         start_idx = 6
125 |         pred_motion = total_predict_thetas
126 |         e_motion_disc_loss = self.enc_loss(motion_discriminator(pred_motion[:, :, start_idx:end_idx]))
127 |         e_motion_disc_loss = e_motion_disc_loss * self.d_motion_loss_weight
128 |
129 |         fake_motion = pred_motion.detach()
130 |         real_motion = data_motion_mosh['theta']
131 |         fake_disc_value = motion_discriminator(fake_motion[:, :, start_idx:end_idx])
132 |         real_disc_value = motion_discriminator(real_motion[:, :, start_idx:end_idx])
133 |         d_motion_disc_real, d_motion_disc_fake, d_motion_disc_loss = self.dec_loss(real_disc_value, fake_disc_value)
134 |
135 |         d_motion_disc_real = d_motion_disc_real * self.d_motion_loss_weight
136 |         d_motion_disc_fake = d_motion_disc_fake * self.d_motion_loss_weight
137 |         d_motion_disc_loss = d_motion_disc_loss * self.d_motion_loss_weight
138 |
139 |         loss_dict['e_m_disc_loss'] = e_motion_disc_loss
140 |         loss_dict['d_m_disc_real'] = d_motion_disc_real
141 |         loss_dict['d_m_disc_fake'] = d_motion_disc_fake
142 |         loss_dict['d_m_disc_loss'] = d_motion_disc_loss
143 |
144 |         gen_loss = gen_loss + e_motion_disc_loss
145 |         motion_dis_loss = d_motion_disc_loss
146 |
147 |         return gen_loss, motion_dis_loss, loss_dict
148 |
149 |     def keypoint_loss(self, pred_keypoints_2d, gt_keypoints_2d, openpose_weight, gt_weight):
150 |         """
151 |         Compute 2D reprojection loss on the keypoints.
152 |         The loss is weighted by the confidence.
153 |         The available keypoints are different for each dataset.
154 |         """
155 |         conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone()
156 |         conf[:, :25] *= openpose_weight
157 |         conf[:, 25:] *= gt_weight
158 |         loss = (conf * self.criterion_keypoints(pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).mean()
159 |         return loss
160 |
161 |     def keypoint_3d_loss(self, pred_keypoints_3d, gt_keypoints_3d):
162 |         """
163 |         Compute 3D keypoint loss for the examples for which 3D keypoint annotations are available.
164 |         The loss is weighted by the confidence.
165 | """ 166 | pred_keypoints_3d = pred_keypoints_3d[:, 25:39, :] 167 | gt_keypoints_3d = gt_keypoints_3d[:, 25:39, :] 168 | 169 | # conf = gt_keypoints_3d[:, :, -1].unsqueeze(-1).clone() 170 | # gt_keypoints_3d = gt_keypoints_3d[:, :, :-1].clone() 171 | # gt_keypoints_3d = gt_keypoints_3d 172 | # conf = conf 173 | pred_keypoints_3d = pred_keypoints_3d 174 | if len(gt_keypoints_3d) > 0: 175 | gt_pelvis = (gt_keypoints_3d[:, 2,:] + gt_keypoints_3d[:, 3,:]) / 2 176 | gt_keypoints_3d = gt_keypoints_3d - gt_pelvis[:, None, :] 177 | pred_pelvis = (pred_keypoints_3d[:, 2,:] + pred_keypoints_3d[:, 3,:]) / 2 178 | pred_keypoints_3d = pred_keypoints_3d - pred_pelvis[:, None, :] 179 | # print(conf.shape, pred_keypoints_3d.shape, gt_keypoints_3d.shape) 180 | # return (conf * self.criterion_keypoints(pred_keypoints_3d, gt_keypoints_3d)).mean() 181 | return self.criterion_keypoints(pred_keypoints_3d, gt_keypoints_3d).mean() 182 | else: 183 | return torch.FloatTensor(1).fill_(0.).to(self.device) 184 | 185 | def smpl_losses(self, pred_rotmat, pred_betas, gt_pose, gt_betas): 186 | pred_rotmat_valid = batch_rodrigues(pred_rotmat.reshape(-1,3)).reshape(-1, 24, 3, 3) 187 | gt_rotmat_valid = batch_rodrigues(gt_pose.reshape(-1,3)).reshape(-1, 24, 3, 3) 188 | pred_betas_valid = pred_betas 189 | gt_betas_valid = gt_betas 190 | if len(pred_rotmat_valid) > 0: 191 | loss_regr_pose = self.criterion_regr(pred_rotmat_valid, gt_rotmat_valid) 192 | loss_regr_betas = self.criterion_regr(pred_betas_valid, gt_betas_valid) 193 | else: 194 | loss_regr_pose = torch.FloatTensor(1).fill_(0.).to(self.device) 195 | loss_regr_betas = torch.FloatTensor(1).fill_(0.).to(self.device) 196 | return loss_regr_pose, loss_regr_betas 197 | 198 | 199 | def batch_encoder_disc_l2_loss(disc_value): 200 | ''' 201 | Inputs: 202 | disc_value: N x 25 203 | ''' 204 | k = disc_value.shape[0] 205 | return torch.sum((disc_value - 1.0) ** 2) * 1.0 / k 206 | 207 | 208 | def batch_adv_disc_l2_loss(real_disc_value, fake_disc_value): 209 | ''' 210 | Inputs: 211 | disc_value: N x 25 212 | ''' 213 | ka = real_disc_value.shape[0] 214 | kb = fake_disc_value.shape[0] 215 | lb, la = torch.sum(fake_disc_value ** 2) / kb, torch.sum((real_disc_value - 1) ** 2) / ka 216 | return la, lb, la + lb 217 | 218 | 219 | def batch_encoder_disc_wasserstein_loss(disc_value): 220 | ''' 221 | Inputs: 222 | disc_value: N x 25 223 | ''' 224 | k = disc_value.shape[0] 225 | return -1 * disc_value.sum() / k 226 | 227 | 228 | def batch_adv_disc_wasserstein_loss(real_disc_value, fake_disc_value): 229 | ''' 230 | Inputs: 231 | disc_value: N x 25 232 | ''' 233 | 234 | ka = real_disc_value.shape[0] 235 | kb = fake_disc_value.shape[0] 236 | 237 | la = -1 * real_disc_value.sum() / ka 238 | lb = fake_disc_value.sum() / kb 239 | return la, lb, la + lb 240 | 241 | 242 | def batch_smooth_pose_loss(pred_theta): 243 | pose = pred_theta[:,:,3:75] 244 | pose_diff = pose[:,1:,:] - pose[:,:-1,:] 245 | return torch.mean(pose_diff).abs() 246 | 247 | 248 | def batch_smooth_shape_loss(pred_theta): 249 | shape = pred_theta[:, :, 75:] 250 | shape_diff = shape[:, 1:, :] - shape[:, :-1, :] 251 | return torch.mean(shape_diff).abs() 252 | -------------------------------------------------------------------------------- /lib/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 
/lib/utils/demo_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import os
18 | import cv2
19 | import time
20 | import json
21 | import torch
22 | import subprocess
23 | import numpy as np
24 | import os.path as osp
25 | from pytube import YouTube
26 | from collections import OrderedDict
27 |
28 | from lib.utils.smooth_bbox import get_smooth_bbox_params, get_all_bbox_params
29 | from lib.data_utils.img_utils import get_single_image_crop_demo
30 | from lib.utils.geometry import rotation_matrix_to_angle_axis
31 | from lib.smplify.temporal_smplify import TemporalSMPLify
32 |
33 |
34 | def preprocess_video(video, joints2d, bboxes, frames, scale=1.0, crop_size=224):
35 |     """
36 |     Read the video, then normalize and crop it according to the bounding box.
37 |     If bounding box annotations are available, use them to crop the frames.
38 |     If no bounding box is specified but openpose detections are available, use them to get the bounding box.
39 |
40 |     :param video (ndarray): input video
41 |     :param joints2d (ndarray, NxJx3): openpose detections
42 |     :param bboxes (ndarray, Nx5): bbox detections
43 |     :param scale (float): bbox crop scaling factor
44 |     :param crop_size (int): crop width and height
45 |     :return: cropped video, cropped and normalized video, modified bboxes, modified joints2d, trimmed frame ids
46 |     """
47 |
48 |     if joints2d is not None:
49 |         bboxes, time_pt1, time_pt2 = get_all_bbox_params(joints2d, vis_thresh=0.3)
50 |         bboxes[:, 2:] = 150. / bboxes[:, 2:]
51 |         bboxes = np.stack([bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 2]]).T
52 |
53 |         video = video[time_pt1:time_pt2]
54 |         joints2d = joints2d[time_pt1:time_pt2]
55 |         frames = frames[time_pt1:time_pt2]
56 |
57 |     shape = video.shape
58 |
59 |     temp_video = np.zeros((shape[0], crop_size, crop_size, shape[-1]))
60 |     norm_video = torch.zeros(shape[0], shape[-1], crop_size, crop_size)
61 |
62 |     for idx in range(video.shape[0]):
63 |
64 |         img = video[idx]
65 |         bbox = bboxes[idx]
66 |
67 |         j2d = joints2d[idx] if joints2d is not None else None
68 |
69 |         norm_img, raw_img, kp_2d = get_single_image_crop_demo(
70 |             img,
71 |             bbox,
72 |             kp_2d=j2d,
73 |             scale=scale,
74 |             crop_size=crop_size)
75 |
76 |         if joints2d is not None:
77 |             joints2d[idx] = kp_2d
78 |
79 |         temp_video[idx] = raw_img
80 |         norm_video[idx] = norm_img
81 |
82 |     temp_video = temp_video.astype(np.uint8)
83 |
84 |     return temp_video, norm_video, bboxes, joints2d, frames
85 |
86 |
87 | def download_youtube_clip(url, download_folder):
88 |     return YouTube(url).streams.first().download(output_path=download_folder)
89 |
90 |
91 | def smplify_runner(
92 |         pred_rotmat,
93 |         pred_betas,
94 |         pred_cam,
95 |         j2d,
96 |         device,
97 |         batch_size,
98 |         lr=1.0,
99 |         opt_steps=1,
100 |         use_lbfgs=True,
101 |         pose2aa=True
102 | ):
103 |     smplify = TemporalSMPLify(
104 |         step_size=lr,
105 |         batch_size=batch_size,
106 |         num_iters=opt_steps,
107 |         focal_length=5000.,
108 |         use_lbfgs=use_lbfgs,
109 |         device=device,
110 |         # max_iter=10,
111 |     )
112 |     # Convert predicted rotation matrices to axis-angle
113 |     if pose2aa:
114 |         pred_pose = rotation_matrix_to_angle_axis(pred_rotmat.detach()).reshape(batch_size, -1)
115 |     else:
116 |         pred_pose = pred_rotmat
117 |
118 |     # Convert the weak-perspective camera (s, tx, ty) to a full translation for SMPLify: t_z = 2 * focal / (crop_size * s)
119 |     pred_cam_t = torch.stack([
120 |         pred_cam[:, 1], pred_cam[:, 2],
121 |         2 * 5000 / (224 * pred_cam[:, 0] + 1e-9)
122 |     ], dim=-1)
123 |
124 |     gt_keypoints_2d_orig = j2d
125 |     # Before running SMPLify, compute the reprojection error of the network prediction
126 |     opt_joint_loss = smplify.get_fitting_loss(
127 |         pred_pose.detach(), pred_betas.detach(),
128 |         pred_cam_t.detach(),
129 |         0.5 * 224 * torch.ones(batch_size, 2, device=device),
130 |         gt_keypoints_2d_orig).mean(dim=-1)
131 |
132 |     best_prediction_id = torch.argmin(opt_joint_loss).item()
133 |     pred_betas = pred_betas[best_prediction_id].unsqueeze(0)
134 |     # pred_betas = pred_betas[best_prediction_id:best_prediction_id+2]  # .unsqueeze(0)
135 |     # top5_best_idxs = torch.topk(opt_joint_loss, 5, largest=False)[1]
136 |     # breakpoint()
137 |
138 |     start = time.time()
139 |     # Run SMPLify optimization initialized from the network prediction
140 |     # new_opt_vertices, new_opt_joints, \
141 |     # new_opt_pose, new_opt_betas, \
142 |     # new_opt_cam_t, \
143 |     output, new_opt_joint_loss = smplify(
144 |         pred_pose.detach(), pred_betas.detach(),
145 |         pred_cam_t.detach(),
146 |         0.5 * 224 * torch.ones(batch_size, 2, device=device),
147 |         gt_keypoints_2d_orig,
148 |     )
149 |     new_opt_joint_loss = new_opt_joint_loss.mean(dim=-1)
150 |     # smplify_time = time.time() - start
151 |     # print(f'Smplify time: {smplify_time}')
152 |     # Update only the examples where the new loss is lower than the current one
153 |     update = (new_opt_joint_loss < opt_joint_loss)
154 |
155 |     new_opt_vertices = output['verts']
156 |     new_opt_cam_t = output['theta'][:, :3]
157 |     new_opt_pose = output['theta'][:, 3:75]
158 |     new_opt_betas = output['theta'][:, 75:]
159 |     new_opt_joints3d = output['kp_3d']
160 |
161 |     return_val = [
162 |         update, new_opt_vertices.cpu(), new_opt_cam_t.cpu(),
163 |         new_opt_pose.cpu(), new_opt_betas.cpu(), new_opt_joints3d.cpu(),
164 |         new_opt_joint_loss, opt_joint_loss,
165 |     ]
166 |
167 |     return return_val
168 |
169 |
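Editor's note: the pred_cam_t construction above inverts the weak-perspective camera used throughout this repo: a scale s on a crop of size 224 with focal length 5000 implies a depth t_z = 2 * f / (crop_size * s). A standalone round-trip sketch (constants as used here, values illustrative):

```python
import torch

focal, crop = 5000., 224.
pred_cam = torch.tensor([[0.9, 0.05, -0.02]])  # (s, tx, ty) weak-perspective params

# depth implied by the weak-perspective scale
tz = 2 * focal / (crop * pred_cam[:, 0] + 1e-9)
cam_t = torch.stack([pred_cam[:, 1], pred_cam[:, 2], tz], dim=-1)
print(cam_t)  # tensor([[ 0.0500, -0.0200, 49.6032]])

# inverse mapping, as done at the end of TemporalSMPLify.__call__
s = 2 * focal / (crop * cam_t[:, 2] + 1e-9)
print(s)      # tensor([0.9000]) -> recovers the original scale
```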
170 | def trim_videos(filename, start_time, end_time, output_filename):
171 |     command = ['ffmpeg',
172 |                '-i', filename,
173 |                '-ss', str(start_time),
174 |                '-t', str(end_time - start_time),
175 |                '-c:v', 'libx264', '-c:a', 'copy',
176 |                '-threads', '1',
177 |                '-loglevel', 'panic',
178 |                output_filename]
179 |     # arguments are passed as a list, so no extra shell quoting is needed
180 |     subprocess.call(command)
181 |
182 |
183 | def video_to_images(vid_file, img_folder=None, return_info=False):
184 |     if img_folder is None:
185 |         img_folder = osp.join('/tmp', osp.basename(vid_file).replace('.', '_'))
186 |
187 |     os.makedirs(img_folder, exist_ok=True)
188 |
189 |     command = ['ffmpeg',
190 |                '-i', vid_file,
191 |                '-f', 'image2',
192 |                '-v', 'error',
193 |                f'{img_folder}/%06d.png']
194 |     print(f'Running \"{" ".join(command)}\"')
195 |     subprocess.call(command)
196 |
197 |     print(f'Images saved to \"{img_folder}\"')
198 |
199 |     img_shape = cv2.imread(osp.join(img_folder, '000001.png')).shape
200 |
201 |     if return_info:
202 |         return img_folder, len(os.listdir(img_folder)), img_shape
203 |     else:
204 |         return img_folder
205 |
206 |
207 | def download_url(url, outdir):
208 |     print(f'Downloading files from {url}')
209 |     cmd = ['wget', '-c', url, '-P', outdir]
210 |     subprocess.call(cmd)
211 |
212 |
213 | def download_ckpt(outdir='data/vibe_data', use_3dpw=False):
214 |     os.makedirs(outdir, exist_ok=True)
215 |
216 |     if use_3dpw:
217 |         ckpt_file = 'data/vibe_data/vibe_model_w_3dpw.pth.tar'
218 |         url = 'https://www.dropbox.com/s/41ozgqorcp095ja/vibe_model_w_3dpw.pth.tar'
219 |         if not os.path.isfile(ckpt_file):
220 |             download_url(url=url, outdir=outdir)
221 |     else:
222 |         ckpt_file = 'data/vibe_data/vibe_model_wo_3dpw.pth.tar'
223 |         url = 'https://www.dropbox.com/s/amj2p8bmf6g56k6/vibe_model_wo_3dpw.pth.tar'
224 |         if not os.path.isfile(ckpt_file):
225 |             download_url(url=url, outdir=outdir)
226 |
227 |     return ckpt_file
228 |
229 |
230 | def images_to_video(img_folder, output_vid_file):
231 |     os.makedirs(img_folder, exist_ok=True)
232 |
233 |     command = [
234 |         'ffmpeg', '-y', '-threads', '16', '-i', f'{img_folder}/%06d.png', '-profile:v', 'baseline',
235 |         '-level', '3.0', '-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-an', '-v', 'error', output_vid_file,
236 |     ]
237 |
238 |     print(f'Running \"{" ".join(command)}\"')
239 |     subprocess.call(command)
240 |
241 |
242 | def convert_crop_cam_to_orig_img(cam, bbox, img_width, img_height):
243 |     '''
244 |     Convert predicted camera from cropped image coordinates
245 |     to original image coordinates
246 |     :param cam (ndarray, shape=(N,3)): weak perspective camera in cropped img coordinates
247 |     :param bbox (ndarray, shape=(N,4)): bbox coordinates (c_x, c_y, h, w); only the first three are used
248 |     :param img_width (int): original image width
249 |     :param img_height (int): original image height
250 |     :return: orig_cam (ndarray, shape=(N,4)): weak perspective camera (sx, sy, tx, ty) in original img coordinates
251 |     '''
252 |     cx, cy, h = bbox[:, 0], bbox[:, 1], bbox[:, 2]
253 |     hw, hh = img_width / 2., img_height / 2.
254 |     sx = cam[:, 0] * (1. / (img_width / h))
255 |     sy = cam[:, 0] * (1. / (img_height / h))
256 |     tx = ((cx - hw) / hw / sx) + cam[:, 1]
257 |     ty = ((cy - hh) / hh / sy) + cam[:, 2]
258 |     orig_cam = np.stack([sx, sy, tx, ty]).T
259 |     return orig_cam
260 |
261 |
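Editor's note: a small numerical check of convert_crop_cam_to_orig_img: for a square crop centered in the image, the converted camera scales down with the bbox size and keeps the subject centered. Values below are illustrative only:

```python
import numpy as np

def convert_crop_cam_to_orig_img(cam, bbox, img_width, img_height):
    # same math as above: rescale the weak-perspective scale by bbox/image size,
    # then shift the translation to the bbox center in normalized image coords
    cx, cy, h = bbox[:, 0], bbox[:, 1], bbox[:, 2]
    hw, hh = img_width / 2., img_height / 2.
    sx = cam[:, 0] * (1. / (img_width / h))
    sy = cam[:, 0] * (1. / (img_height / h))
    tx = ((cx - hw) / hw / sx) + cam[:, 1]
    ty = ((cy - hh) / hh / sy) + cam[:, 2]
    return np.stack([sx, sy, tx, ty]).T

cam = np.array([[1.0, 0.0, 0.0]])            # unit scale, centered in the crop
bbox = np.array([[960., 540., 270., 270.]])  # crop centered in a 1920x1080 frame
print(convert_crop_cam_to_orig_img(cam, bbox, 1920, 1080))
# [[0.140625 0.25     0.       0.      ]] -> scaled down, still centered
```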
262 | def prepare_rendering_results(vibe_results, nframes):
263 |     frame_results = [{} for _ in range(nframes)]
264 |     for person_id, person_data in vibe_results.items():
265 |         for idx, frame_id in enumerate(person_data['frame_ids']):
266 |             frame_results[frame_id][person_id] = {
267 |                 'verts': person_data['verts'][idx],
268 |                 'cam': person_data['orig_cam'][idx],
269 |             }
270 |
271 |     # naive depth ordering based on the scale of the weak perspective camera
272 |     for frame_id, frame_data in enumerate(frame_results):
273 |         # sort based on the y-scale of the cam in original image coords
274 |         sort_idx = np.argsort([v['cam'][1] for k, v in frame_data.items()])
275 |         frame_results[frame_id] = OrderedDict(
276 |             {list(frame_data.keys())[i]: frame_data[list(frame_data.keys())[i]] for i in sort_idx}
277 |         )
278 |
279 |     return frame_results
280 |
--------------------------------------------------------------------------------
/lib/smplify/temporal_smplify.py:
--------------------------------------------------------------------------------
1 | # This script is the extended version of https://github.com/nkolot/SPIN/blob/master/smplify/smplify.py, adapted to deal with
2 | # sequence inputs.
3 |
4 | import os
5 | import torch
6 |
7 | from lib.core.config import VIBE_DATA_DIR
8 | from lib.models.smpl import SMPL, JOINT_IDS, SMPL_MODEL_DIR
9 | from lib.smplify.losses import temporal_camera_fitting_loss, temporal_body_fitting_loss
10 |
11 | # For the GMM prior, we use the GMM implementation of SMPLify-X
12 | # https://github.com/vchoutas/smplify-x/blob/master/smplifyx/prior.py
13 | from .prior import MaxMixturePrior
14 |
15 | def arrange_betas(pose, betas):
16 |     batch_size = pose.shape[0]  # total number of frames; betas holds one shape vector per video
17 |     num_video = betas.shape[0]
18 |
19 |     video_size = batch_size // num_video
20 |     betas_ext = torch.zeros(batch_size, betas.shape[-1], device=betas.device)
21 |     for i in range(num_video):
22 |         betas_ext[i*video_size:(i+1)*video_size] = betas[i]
23 |
24 |     return betas_ext
25 |
26 | class TemporalSMPLify():
27 |     """Implementation of single-stage SMPLify, extended to video sequences."""
28 |
29 |     def __init__(self,
30 |                  step_size=1e-2,
31 |                  batch_size=66,
32 |                  num_iters=100,
33 |                  focal_length=5000,
34 |                  use_lbfgs=True,
35 |                  device=torch.device('cuda'),
36 |                  max_iter=20):
37 |
38 |         # Store options
39 |         self.device = device
40 |         self.focal_length = focal_length
41 |         self.step_size = step_size
42 |         self.max_iter = max_iter
43 |         # Ignore the following joints for the fitting process
44 |         ign_joints = ['OP Neck', 'OP RHip', 'OP LHip', 'Right Hip', 'Left Hip']
45 |         self.ign_joints = [JOINT_IDS[i] for i in ign_joints]
46 |         self.num_iters = num_iters
47 |
48 |         # GMM pose prior
49 |         self.pose_prior = MaxMixturePrior(prior_folder=VIBE_DATA_DIR,
50 |                                           num_gaussians=8,
51 |                                           dtype=torch.float32).to(device)
52 |         self.use_lbfgs = use_lbfgs
53 |         # Load SMPL model
54 |         self.smpl = SMPL(SMPL_MODEL_DIR,
55 |                          batch_size=batch_size,
56 |                          create_transl=False).to(self.device)
57 |
58 |     def __call__(self, init_pose, init_betas, init_cam_t, camera_center, keypoints_2d):
59 |         """Perform body fitting.
60 |         Input:
61 |             init_pose: SMPL pose estimate
62 |             init_betas: SMPL betas estimate
63 |             init_cam_t: Camera translation estimate
64 |             camera_center: Camera center location
65 |             keypoints_2d: Keypoints used for the optimization
66 |         Returns:
67 |             vertices: Vertices of optimized shape
68 |             joints: 3D joints of optimized shape
69 |             pose: SMPL pose parameters of optimized shape
70 |             betas: SMPL beta parameters of optimized shape
71 |             camera_translation: Camera translation
72 |             reprojection_loss: Final joint reprojection loss
73 |         """
74 |
75 |         # Make camera translation a learnable parameter
76 |         camera_translation = init_cam_t.clone()
77 |
78 |         # Get joint confidence
79 |         joints_2d = keypoints_2d[:, :, :2]
80 |         joints_conf = keypoints_2d[:, :, -1]
81 |
82 |         # Split SMPL pose to body pose and global orientation
83 |         body_pose = init_pose[:, 3:].detach().clone()
84 |         global_orient = init_pose[:, :3].detach().clone()
85 |         betas = init_betas.detach().clone()
86 |
87 |         # Step 1: Optimize camera translation and body orientation
88 |         # (body pose and betas stay fixed during this stage)
89 |         body_pose.requires_grad = False
90 |         betas.requires_grad = False
91 |         global_orient.requires_grad = True
92 |         camera_translation.requires_grad = True
93 |
94 |         camera_opt_params = [global_orient, camera_translation]
95 |
96 |         if self.use_lbfgs:
97 |             camera_optimizer = torch.optim.LBFGS(camera_opt_params, max_iter=self.max_iter,
98 |                                                  lr=self.step_size, line_search_fn='strong_wolfe')
99 |             for i in range(self.num_iters):
100 |                 def closure():
101 |                     camera_optimizer.zero_grad()
102 |                     betas_ext = arrange_betas(body_pose, betas)
103 |                     smpl_output = self.smpl(global_orient=global_orient,
104 |                                             body_pose=body_pose,
105 |                                             betas=betas_ext)
106 |                     model_joints = smpl_output.joints
107 |
108 |
109 |                     loss = temporal_camera_fitting_loss(model_joints, camera_translation,
110 |                                                         init_cam_t, camera_center,
111 |                                                         joints_2d, joints_conf, focal_length=self.focal_length)
112 |                     loss.backward()
113 |                     return loss
114 |
115 |                 camera_optimizer.step(closure)
116 |         else:
117 |             camera_optimizer = torch.optim.Adam(camera_opt_params, lr=self.step_size, betas=(0.9, 0.999))
118 |
119 |             for i in range(self.num_iters):
120 |                 betas_ext = arrange_betas(body_pose, betas)
121 |                 smpl_output = self.smpl(global_orient=global_orient,
122 |                                         body_pose=body_pose,
123 |                                         betas=betas_ext)
124 |                 model_joints = smpl_output.joints
125 |                 loss = temporal_camera_fitting_loss(model_joints, camera_translation,
126 |                                                     init_cam_t, camera_center,
127 |                                                     joints_2d, joints_conf, focal_length=self.focal_length)
128 |                 camera_optimizer.zero_grad()
129 |                 loss.backward()
130 |                 camera_optimizer.step()
131 |
132 |         # Fix camera translation after optimizing camera
133 |         camera_translation.requires_grad = False
134 |
135 |         # Step 2: Optimize body joints
136 |         # Optimize the body pose, betas, and global orientation (camera translation stays fixed)
137 |         body_pose.requires_grad = True
138 |         betas.requires_grad = True
139 |         global_orient.requires_grad = True
140 |         camera_translation.requires_grad = False
141 |         body_opt_params = [body_pose, betas, global_orient]
142 |
143 |         # For joints ignored during fitting, set the confidence to 0
144 |         joints_conf[:, self.ign_joints] = 0.
145 |
146 |         if self.use_lbfgs:
147 |             body_optimizer = torch.optim.LBFGS(body_opt_params, max_iter=self.max_iter,
148 |                                                lr=self.step_size, line_search_fn='strong_wolfe')
149 |             for i in range(self.num_iters):
150 |                 def closure():
151 |                     body_optimizer.zero_grad()
152 |                     betas_ext = arrange_betas(body_pose, betas)
153 |                     smpl_output = self.smpl(global_orient=global_orient,
154 |                                             body_pose=body_pose,
155 |                                             betas=betas_ext)
156 |                     model_joints = smpl_output.joints
157 |
158 |                     loss = temporal_body_fitting_loss(body_pose, betas, model_joints, camera_translation, camera_center,
159 |                                                       joints_2d, joints_conf, self.pose_prior,
160 |                                                       focal_length=self.focal_length)
161 |                     loss.backward()
162 |                     return loss
163 |
164 |                 body_optimizer.step(closure)
165 |         else:
166 |             body_optimizer = torch.optim.Adam(body_opt_params, lr=self.step_size, betas=(0.9, 0.999))
167 |
168 |             for i in range(self.num_iters):
169 |                 betas_ext = arrange_betas(body_pose, betas)
170 |                 smpl_output = self.smpl(global_orient=global_orient,
171 |                                         body_pose=body_pose,
172 |                                         betas=betas_ext)
173 |                 model_joints = smpl_output.joints
174 |                 loss = temporal_body_fitting_loss(body_pose, betas, model_joints, camera_translation, camera_center,
175 |                                                   joints_2d, joints_conf, self.pose_prior,
176 |                                                   focal_length=self.focal_length)
177 |                 body_optimizer.zero_grad()
178 |                 loss.backward()
179 |                 body_optimizer.step()
180 |                 # scheduler.step(epoch=i)
181 |
182 |         # Get final loss value
183 |
184 |         with torch.no_grad():
185 |             betas_ext = arrange_betas(body_pose, betas)
186 |             smpl_output = self.smpl(global_orient=global_orient,
187 |                                     body_pose=body_pose,
188 |                                     betas=betas_ext)
189 |             model_joints = smpl_output.joints
190 |             reprojection_loss = temporal_body_fitting_loss(body_pose, betas, model_joints, camera_translation,
191 |                                                            camera_center,
192 |                                                            joints_2d, joints_conf, self.pose_prior,
193 |                                                            focal_length=self.focal_length,
194 |                                                            output='reprojection')
195 |
196 |         vertices = smpl_output.vertices.detach()
197 |         joints = smpl_output.joints.detach()
198 |         pose = torch.cat([global_orient, body_pose], dim=-1).detach()
199 |         betas = betas.detach()
200 |
201 |         # Back to weak perspective camera
202 |         camera_translation = torch.stack([
203 |             2 * 5000. / (224 * camera_translation[:, 2] + 1e-9),
204 |             camera_translation[:, 0], camera_translation[:, 1]
205 |         ], dim=-1)
206 |
207 |         betas = betas.repeat(pose.shape[0], 1)
208 |         output = {
209 |             'theta': torch.cat([camera_translation, pose, betas], dim=1),
210 |             'verts': vertices,
211 |             'kp_3d': joints,
212 |         }
213 |
214 |         return output, reprojection_loss
215 |         # return vertices, joints, pose, betas, camera_translation, reprojection_loss
216 |
217 |     def get_fitting_loss(self, pose, betas, cam_t, camera_center, keypoints_2d):
218 |         """Given body and camera parameters, compute reprojection loss value.
219 |         Input:
220 |             pose: SMPL pose parameters
221 |             betas: SMPL beta parameters
222 |             cam_t: Camera translation
223 |             camera_center: Camera center location
224 |             keypoints_2d: Keypoints used for the optimization
225 |         Returns:
226 |             reprojection_loss: Final joint reprojection loss
227 |         """
228 |
229 |         batch_size = pose.shape[0]
230 |
231 |         # Get joint confidence
232 |         joints_2d = keypoints_2d[:, :, :2]
233 |         joints_conf = keypoints_2d[:, :, -1]
234 |         # For joints ignored during fitting, set the confidence to 0
235 |         joints_conf[:, self.ign_joints] = 0.
236 |
237 |         # Split SMPL pose to body pose and global orientation
238 |         body_pose = pose[:, 3:]
239 |         global_orient = pose[:, :3]
240 |
241 |         with torch.no_grad():
242 |             smpl_output = self.smpl(global_orient=global_orient,
243 |                                     body_pose=body_pose,
244 |                                     betas=betas, return_full_pose=True)
245 |             model_joints = smpl_output.joints
246 |             reprojection_loss = temporal_body_fitting_loss(body_pose, betas, model_joints, cam_t, camera_center,
247 |                                                            joints_2d, joints_conf, self.pose_prior,
248 |                                                            focal_length=self.focal_length,
249 |                                                            output='reprojection')
250 |
251 |         return reprojection_loss
252 |
--------------------------------------------------------------------------------
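Editor's note: arrange_betas (top of temporal_smplify.py) tiles one beta vector per video across all of that video's frames, assuming the total frame count is an exact multiple of the number of videos. A standalone sketch with illustrative shapes:

```python
import torch

def arrange_betas(pose, betas):
    # expand per-video betas (num_video, 10) to per-frame betas (batch_size, 10)
    batch_size = pose.shape[0]
    num_video = betas.shape[0]
    video_size = batch_size // num_video
    betas_ext = torch.zeros(batch_size, betas.shape[-1], device=betas.device)
    for i in range(num_video):
        betas_ext[i * video_size:(i + 1) * video_size] = betas[i]
    return betas_ext

pose = torch.zeros(6, 72)                     # 6 frames of SMPL pose (2 videos x 3 frames)
betas = torch.tensor([[1.] * 10, [2.] * 10])  # one shape vector per video
print(arrange_betas(pose, betas)[:, 0])       # tensor([1., 1., 1., 2., 2., 2.])
```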