├── mGPT ├── __init__.py ├── archs │ ├── __init__.py │ ├── tools │ │ ├── resnet.py │ │ └── token_emb.py │ └── tm2t_evaluator.py ├── models │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── position_encoding_layer.py │ │ ├── tools.py │ │ ├── adain.py │ │ └── blocks.py │ └── build_model.py ├── render │ ├── __init__.py │ ├── blender │ │ ├── __init__.py │ │ ├── data.py │ │ ├── vertices.py │ │ ├── sampler.py │ │ ├── camera.py │ │ ├── tools.py │ │ ├── meshes.py │ │ ├── floor.py │ │ └── scene.py │ ├── pyrender │ │ └── j3ds_render_smpl.py │ ├── video.py │ └── rendermotion.py ├── utils │ ├── __init__.py │ ├── fixseed.py │ ├── misc.py │ ├── sample_utils.py │ ├── load_checkpoint.py │ ├── logger.py │ ├── tensors.py │ ├── demo_utils.py │ ├── easyconvert.py │ └── temos_utils.py ├── losses │ ├── __init__.py │ ├── base.py │ └── mgpt.py ├── metrics │ ├── __init__.py │ ├── base.py │ ├── m2m.py │ ├── mr.py │ └── mm.py └── data │ ├── humanml │ ├── README.md │ ├── __init__.py │ ├── dataset_m_vq.py │ ├── utils │ │ ├── paramUtil.py │ │ └── word_vectorizer.py │ ├── dataset_t2m_token.py │ ├── dataset_t2m_eval.py │ └── dataset_t2m_m2t.py │ ├── transforms │ ├── joints2jfeats │ │ ├── __init__.py │ │ ├── base.py │ │ └── tools.py │ ├── rots2joints │ │ ├── __init__.py │ │ └── base.py │ ├── rots2rfeats │ │ ├── __init__.py │ │ ├── base.py │ │ └── globvelandy.py │ ├── __init__.py │ ├── identity.py │ ├── xyz.py │ ├── base.py │ └── joints2rots │ │ └── config.py │ ├── tools │ ├── __init__.py │ ├── tensors.py │ ├── easyconvert.py │ └── collate.py │ ├── build_data.py │ ├── utils.py │ ├── Kit.py │ ├── __init__.py │ └── webui.py ├── scripts ├── fit_motion.sh ├── visualize_motion.sh ├── visualize_motion_parallel.sh ├── fit_motion_parallel.sh ├── plys2npy.py ├── get_motion_code.py └── get_code_visual.py ├── assets ├── meta │ ├── std.npy │ ├── mean.npy │ ├── std_eval.npy │ └── mean_eval.npy ├── images │ ├── table7.png │ ├── table8.png │ ├── figure10.png │ ├── figure12.png │ ├── figure13.png │ ├── pipeline.png │ ├── table15.png │ ├── avatar_bot.jpg │ └── avatar_user.png └── videos │ ├── example0.mp4 │ ├── example1.mp4 │ ├── example2.mp4 │ ├── example3.mp4 │ ├── example4.mp4 │ ├── example5.mp4 │ ├── example6.mp4 │ ├── example7.mp4 │ ├── example8.mp4 │ ├── example0_fast.mp4 │ └── example0_blender.mp4 ├── prepare ├── prepare_t5.sh ├── requirements_render.txt ├── download_pretrained_models.sh ├── download_smpl_model.sh ├── download_t2m_evaluators.sh ├── instructions │ └── template_pretrain.json ├── smplh.sh └── merge_smplh_mano.py ├── configs ├── lm │ ├── default.yaml │ ├── gpt2_medium.yaml │ ├── t5_large.yaml │ └── t5_small.yaml ├── vq │ └── default.yaml ├── evaluator │ └── tm2t.yaml ├── render.yaml ├── assets.yaml ├── config_h3d_stage1.yaml ├── config_h3d_stage2.yaml ├── webui.yaml ├── config_h3d_stage3.yaml └── default.yaml ├── requirements.txt ├── setup.py ├── demos ├── pred.txt └── inbetween.txt ├── LICENSE ├── train.py ├── test.py └── render.py /mGPT/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mGPT/archs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mGPT/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
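As a quick orientation before the per-file listing continues, the following is a minimal sketch of how the packages in the tree above are typically wired together. It is not a file from the repository: it assumes the mGPT.config.parse_args entry point used in scripts/get_motion_code.py and reuses the build_data, build_model, and load_pretrained helpers whose sources appear later in this dump; the PyTorch Lightning Trainer call reflects common usage rather than the repository's own train.py / test.py, which are not reproduced here.
# Hedged sketch only -- not part of the repository. Shows how the listed packages typically fit together.
import pytorch_lightning as pl
from mGPT.config import parse_args                      # config entry point (as used in scripts/get_motion_code.py)
from mGPT.data.build_data import build_data             # builds the datamodule from cfg.DATASET (mGPT/data/build_data.py)
from mGPT.models.build_model import build_model         # builds the model from cfg.model (mGPT/models/build_model.py)
from mGPT.utils.load_checkpoint import load_pretrained  # restores weights from cfg.TEST.CHECKPOINTS (mGPT/utils/load_checkpoint.py)

cfg = parse_args(phase="test")              # e.g. pointing at configs/config_h3d_stage3.yaml
datamodule = build_data(cfg, phase="test")  # phase="test" is an assumption; scripts/get_motion_code.py uses phase="token"
model = build_model(cfg, datamodule)
model = load_pretrained(cfg, model, phase="test")
pl.Trainer(accelerator=cfg.ACCELERATOR, devices=cfg.DEVICE).test(model, datamodule=datamodule)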
/mGPT/render/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mGPT/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mGPT/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mGPT/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseLosses 2 | -------------------------------------------------------------------------------- /mGPT/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseMetrics 2 | -------------------------------------------------------------------------------- /mGPT/render/blender/__init__.py: -------------------------------------------------------------------------------- 1 | from .render import render 2 | -------------------------------------------------------------------------------- /scripts/fit_motion.sh: -------------------------------------------------------------------------------- 1 | python -m fit --dir $1 --save_folder $2 --cuda True -------------------------------------------------------------------------------- /assets/meta/std.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/meta/std.npy -------------------------------------------------------------------------------- /mGPT/data/humanml/README.md: -------------------------------------------------------------------------------- 1 | This code is based on https://github.com/EricGuo5513/text-to-motion.git -------------------------------------------------------------------------------- /mGPT/render/blender/data.py: -------------------------------------------------------------------------------- 1 | class Data: 2 | def __len__(self): 3 | return self.N 4 | -------------------------------------------------------------------------------- /assets/meta/mean.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/meta/mean.npy -------------------------------------------------------------------------------- /assets/images/table7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/table7.png -------------------------------------------------------------------------------- /assets/images/table8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/table8.png -------------------------------------------------------------------------------- /assets/meta/std_eval.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/meta/std_eval.npy -------------------------------------------------------------------------------- /assets/images/figure10.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/figure10.png -------------------------------------------------------------------------------- /assets/images/figure12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/figure12.png -------------------------------------------------------------------------------- /assets/images/figure13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/figure13.png -------------------------------------------------------------------------------- /assets/images/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/pipeline.png -------------------------------------------------------------------------------- /assets/images/table15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/table15.png -------------------------------------------------------------------------------- /assets/meta/mean_eval.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/meta/mean_eval.npy -------------------------------------------------------------------------------- /assets/videos/example0.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example0.mp4 -------------------------------------------------------------------------------- /assets/videos/example1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example1.mp4 -------------------------------------------------------------------------------- /assets/videos/example2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example2.mp4 -------------------------------------------------------------------------------- /assets/videos/example3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example3.mp4 -------------------------------------------------------------------------------- /assets/videos/example4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example4.mp4 -------------------------------------------------------------------------------- /assets/videos/example5.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example5.mp4 -------------------------------------------------------------------------------- /assets/videos/example6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example6.mp4 -------------------------------------------------------------------------------- 
/assets/videos/example7.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example7.mp4 -------------------------------------------------------------------------------- /assets/videos/example8.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example8.mp4 -------------------------------------------------------------------------------- /mGPT/data/transforms/joints2jfeats/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Joints2Jfeats 2 | from .rifke import Rifke 3 | -------------------------------------------------------------------------------- /assets/images/avatar_bot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/avatar_bot.jpg -------------------------------------------------------------------------------- /assets/images/avatar_user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/avatar_user.png -------------------------------------------------------------------------------- /assets/videos/example0_fast.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example0_fast.mp4 -------------------------------------------------------------------------------- /prepare/prepare_t5.sh: -------------------------------------------------------------------------------- 1 | cd deps/ 2 | git lfs install 3 | git clone https://huggingface.co/google/flan-t5-base 4 | cd .. 
5 | -------------------------------------------------------------------------------- /assets/videos/example0_blender.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example0_blender.mp4 -------------------------------------------------------------------------------- /mGPT/data/transforms/rots2joints/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Rots2Joints 2 | from .smplh import SMPLH 3 | from .smplx import SMPLX 4 | -------------------------------------------------------------------------------- /prepare/requirements_render.txt: -------------------------------------------------------------------------------- 1 | # for rendering in blender python 2 | pytest-shutil 3 | matplotlib 4 | tqdm 5 | hydra-core 6 | six 7 | natsort 8 | smplx 9 | moviepy -------------------------------------------------------------------------------- /mGPT/data/transforms/rots2rfeats/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Rots2Rfeats 2 | # from .globvel import Globalvel 3 | 4 | from .globvelandy import Globalvelandy 5 | # from .rifeats import Rifeats 6 | -------------------------------------------------------------------------------- /mGPT/data/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .tensors import lengths_to_mask 2 | from .collate import collate_text_and_length, collate_pairs_and_text, collate_datastruct_and_text, collate_tensor_with_padding 3 | -------------------------------------------------------------------------------- /configs/lm/default.yaml: -------------------------------------------------------------------------------- 1 | target: mGPT.archs.mgpt_lm.MLM 2 | params: 3 | model_type: t5 4 | model_path: ./deps/flan-t5-base 5 | stage: ${TRAIN.STAGE} 6 | motion_codebook_size: ${model.params.codebook_size} 7 | ablation: ${ABLATION} 8 | -------------------------------------------------------------------------------- /configs/lm/gpt2_medium.yaml: -------------------------------------------------------------------------------- 1 | target: mGPT.archs.mgpt_lm.MLM 2 | params: 3 | model_type: gpt2 4 | model_path: openai/gpt2-medium 5 | stage: ${TRAIN.STAGE} 6 | motion_codebook_size: ${model.params.codebook_size} 7 | ablation: ${ABLATION} 8 | -------------------------------------------------------------------------------- /configs/lm/t5_large.yaml: -------------------------------------------------------------------------------- 1 | target: mGPT.archs.mgpt_lm.MLM 2 | params: 3 | model_type: t5 4 | model_path: google/flan-t5-large 5 | stage: ${TRAIN.STAGE} 6 | motion_codebook_size: ${model.params.codebook_size} 7 | ablation: ${ABLATION} 8 | -------------------------------------------------------------------------------- /configs/lm/t5_small.yaml: -------------------------------------------------------------------------------- 1 | target: mGPT.archs.mgpt_lm.MLM 2 | params: 3 | model_type: t5 4 | model_path: google/flan-t5-small 5 | stage: ${TRAIN.STAGE} 6 | motion_codebook_size: ${model.params.codebook_size} 7 | ablation: ${ABLATION} 8 | -------------------------------------------------------------------------------- /prepare/download_pretrained_models.sh: -------------------------------------------------------------------------------- 1 | mkdir -p checkpoints/ 2 | cd checkpoints/ 3 | echo -e "The 
pretrained models will be stored in the 'checkpoints' folder\n" 4 | mkdir -p mld_humanml3d_checkpoint/ 5 | 6 | git lfs install 7 | git clone https://huggingface.co/OpenMotionLab/MotionGPT-base 8 | 9 | echo -e "Downloading done!" 10 | -------------------------------------------------------------------------------- /configs/vq/default.yaml: -------------------------------------------------------------------------------- 1 | target: mGPT.archs.mgpt_vq.VQVae 2 | params: 3 | quantizer: 'ema_reset' 4 | code_num: 512 5 | code_dim: 512 6 | output_emb_width: 512 7 | down_t: 2 8 | stride_t: 2 9 | width: 512 10 | depth: 3 11 | dilation_growth_rate: 3 12 | norm: None 13 | activation: 'relu' 14 | nfeats: ${DATASET.NFEATS} 15 | ablation: ${ABLATION} 16 | -------------------------------------------------------------------------------- /mGPT/models/build_model.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | from mGPT.config import instantiate_from_config 3 | 4 | def build_model(cfg, datamodule): 5 | model_config = OmegaConf.to_container(cfg.model, resolve=True) 6 | model_config['params']['cfg'] = cfg 7 | model_config['params']['datamodule'] = datamodule 8 | return instantiate_from_config(model_config) 9 | -------------------------------------------------------------------------------- /mGPT/data/humanml/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset_t2m import Text2MotionDataset 2 | from .dataset_t2m_eval import Text2MotionDatasetEval 3 | from .dataset_t2m_cb import Text2MotionDatasetCB 4 | from .dataset_t2m_token import Text2MotionDatasetToken 5 | from .dataset_t2m_m2t import Text2MotionDatasetM2T 6 | from .dataset_m import MotionDataset 7 | from .dataset_m_vq import MotionDatasetVQ 8 | -------------------------------------------------------------------------------- /prepare/download_smpl_model.sh: -------------------------------------------------------------------------------- 1 | mkdir -p deps/ 2 | cd deps/ 3 | 4 | echo "The smpl model will be stored in the './deps' folder" 5 | 6 | # SMPL Models 7 | echo "Downloading" 8 | gdown "https://drive.google.com/uc?id=1qrFkPZyRwRGd0Q3EY76K8oJaIgs_WK9i" 9 | echo "Extracting" 10 | tar xfzv smpl.tar.gz 11 | echo "Cleaning" 12 | rm smpl.tar.gz 13 | 14 | echo "Downloading done!" 15 | -------------------------------------------------------------------------------- /prepare/download_t2m_evaluators.sh: -------------------------------------------------------------------------------- 1 | mkdir -p deps/ 2 | cd deps/ 3 | 4 | echo "The t2m evaluators will be stored in the './deps' folder" 5 | 6 | # T2M evaluators 7 | echo "Downloading" 8 | gdown "https://drive.google.com/uc?id=1AYsmEG8I3fAAoraT4vau0GnesWBWyeT8" 9 | echo "Extracting" 10 | tar xfzv t2m.tar.gz 11 | echo "Cleaning" 12 | rm t2m.tar.gz 13 | 14 | echo "Downloading done!" 
15 | -------------------------------------------------------------------------------- /mGPT/utils/fixseed.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import random 4 | 5 | 6 | def fixseed(seed): 7 | random.seed(seed) 8 | np.random.seed(seed) 9 | torch.manual_seed(seed) 10 | 11 | 12 | SEED = 10 13 | EVALSEED = 0 14 | # Provokes a warning: not fully functional yet 15 | # torch.set_deterministic(True) 16 | torch.backends.cudnn.benchmark = False 17 | 18 | fixseed(SEED) 19 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | pytorch_lightning 3 | torchmetrics 4 | omegaconf 5 | shortuuid 6 | chumpy 7 | transformers 8 | diffusers 9 | einops 10 | wandb 11 | rich 12 | matplotlib 13 | 14 | # for visualization 15 | smplx==0.1.28 16 | trimesh==3.9.24 17 | joblib==1.2.0 18 | h5py 19 | scikit-image 20 | spacy 21 | ftfy 22 | more-itertools 23 | natsort 24 | pyrender 25 | moviepy 26 | librosa 27 | triangle==20220202 28 | -------------------------------------------------------------------------------- /mGPT/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Transform 2 | from .smpl import SMPLTransform 3 | from .xyz import XYZTransform 4 | 5 | # rots2rfeats 6 | from .rots2rfeats import Rots2Rfeats 7 | from .rots2rfeats import Globalvelandy 8 | 9 | # rots2joints 10 | from .rots2joints import Rots2Joints 11 | from .rots2joints import SMPLH, SMPLX 12 | 13 | # joints2jfeats 14 | from .joints2jfeats import Joints2Jfeats 15 | from .joints2jfeats import Rifke 16 | -------------------------------------------------------------------------------- /mGPT/render/blender/vertices.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def prepare_vertices(vertices, canonicalize=True): 5 | data = vertices 6 | # Swap axis (gravity=Z instead of Y) 7 | # data = data[..., [2, 0, 1]] 8 | 9 | # Make left/right correct 10 | # data[..., [1]] = -data[..., [1]] 11 | 12 | # Center the first root to the first frame 13 | data -= data[[0], [0], :] 14 | 15 | # Remove the floor 16 | data[..., 2] -= np.min(data[..., 2]) 17 | return data 18 | -------------------------------------------------------------------------------- /mGPT/render/blender/sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def get_frameidx(*, mode, nframes, exact_frame, frames_to_keep): 4 | if mode == "sequence": 5 | frameidx = np.linspace(0, nframes - 1, frames_to_keep) 6 | frameidx = np.round(frameidx).astype(int) 7 | frameidx = list(frameidx) 8 | elif mode == "frame": 9 | index_frame = int(exact_frame*nframes) 10 | frameidx = [index_frame] 11 | elif mode == "video": 12 | frameidx = range(0, nframes) 13 | else: 14 | raise ValueError(f"Unsupported render mode: {mode}") 15 | return frameidx 16 | -------------------------------------------------------------------------------- /configs/evaluator/tm2t.yaml: -------------------------------------------------------------------------------- 1 | t2m_textencoder: 2 | target: mGPT.archs.tm2t_evaluator.TextEncoderBiGRUCo 3 | params: 4 | word_size: 300 5 | pos_size: 15 6 | hidden_size: 512 7 | output_size: 512 8 | t2m_moveencoder: 9 | target: mGPT.archs.tm2t_evaluator.MovementConvEncoder 10 
| params: 11 | input_size: ${eval:${DATASET.NFEATS} - 4} 12 | hidden_size: 512 13 | output_size: 512 14 | t2m_motionencoder: 15 | target: mGPT.archs.tm2t_evaluator.MotionEncoderBiGRUCo 16 | params: 17 | input_size: ${evaluator.tm2t.t2m_moveencoder.params.output_size} 18 | hidden_size: 1024 19 | output_size: 512 20 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | import numpy as np 6 | 7 | setup( 8 | name="MotionGPT", 9 | version="0.1.0", 10 | author="Biao Jiang and Xin Chen", 11 | author_email="jiangb22@m.fudan.edu.cn", 12 | description="MotionGPT: Human motion as a foreign language.", 13 | packages=find_packages(exclude=("configs", "deps")), 14 | python_requires=">=3.8", 15 | install_requires=[ 16 | "torch", 17 | "numpy", 18 | "tqdm", 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /configs/render.yaml: -------------------------------------------------------------------------------- 1 | NAME: '___render_do_not_need_name__' # Experiment name 2 | ACCELERATOR: 'gpu' # Devices optional: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps”, “auto” 3 | DEVICE: [0] # Index of gpus eg. [0] or [0,1,2,3] 4 | 5 | RENDER: 6 | FOLDER: '___no_need__' 7 | INPUT_MODE: 'npy' 8 | DIR: '' 9 | NPY: '___no_need__' 10 | DENOISING: True 11 | OLDRENDER: True 12 | # ["ultra", "high", "med", "low"] 13 | # RES: 'high' 14 | RES: 'med' 15 | DOWNSAMPLE: False 16 | FPS: 20.0 17 | CANONICALIZE: True 18 | EXACT_FRAME: 0.5 19 | NUM: 8 20 | MODE: '___no_need__' #sequence frame video 21 | VID_EXT: mp4 22 | ALWAYS_ON_FLOOR: false 23 | GT: false 24 | -------------------------------------------------------------------------------- /scripts/visualize_motion.sh: -------------------------------------------------------------------------------- 1 | # for npy folder 2 | # CUDA_VISIBLE_DEVICES=0 /apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender --background --python render.py -- --cfg=./configs/render.yaml --dir=$1 --mode=$2 3 | 4 | for j in `seq 0 2` 5 | do 6 | CUDA_VISIBLE_DEVICES=0 /apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender --background --python render.py -- --dir=$1 --mode=$2 7 | done 8 | 9 | # for single npy 10 | # /apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender --background --python render.py -- --cfg=./configs/render_cx.yaml --npy=$1 --joint_type=HumanML3D 11 | -------------------------------------------------------------------------------- /scripts/visualize_motion_parallel.sh: -------------------------------------------------------------------------------- 1 | # # parallel fit 2 | # for i in `seq 0 7` 3 | # do 4 | # for j in `seq 0 2` 5 | # do 6 | # CUDA_VISIBLE_DEVICES=$i python -m fit --dir $1 --save_folder $2 --cuda True & 7 | # echo $j & 8 | # done 9 | # done 10 | # wait 11 | # echo "all done" 12 | 13 | 14 | # parallel render 15 | for i in `seq 0 7` 16 | do 17 | for j in `seq 0 2` 18 | do 19 | sleep 1 & 20 | CUDA_VISIBLE_DEVICES=$i /apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender --background --python render.py -- --dir=$1 --mode=$2 & 21 | echo $i 22 | done 23 | done 24 | wait 25 | echo "all done" 
26 | -------------------------------------------------------------------------------- /mGPT/data/build_data.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | from os.path import join as pjoin 3 | from mGPT.config import instantiate_from_config 4 | 5 | 6 | def build_data(cfg, phase="train"): 7 | data_config = OmegaConf.to_container(cfg.DATASET, resolve=True) 8 | data_config['params'] = {'cfg': cfg, 'phase': phase} 9 | if isinstance(data_config['target'], str): 10 | return instantiate_from_config(data_config) 11 | elif isinstance(data_config['target'], list): 12 | data_config_tmp = data_config.copy() 13 | data_config_tmp['params']['dataModules'] = data_config['target'] 14 | data_config_tmp['target'] = 'mGPT.data.Concat.ConcatDataModule' 15 | return instantiate_from_config(data_config_tmp) 16 | -------------------------------------------------------------------------------- /scripts/fit_motion_parallel.sh: -------------------------------------------------------------------------------- 1 | # parallel fit 2 | for i in `seq 0 7` 3 | do 4 | for j in `seq 0 1` 5 | do 6 | CUDA_VISIBLE_DEVICES=$i python -m fit --dir $1 --save_folder $2 --cuda True & 7 | echo $j & 8 | done 9 | done 10 | 11 | wait 12 | echo "all done" 13 | 14 | # # parallel fit 15 | # for i in `seq 0 25` 16 | # do 17 | # CUDA_VISIBLE_DEVICES=$3 python -m fit --dir $1 --save_folder $2 --cuda True & 18 | # echo $i 19 | # done 20 | # wait 21 | # echo "all done" 22 | 23 | 24 | # # gpu parallel fit 25 | # for i in `seq 0 7` 26 | # do 27 | # CUDA_VISIBLE_DEVICES=$i python -m fit --dir $1 --save_folder $2 --cuda True & 28 | # echo $i 29 | # done 30 | # wait 31 | # echo "all done" 32 | -------------------------------------------------------------------------------- /mGPT/utils/misc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def to_numpy(tensor): 5 | if torch.is_tensor(tensor): 6 | return tensor.cpu().numpy() 7 | elif type(tensor).__module__ != 'numpy': 8 | raise ValueError("Cannot convert {} to numpy array".format( 9 | type(tensor))) 10 | return tensor 11 | 12 | 13 | def to_torch(ndarray): 14 | if type(ndarray).__module__ == 'numpy': 15 | return torch.from_numpy(ndarray) 16 | elif not torch.is_tensor(ndarray): 17 | raise ValueError("Cannot convert {} to torch tensor".format( 18 | type(ndarray))) 19 | return ndarray 20 | 21 | 22 | def cleanexit(): 23 | import sys 24 | import os 25 | try: 26 | sys.exit(0) 27 | except SystemExit: 28 | os._exit(0) 29 | 30 | -------------------------------------------------------------------------------- /mGPT/utils/sample_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | logger = logging.getLogger(__name__) 4 | 5 | def cfg_mean_nsamples_resolution(cfg): 6 | if cfg.mean and cfg.number_of_samples > 1: 7 | logger.error("All the samples will be the mean. 
cfg.number_of_samples=1 will be forced.") 8 | cfg.number_of_samples = 1 9 | 10 | return cfg.number_of_samples == 1 11 | 12 | 13 | def get_path(sample_path: Path, is_amass: bool, gender: str, split: str, onesample: bool, mean: bool, fact: float): 14 | extra_str = ("_mean" if mean else "") if onesample else "_multi" 15 | fact_str = "" if fact == 1 else f"{fact}_" 16 | gender_str = gender + "_" if is_amass else "" 17 | path = sample_path / f"{fact_str}{gender_str}{split}{extra_str}" 18 | return path 19 | -------------------------------------------------------------------------------- /demos/pred.txt: -------------------------------------------------------------------------------- 1 | Predict motion: #datasets/humanml3d/new_joint_vecs/M009643.npy 2 | Predict motion: #datasets/humanml3d/new_joint_vecs/M008862.npy 3 | Predict motion: #datasets/humanml3d/new_joint_vecs/004163.npy 4 | Predict motion: #datasets/humanml3d/new_joint_vecs/005166.npy 5 | Predict motion: #datasets/humanml3d/new_joint_vecs/009181.npy 6 | Predict motion: #datasets/humanml3d/new_joint_vecs/000179.npy 7 | Predict motion: #datasets/humanml3d/new_joint_vecs/013811.npy 8 | Predict motion: #datasets/humanml3d/new_joint_vecs/000556.npy 9 | Predict motion: #datasets/humanml3d/new_joint_vecs/010967.npy 10 | Predict motion: #datasets/humanml3d/new_joint_vecs/007567.npy 11 | -------------------------------------------------------------------------------- /prepare/instructions/template_pretrain.json: -------------------------------------------------------------------------------- 1 | { 2 | "Text-to-Motion": { 3 | "t2m": { 4 | "class": "t2m", 5 | "input": [ 6 | "" 7 | ], 8 | "output": [ 9 | "" 10 | ] 11 | } 12 | }, 13 | "Motion-to-Text": { 14 | "m2t": { 15 | "class": "m2t", 16 | "input": [ 17 | "" 18 | ], 19 | "output": [ 20 | "" 21 | ] 22 | } 23 | }, 24 | "Motion Prediction": { 25 | "pred": { 26 | "class": "predict", 27 | "input": [ 28 | "Predict motion: " 29 | ], 30 | "output": [ 31 | "" 32 | ] 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /configs/assets.yaml: -------------------------------------------------------------------------------- 1 | CONFIG_FOLDER: configs # Config files path 2 | FOLDER: experiments # Experiment files saving path 3 | 4 | TEST: 5 | FOLDER: results # Testing files saving path 6 | 7 | DATASET: 8 | TASK_ROOT: deps/mGPT_instructions 9 | SMPL_PATH: deps/smpl 10 | TRANSFORM_PATH: deps/transforms/ 11 | WORD_VERTILIZER_PATH: deps/glove/ 12 | KIT: 13 | ROOT: datasets/kit-ml # KIT directory 14 | SPLIT_ROOT: datasets/kit-ml # KIT splits directory 15 | MEAN_STD_PATH: deps/t2m/ 16 | HUMANML3D: 17 | ROOT: datasets/humanml3d # HumanML3D directory 18 | SPLIT_ROOT: datasets/humanml3d # HumanML3D splits directory 19 | MEAN_STD_PATH: deps/t2m/ 20 | 21 | METRIC: 22 | TM2T: 23 | t2m_path: deps/t2m/ # path for tm2t evaluator 24 | 25 | model: 26 | whisper_path: deps/whisper-large-v2 # path for whisper model, webui only 27 | 28 | RENDER: 29 | BLENDER_PATH: libs/blender-2.93.2-linux-x64/blender 30 | SMPL_MODEL_PATH: deps/smpl/smpl_models/smpl 31 | MODEL_PATH: deps/smpl/smpl_models/ 32 | FACES_PATH: deps/smplh/smplh.faces 33 | -------------------------------------------------------------------------------- /mGPT/models/utils/position_encoding_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class PositionalEncoding(nn.Module): 7 | 8 | def __init__(self, 
d_model, dropout=0.1, max_len=5000, batch_first=False): 9 | super().__init__() 10 | self.batch_first = batch_first 11 | 12 | self.dropout = nn.Dropout(p=dropout) 13 | 14 | pe = torch.zeros(max_len, d_model) 15 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 16 | div_term = torch.exp(torch.arange( 17 | 0, d_model, 2).float() * (-np.log(10000.0) / d_model)) 18 | pe[:, 0::2] = torch.sin(position * div_term) 19 | pe[:, 1::2] = torch.cos(position * div_term) 20 | pe = pe.unsqueeze(0).transpose(0, 1) 21 | 22 | self.register_buffer("pe", pe) 23 | 24 | def forward(self, x): 25 | # not used in the final model 26 | if self.batch_first: 27 | x = x + self.pe.permute(1, 0, 2)[:, : x.shape[1], :] 28 | else: 29 | x = x + self.pe[: x.shape[0], :] 30 | return self.dropout(x) 31 | -------------------------------------------------------------------------------- /prepare/smplh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Extraction of the archives" 3 | echo 4 | 5 | cd deps/smplh 6 | mkdir tmp 7 | cd tmp 8 | 9 | tar xfv ../smplh.tar.xz 10 | unzip ../mano_v1_2.zip 11 | 12 | cd ../../../ 13 | echo 14 | echo "Done!" 15 | echo 16 | echo "Clean and merge models" 17 | echo 18 | 19 | python prepare/merge_smplh_mano.py --smplh-fn deps/smplh/tmp/male/model.npz --mano-left-fn deps/smplh/tmp/mano_v1_2/models/MANO_LEFT.pkl --mano-right-fn deps/smplh/tmp/mano_v1_2/models/MANO_RIGHT.pkl --output-folder deps/smplh/ 20 | 21 | python prepare/merge_smplh_mano.py --smplh-fn deps/smplh/tmp/female/model.npz --mano-left-fn deps/smplh/tmp/mano_v1_2/models/MANO_LEFT.pkl --mano-right-fn deps/smplh/tmp/mano_v1_2/models/MANO_RIGHT.pkl --output-folder deps/smplh/ 22 | 23 | python prepare/merge_smplh_mano.py --smplh-fn deps/smplh/tmp/neutral/model.npz --mano-left-fn deps/smplh/tmp/mano_v1_2/models/MANO_LEFT.pkl --mano-right-fn deps/smplh/tmp/mano_v1_2/models/MANO_RIGHT.pkl --output-folder deps/smplh/ 24 | 25 | echo 26 | echo "Done!" 27 | echo 28 | echo "Deleting tmp files" 29 | rm -rf deps/smplh/tmp/ 30 | echo 31 | echo "Done!" 32 | -------------------------------------------------------------------------------- /mGPT/data/tools/tensors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import List, Dict 18 | import torch 19 | from torch import Tensor 20 | 21 | 22 | def lengths_to_mask(lengths: List[int], device: torch.device) -> Tensor: 23 | lengths = torch.tensor(lengths, device=device) 24 | max_len = max(lengths) 25 | mask = torch.arange(max_len, device=device).expand(len(lengths), max_len) < lengths.unsqueeze(1) 26 | return mask 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 OpenMotionLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /mGPT/models/utils/tools.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | def remove_padding(tensors, lengths): 4 | return [tensor[:tensor_length] for tensor, tensor_length in zip(tensors, lengths)] 5 | 6 | class AutoParams(nn.Module): 7 | def __init__(self, **kargs): 8 | try: 9 | for param in self.needed_params: 10 | if param in kargs: 11 | setattr(self, param, kargs[param]) 12 | else: 13 | raise ValueError(f"{param} is needed.") 14 | except : 15 | pass 16 | 17 | try: 18 | for param, default in self.optional_params.items(): 19 | if param in kargs and kargs[param] is not None: 20 | setattr(self, param, kargs[param]) 21 | else: 22 | setattr(self, param, default) 23 | except : 24 | pass 25 | super().__init__() 26 | 27 | 28 | # taken from joeynmt repo 29 | def freeze_params(module: nn.Module) -> None: 30 | """ 31 | Freeze the parameters of this module, 32 | i.e. 
do not update them during training 33 | 34 | :param module: freeze parameters of this module 35 | """ 36 | for _, p in module.named_parameters(): 37 | p.requires_grad = False 38 | -------------------------------------------------------------------------------- /mGPT/utils/load_checkpoint.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def load_pretrained(cfg, model, logger=None, phase="train"): 4 | if logger is not None: 5 | logger.info(f"Loading pretrain model from {cfg.TRAIN.PRETRAINED}") 6 | 7 | if phase == "train": 8 | ckpt_path = cfg.TRAIN.PRETRAINED 9 | elif phase == "test": 10 | ckpt_path = cfg.TEST.CHECKPOINTS 11 | 12 | state_dict = torch.load(ckpt_path, map_location="cpu")["state_dict"] 13 | model.load_state_dict(state_dict, strict=True) 14 | return model 15 | 16 | 17 | def load_pretrained_vae(cfg, model, logger=None): 18 | state_dict = torch.load(cfg.TRAIN.PRETRAINED_VAE, 19 | map_location="cpu")['state_dict'] 20 | if logger is not None: 21 | logger.info(f"Loading pretrain vae from {cfg.TRAIN.PRETRAINED_VAE}") 22 | 23 | # Extract encoder/decoder 24 | from collections import OrderedDict 25 | vae_dict = OrderedDict() 26 | for k, v in state_dict.items(): 27 | if "motion_vae" in k: 28 | name = k.replace("motion_vae.", "") 29 | vae_dict[name] = v 30 | elif "vae" in k: 31 | name = k.replace("vae.", "") 32 | vae_dict[name] = v 33 | if hasattr(model, 'vae'): 34 | model.vae.load_state_dict(vae_dict, strict=True) 35 | else: 36 | model.motion_vae.load_state_dict(vae_dict, strict=True) 37 | 38 | return model 39 | -------------------------------------------------------------------------------- /mGPT/data/transforms/identity.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | from torch import Tensor 19 | 20 | from .base import Datastruct, dataclass, Transform 21 | 22 | 23 | class IdentityTransform(Transform): 24 | def __init__(self, **kwargs): 25 | return 26 | 27 | def Datastruct(self, **kwargs): 28 | return IdentityDatastruct(**kwargs) 29 | 30 | def __repr__(self): 31 | return "IdentityTransform()" 32 | 33 | 34 | @dataclass 35 | class IdentityDatastruct(Datastruct): 36 | transforms: IdentityTransform 37 | 38 | features: Optional[Tensor] = None 39 | 40 | def __post_init__(self): 41 | self.datakeys = ["features"] 42 | 43 | def __len__(self): 44 | return len(self.rfeats) 45 | -------------------------------------------------------------------------------- /mGPT/render/blender/camera.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | 3 | 4 | class Camera: 5 | def __init__(self, *, first_root, mode, is_mesh): 6 | camera = bpy.data.objects['Camera'] 7 | 8 | ## initial position 9 | camera.location.x = 7.36 10 | camera.location.y = -6.93 11 | if is_mesh: 12 | # camera.location.z = 5.45 13 | camera.location.z = 5.6 14 | else: 15 | camera.location.z = 5.2 16 | 17 | # wider point of view 18 | if mode == "sequence": 19 | if is_mesh: 20 | camera.data.lens = 65 21 | else: 22 | camera.data.lens = 85 23 | elif mode == "frame": 24 | if is_mesh: 25 | camera.data.lens = 130 26 | else: 27 | camera.data.lens = 85 28 | elif mode == "video": 29 | if is_mesh: 30 | camera.data.lens = 110 31 | else: 32 | # avoid cutting person 33 | camera.data.lens = 85 34 | # camera.data.lens = 140 35 | 36 | # camera.location.x += 0.75 37 | 38 | self.mode = mode 39 | self.camera = camera 40 | 41 | self.camera.location.x += first_root[0] 42 | self.camera.location.y += first_root[1] 43 | 44 | self._root = first_root 45 | 46 | def update(self, newroot): 47 | delta_root = newroot - self._root 48 | 49 | self.camera.location.x += delta_root[0] 50 | self.camera.location.y += delta_root[1] 51 | 52 | self._root = newroot 53 | -------------------------------------------------------------------------------- /scripts/plys2npy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from argparse import ArgumentParser 4 | from pathlib import Path 5 | 6 | import natsort 7 | import numpy as np 8 | import torch 9 | import trimesh 10 | from tqdm import tqdm 11 | 12 | 13 | def main(): 14 | parser = ArgumentParser() 15 | 16 | group = parser.add_argument_group("Params") 17 | group.add_argument( 18 | "--ply_dir", 19 | type=str, 20 | required=True, 21 | help="ply set", 22 | ) 23 | group.add_argument( 24 | "--out_dir", 25 | type=str, 26 | required=True, 27 | help="output folder", 28 | ) 29 | params = parser.parse_args() 30 | plys2npy(params.ply_dir, params.out_dir) 31 | 32 | def plys2npy(ply_dir, out_dir): 33 | ply_dir = Path(ply_dir) 34 | paths = [] 35 | file_list = natsort.natsorted(os.listdir(ply_dir)) 36 | for item in file_list: 37 | if item.endswith(".ply") and not item.endswith("_gt.ply"): 38 | paths.append(os.path.join(ply_dir, item)) 39 | 40 | 41 | meshs = np.zeros((len(paths), 6890, 3)) 42 | for i, path in enumerate(paths): 43 | mesh = trimesh.load_mesh(path, process=False) 44 | vs = mesh.vertices 45 | assert vs.shape == (6890, 3) 46 | meshs[i] = vs 47 | 48 | basename = os.path.basename(ply_dir) 49 | if basename.startswith("SMPLFit_"): 50 | basename = basename[len("SMPLFit_"):] 51 | file_name = 
os.path.join(out_dir, basename + "_mesh.npy") 52 | np.save(file_name, meshs) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /configs/config_h3d_stage1.yaml: -------------------------------------------------------------------------------- 1 | NAME: VQVAE_HumanML3D # Experiment name 2 | ACCELERATOR: 'gpu' # Devices optional: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps”, “auto” 3 | NUM_NODES: 1 # Number of GPU nodes for distributed training 4 | DEVICE: [0] # Index of gpus eg. [0] or [0,1,2,3] 5 | 6 | TRAIN: 7 | #--------------------------------- 8 | STAGE: vae # stage "vae" , "lm_pretrain", "lm_instruct" 9 | #--------------------------------- 10 | NUM_WORKERS: 16 # Number of workers 11 | BATCH_SIZE: 256 # Size of batches 12 | END_EPOCH: 999999 # End epoch 13 | RESUME: '' # Resume training from this path 14 | PRETRAINED: '' # Pretrained model path 15 | 16 | OPTIM: 17 | target: AdamW 18 | params: 19 | lr: 2e-4 20 | betas: [0.9, 0.99] 21 | weight_decay: 0.0 22 | 23 | # Evaluating Configuration 24 | EVAL: 25 | BATCH_SIZE: 32 # Evaluating Batch size 26 | SPLIT: test 27 | 28 | TEST: 29 | CHECKPOINTS: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar 30 | SPLIT: test 31 | BATCH_SIZE: 32 # Testing Batch size 32 | 33 | DATASET: 34 | target: mGPT.data.HumanML3D.HumanML3DDataModule 35 | 36 | METRIC: 37 | TYPE: ['TM2TMetrics', 'MRMetrics'] 38 | 39 | LOSS: 40 | LAMBDA_FEATURE: 1.0 41 | LAMBDA_VELOCITY: 0.5 42 | LAMBDA_COMMIT: 0.02 43 | LAMBDA_CLS: 1.0 44 | ABLATION: 45 | RECONS_LOSS: 'l1_smooth' 46 | 47 | model: 48 | target: mGPT.models.mgpt.MotionGPT 49 | params: 50 | condition: 'text' 51 | task: 't2m' 52 | lm: ${lm.default} 53 | motion_vae: ${vq.default} 54 | 55 | LOGGER: 56 | TYPE: ['tensorboard', 'wandb'] 57 | VAL_EVERY_STEPS: 10 58 | WANDB: 59 | params: 60 | project: motiongpt 61 | -------------------------------------------------------------------------------- /configs/config_h3d_stage2.yaml: -------------------------------------------------------------------------------- 1 | NAME: Pretrain_HumanML3D # Experiment name 2 | ACCELERATOR: 'gpu' # Devices optional: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps”, “auto” 3 | NUM_NODES: 1 # Number of GPU nodes for distributed training 4 | DEVICE: [0] # Index of gpus eg. 
[0] or [0,1,2,3] 5 | 6 | TRAIN: 7 | #--------------------------------- 8 | STAGE: lm_pretrain # stage "vae" , "lm_pretrain", "lm_instruct" 9 | #--------------------------------- 10 | NUM_WORKERS: 16 # Number of workers 11 | BATCH_SIZE: 16 # Size of batches 12 | END_EPOCH: 999999 # End epoch 13 | RESUME: '' # Resume training from this path 14 | PRETRAINED: '' # Preatrained model path 15 | PRETRAINED_VAE: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar # Vae model path 16 | 17 | OPTIM: 18 | target: AdamW 19 | params: 20 | lr: 2e-4 21 | betas: [0.9, 0.99] 22 | weight_decay: 0.0 23 | 24 | # Evaluating Configuration 25 | EVAL: 26 | BATCH_SIZE: 32 # Evaluating Batch size 27 | SPLIT: test 28 | 29 | TEST: 30 | CHECKPOINTS: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar 31 | SPLIT: test 32 | BATCH_SIZE: 32 # training Batch size 33 | 34 | DATASET: 35 | target: mGPT.data.HumanML3D.HumanML3DDataModule 36 | CODE_PATH: TOKENS 37 | 38 | METRIC: 39 | TYPE: ['TM2TMetrics', 'PredMetrics'] 40 | 41 | LOSS: 42 | LAMBDA_FEATURE: 1.0 43 | LAMBDA_VELOCITY: 0.5 44 | LAMBDA_COMMIT: 0.02 45 | LAMBDA_CLS: 1.0 46 | ABLATION: 47 | RECONS_LOSS: 'l1_smooth' 48 | 49 | model: 50 | target: mGPT.models.mgpt.MotionGPT 51 | params: 52 | condition: 'text' 53 | task: 't2m' 54 | lm: ${lm.default} 55 | motion_vae: ${vq.default} 56 | 57 | LOGGER: 58 | TYPE: ['tensorboard', 'wandb'] 59 | VAL_EVERY_STEPS: 10 60 | WANDB: 61 | params: 62 | project: motiongpt 63 | -------------------------------------------------------------------------------- /mGPT/metrics/base.py: -------------------------------------------------------------------------------- 1 | from torch import Tensor, nn 2 | from os.path import join as pjoin 3 | from .mr import MRMetrics 4 | from .t2m import TM2TMetrics 5 | from .mm import MMMetrics 6 | from .m2t import M2TMetrics 7 | from .m2m import PredMetrics 8 | 9 | 10 | class BaseMetrics(nn.Module): 11 | def __init__(self, cfg, datamodule, debug, **kwargs) -> None: 12 | super().__init__() 13 | 14 | njoints = datamodule.njoints 15 | 16 | data_name = datamodule.name 17 | if data_name in ["humanml3d", "kit"]: 18 | self.TM2TMetrics = TM2TMetrics( 19 | cfg=cfg, 20 | dataname=data_name, 21 | diversity_times=30 if debug else cfg.METRIC.DIVERSITY_TIMES, 22 | dist_sync_on_step=cfg.METRIC.DIST_SYNC_ON_STEP, 23 | ) 24 | self.M2TMetrics = M2TMetrics( 25 | cfg=cfg, 26 | w_vectorizer=datamodule.hparams.w_vectorizer, 27 | diversity_times=30 if debug else cfg.METRIC.DIVERSITY_TIMES, 28 | dist_sync_on_step=cfg.METRIC.DIST_SYNC_ON_STEP) 29 | self.MMMetrics = MMMetrics( 30 | cfg=cfg, 31 | mm_num_times=cfg.METRIC.MM_NUM_TIMES, 32 | dist_sync_on_step=cfg.METRIC.DIST_SYNC_ON_STEP, 33 | ) 34 | 35 | self.MRMetrics = MRMetrics( 36 | njoints=njoints, 37 | jointstype=cfg.DATASET.JOINT_TYPE, 38 | dist_sync_on_step=cfg.METRIC.DIST_SYNC_ON_STEP, 39 | ) 40 | self.PredMetrics = PredMetrics( 41 | cfg=cfg, 42 | njoints=njoints, 43 | jointstype=cfg.DATASET.JOINT_TYPE, 44 | dist_sync_on_step=cfg.METRIC.DIST_SYNC_ON_STEP, 45 | task=cfg.model.params.task, 46 | ) 47 | -------------------------------------------------------------------------------- /mGPT/render/blender/tools.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | import numpy as np 3 | 4 | 5 | def style_detect(data): 6 | is_mesh = False 7 | is_smplx = False 8 | jointstyle = 'mmm' 9 | # heuristic 10 | if data.shape[1] > 1000: 11 | is_mesh = True 12 | if data.shape[1] == 10475: 13 | is_smplx = True 14 | if data.shape[1] == 22: 15 | jointstyle = 
'humanml3d' 16 | 17 | return is_mesh, is_smplx, jointstyle 18 | 19 | 20 | 21 | # see this for more explanation 22 | # https://gist.github.com/iyadahmed/7c7c0fae03c40bd87e75dc7059e35377 23 | # This should be solved with newer versions of Blender 24 | class ndarray_pydata(np.ndarray): 25 | def __bool__(self) -> bool: 26 | return len(self) > 0 27 | 28 | 29 | def load_numpy_vertices_into_blender(vertices, faces, name, mat): 30 | mesh = bpy.data.meshes.new(name) 31 | mesh.from_pydata(vertices, [], faces.view(ndarray_pydata)) 32 | mesh.validate() 33 | 34 | obj = bpy.data.objects.new(name, mesh) 35 | bpy.context.scene.collection.objects.link(obj) 36 | 37 | bpy.ops.object.select_all(action='DESELECT') 38 | obj.select_set(True) 39 | obj.active_material = mat 40 | bpy.context.view_layer.objects.active = obj 41 | bpy.ops.object.shade_smooth() 42 | bpy.ops.object.select_all(action='DESELECT') 43 | return True 44 | 45 | 46 | def delete_objs(names): 47 | if not isinstance(names, list): 48 | names = [names] 49 | # bpy.ops.object.mode_set(mode='OBJECT') 50 | bpy.ops.object.select_all(action='DESELECT') 51 | for obj in bpy.context.scene.objects: 52 | for name in names: 53 | if obj.name.startswith(name) or obj.name.endswith(name): 54 | obj.select_set(True) 55 | bpy.ops.object.delete() 56 | bpy.ops.object.select_all(action='DESELECT') 57 | -------------------------------------------------------------------------------- /configs/webui.yaml: -------------------------------------------------------------------------------- 1 | NAME: Webui # Experiment name 2 | DEBUG: False # Debug mode 3 | ACCELERATOR: 'cpu' # Devices optional: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps”, “auto” 4 | DEVICE: [0] # Index of gpus eg. [0] or [0,1,2,3] 5 | 6 | # Training configuration 7 | TRAIN: 8 | #--------------------------------- 9 | STAGE: lm_instruct 10 | NUM_WORKERS: 32 # Number of workers 11 | BATCH_SIZE: 16 # Size of batches 12 | START_EPOCH: 0 # Start epoch 13 | END_EPOCH: 99999 # End epoch 14 | ABLATION: 15 | pkeep: 0.5 16 | OPTIM: 17 | TYPE: AdamW # Optimizer type 18 | LR: 2e-4 # Learning rate 19 | WEIGHT_DECAY: 0.0 20 | LR_SCHEDULER: [100, 200, 300, 400] 21 | GAMMA: 0.8 22 | 23 | # Evaluating Configuration 24 | EVAL: 25 | BATCH_SIZE: 32 # Evaluating Batch size 26 | SPLIT: test 27 | 28 | # Test Configuration 29 | TEST: 30 | CHECKPOINTS: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar 31 | SPLIT: test 32 | BATCH_SIZE: 32 # Testing Batch size 33 | MEAN: False 34 | NUM_SAMPLES: 1 35 | FACT: 1 36 | 37 | # Datasets Configuration 38 | DATASET: 39 | target: mGPT.data.webui.HumanML3DDataModule 40 | 41 | METRIC: 42 | TYPE: ['TM2TMetrics'] 43 | # Losses Configuration 44 | LOSS: 45 | TYPE: t2mgpt # Losses type 46 | LAMBDA_FEATURE: 1.0 47 | LAMBDA_VELOCITY: 0.5 48 | LAMBDA_COMMIT: 0.02 49 | LAMBDA_CLS: 1.0 50 | LAMBDA_M2T2M: 1.0 51 | LAMBDA_T2M2T: 10.0 52 | ABLATION: 53 | RECONS_LOSS: 'l1_smooth' 54 | 55 | # Model Configuration 56 | model: 57 | target: mGPT.models.mgpt_webui.MotionGPT 58 | params: 59 | condition: 'text' 60 | task: 't2m' 61 | lm: ${lm.default} 62 | motion_vae: ${vq.default} 63 | 64 | # Logger configuration 65 | LOGGER: 66 | LOG_EVERY_STEPS: 5 67 | VAL_EVERY_STEPS: 10 68 | TENSORBOARD: True 69 | wandb: 70 | params: 71 | project: null 72 | -------------------------------------------------------------------------------- /mGPT/data/humanml/dataset_m_vq.py: -------------------------------------------------------------------------------- 1 | import random 2 | import codecs as cs 3 | import numpy as np 4 | from 
torch.utils import data 5 | from rich.progress import track 6 | from os.path import join as pjoin 7 | from .dataset_m import MotionDataset 8 | from .dataset_t2m import Text2MotionDataset 9 | 10 | 11 | class MotionDatasetVQ(Text2MotionDataset): 12 | def __init__( 13 | self, 14 | data_root, 15 | split, 16 | mean, 17 | std, 18 | max_motion_length, 19 | min_motion_length, 20 | win_size, 21 | unit_length=4, 22 | fps=20, 23 | tmpFile=True, 24 | tiny=False, 25 | debug=False, 26 | **kwargs, 27 | ): 28 | super().__init__(data_root, split, mean, std, max_motion_length, 29 | min_motion_length, unit_length, fps, tmpFile, tiny, 30 | debug, **kwargs) 31 | 32 | # Filter out the motions that are too short 33 | self.window_size = win_size 34 | name_list = list(self.name_list) 35 | for name in self.name_list: 36 | motion = self.data_dict[name]["motion"] 37 | if motion.shape[0] < self.window_size: 38 | name_list.remove(name) 39 | self.data_dict.pop(name) 40 | self.name_list = name_list 41 | 42 | def __len__(self): 43 | return len(self.name_list) 44 | 45 | def __getitem__(self, item): 46 | idx = self.pointer + item 47 | data = self.data_dict[self.name_list[idx]] 48 | motion, length = data["motion"], data["length"] 49 | 50 | idx = random.randint(0, motion.shape[0] - self.window_size) 51 | motion = motion[idx:idx + self.window_size] 52 | motion = (motion - self.mean) / self.std 53 | 54 | return None, motion, length, None, None, None, None, 55 | -------------------------------------------------------------------------------- /configs/config_h3d_stage3.yaml: -------------------------------------------------------------------------------- 1 | NAME: Instruct_HumanML3D # Experiment names 2 | ACCELERATOR: 'gpu' # Devices optioncal: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps, “auto” 3 | NUM_NODES: 1 # Number of GPU nodes for distributed training 4 | DEVICE: [0] # Index of gpus eg. 
[0] or [0,1,2,3] 5 | 6 | TRAIN: 7 | #--------------------------------- 8 | STAGE: lm_instruct # stage "vae" , "lm_pretrain", "lm_instruct" 9 | #--------------------------------- 10 | NUM_WORKERS: 16 # Number of workers 11 | BATCH_SIZE: 16 # Size of batches 12 | END_EPOCH: 999999 # End epoch 13 | RESUME: '' # Resume training from this path 14 | PRETRAINED: '' # Preatrained model path 15 | PRETRAINED_VAE: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar # Vae model path 16 | 17 | OPTIM: 18 | target: AdamW 19 | params: 20 | lr: 1e-4 21 | betas: [0.9, 0.99] 22 | weight_decay: 0.0 23 | 24 | # Evaluating Configuration 25 | EVAL: 26 | BATCH_SIZE: 32 # Evaluating Batch size 27 | SPLIT: test 28 | 29 | TEST: 30 | CHECKPOINTS: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar 31 | # CHECKPOINTS: /apdcephfs/share_1227775/billljiang/memData/experiments/gpt0/0514_Instruct_T5BASE_HumanML3D/checkpoints/epoch=119.tar 32 | SPLIT: test 33 | BATCH_SIZE: 32 # training Batch size 34 | 35 | DATASET: 36 | target: mGPT.data.HumanML3D.HumanML3DDataModule 37 | CODE_PATH: TOKENS 38 | 39 | METRIC: 40 | TYPE: ['TM2TMetrics', 'PredMetrics'] 41 | 42 | LOSS: 43 | LAMBDA_FEATURE: 1.0 44 | LAMBDA_VELOCITY: 0.5 45 | LAMBDA_COMMIT: 0.02 46 | LAMBDA_CLS: 1.0 47 | ABLATION: 48 | RECONS_LOSS: 'l1_smooth' 49 | 50 | model: 51 | target: mGPT.models.mgpt.MotionGPT 52 | params: 53 | condition: 'text' 54 | task: 't2m' 55 | lm: ${lm.default} 56 | motion_vae: ${vq.default} 57 | 58 | LOGGER: 59 | TYPE: ['tensorboard', 'wandb'] 60 | VAL_EVERY_STEPS: 10 61 | WANDB: 62 | params: 63 | project: mem 64 | -------------------------------------------------------------------------------- /mGPT/render/pyrender/j3ds_render_smpl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | from scripts.hybrik_loc2rot import HybrIKJointsToRotmat 5 | from scripts.pyrender import SMPLRender 6 | import cv2 7 | from scipy.spatial.transform import Rotation as RRR 8 | 9 | parser = argparse.ArgumentParser( 10 | description='Render a SMPL video by a j3ds npy file.') 11 | parser.add_argument('--input', type=str, default='', help='the npy file path') 12 | parser.add_argument('--render', 13 | type=int, 14 | default=1, 15 | help='render the video if 1') 16 | args = parser.parse_args() 17 | 18 | input_path = args.input 19 | output_npy_path = args.input.replace('.npy', '_pose.npy') 20 | data = np.load(input_path) 21 | data = data - data[0, 0] 22 | pose_generator = HybrIKJointsToRotmat() 23 | pose = pose_generator(data) 24 | pose = np.concatenate( 25 | [pose, np.stack([np.stack([np.eye(3)] * pose.shape[0], 0)] * 2, 1)], 1) 26 | np.save(output_npy_path, pose) 27 | shape = [768, 768] 28 | if args.render: 29 | render = SMPLRender() 30 | output_mp4_path = args.input.replace('.npy', '_smpl.mp4') 31 | os.environ['PYOPENGL_PLATFORM'] = 'egl' 32 | size = (shape[1], shape[0]) 33 | fps = 30.0 34 | fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V') 35 | videoWriter = cv2.VideoWriter(output_mp4_path, fourcc, fps, size) 36 | r = RRR.from_rotvec(np.array([np.pi, 0.0, 0.0])) 37 | pose[:, 0] = np.matmul(r.as_matrix().reshape(1, 3, 3), pose[:, 0]) 38 | for i in range(data.shape[0]): 39 | img = np.zeros([shape[0], shape[1], 3]) 40 | aroot = data[[i], 0] + np.array([[0.0, 0.0, 30.0]]) 41 | aroot[:, 1] = -aroot[:, 1] 42 | params = dict(pred_shape=np.zeros([1, 10]), 43 | pred_root=aroot, 44 | pred_pose=pose[[i]]) 45 | renderImg = render.render(img.copy(), params) 46 | renderImg = (renderImg * 
255).astype(np.uint8) 47 | videoWriter.write(renderImg) 48 | videoWriter.release() 49 | -------------------------------------------------------------------------------- /mGPT/data/humanml/utils/paramUtil.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Define a kinematic tree for the skeletal struture 4 | kit_kinematic_chain = [[0, 11, 12, 13, 14, 15], [0, 16, 17, 18, 19, 20], [0, 1, 2, 3, 4], [3, 5, 6, 7], [3, 8, 9, 10]] 5 | 6 | kit_raw_offsets = np.array( 7 | [ 8 | [0, 0, 0], 9 | [0, 1, 0], 10 | [0, 1, 0], 11 | [0, 1, 0], 12 | [0, 1, 0], 13 | [1, 0, 0], 14 | [0, -1, 0], 15 | [0, -1, 0], 16 | [-1, 0, 0], 17 | [0, -1, 0], 18 | [0, -1, 0], 19 | [1, 0, 0], 20 | [0, -1, 0], 21 | [0, -1, 0], 22 | [0, 0, 1], 23 | [0, 0, 1], 24 | [-1, 0, 0], 25 | [0, -1, 0], 26 | [0, -1, 0], 27 | [0, 0, 1], 28 | [0, 0, 1] 29 | ] 30 | ) 31 | 32 | t2m_raw_offsets = np.array([[0,0,0], 33 | [1,0,0], 34 | [-1,0,0], 35 | [0,1,0], 36 | [0,-1,0], 37 | [0,-1,0], 38 | [0,1,0], 39 | [0,-1,0], 40 | [0,-1,0], 41 | [0,1,0], 42 | [0,0,1], 43 | [0,0,1], 44 | [0,1,0], 45 | [1,0,0], 46 | [-1,0,0], 47 | [0,0,1], 48 | [0,-1,0], 49 | [0,-1,0], 50 | [0,-1,0], 51 | [0,-1,0], 52 | [0,-1,0], 53 | [0,-1,0]]) 54 | 55 | t2m_kinematic_chain = [[0, 2, 5, 8, 11], [0, 1, 4, 7, 10], [0, 3, 6, 9, 12, 15], [9, 14, 17, 19, 21], [9, 13, 16, 18, 20]] 56 | t2m_left_hand_chain = [[20, 22, 23, 24], [20, 34, 35, 36], [20, 25, 26, 27], [20, 31, 32, 33], [20, 28, 29, 30]] 57 | t2m_right_hand_chain = [[21, 43, 44, 45], [21, 46, 47, 48], [21, 40, 41, 42], [21, 37, 38, 39], [21, 49, 50, 51]] 58 | 59 | 60 | kit_tgt_skel_id = '03950' 61 | 62 | t2m_tgt_skel_id = '000021' 63 | 64 | -------------------------------------------------------------------------------- /scripts/get_motion_code.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pytorch_lightning as pl 4 | import torch 5 | from pathlib import Path 6 | from tqdm import tqdm 7 | from mGPT.config import parse_args 8 | from mGPT.data.build_data import build_data 9 | from mGPT.models.build_model import build_model 10 | from mGPT.utils.load_checkpoint import load_pretrained_vae 11 | 12 | def main(): 13 | # parse options 14 | cfg = parse_args(phase="test") # parse config file 15 | cfg.TRAIN.STAGE = "token" 16 | cfg.TRAIN.BATCH_SIZE = 1 17 | 18 | # set seed 19 | pl.seed_everything(cfg.SEED_VALUE) 20 | 21 | # gpu setting 22 | if cfg.ACCELERATOR == "gpu": 23 | os.environ["PYTHONWARNINGS"] = "ignore" 24 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 25 | 26 | # create dataset 27 | datasets = build_data(cfg, phase='token') 28 | print("datasets module initialized") 29 | output_dir = os.path.join(datasets.hparams.data_root, cfg.DATASET.CODE_PATH) 30 | 31 | os.makedirs(output_dir, exist_ok=True) 32 | 33 | # create model 34 | model = build_model(cfg, datasets) 35 | if hasattr(model, "motion_vae"): 36 | model.vae = model.motion_vae 37 | print("model loaded") 38 | 39 | # Strict load vae model 40 | assert cfg.TRAIN.PRETRAINED_VAE is not None 41 | load_pretrained_vae(cfg, model) 42 | 43 | if cfg.ACCELERATOR == "gpu": 44 | model = model.to('cuda') 45 | 46 | for batch in tqdm(datasets.train_dataloader(), 47 | desc=f'motion tokenize'): 48 | name = batch['text'] 49 | 50 | pose = batch['motion'] 51 | pose = pose.cuda().float() 52 | 53 | if pose.shape[1] == 0: 54 | continue 55 | target, _ = model.vae.encode(pose) 56 | target = target.to('cpu').numpy() 57 | 58 | target_path = 
os.path.join(output_dir, name[0] + '.npy') 59 | Path(target_path).parent.mkdir(parents=True, exist_ok=True) 60 | np.save(target_path, target) 61 | 62 | print( 63 | f'Motion tokenization done, the motion tokens are saved to {output_dir}' 64 | ) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /mGPT/data/transforms/rots2joints/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | 19 | import torch 20 | from torch import Tensor, nn 21 | from pathlib import Path 22 | import os 23 | # import hydra 24 | 25 | class Rots2Joints(nn.Module): 26 | def __init__(self, path: Optional[str] = None, 27 | normalization: bool = False, 28 | eps: float = 1e-12, 29 | **kwargs) -> None: 30 | if normalization and path is None: 31 | raise TypeError("You should provide a path if normalization is on.") 32 | 33 | super().__init__() 34 | self.normalization = normalization 35 | self.eps = eps 36 | # workaround for cluster local/sync 37 | if path is not None: 38 | rel_p = path.split('/') 39 | rel_p = rel_p[rel_p.index('deps'):] 40 | rel_p = '/'.join(rel_p) 41 | # path = hydra.utils.get_original_cwd() + '/' + rel_p 42 | if normalization: 43 | mean_path = Path(path) / "mean.pt" 44 | std_path = Path(path) / "std.pt" 45 | self.register_buffer('mean', torch.load(mean_path)) 46 | self.register_buffer('std', torch.load(std_path)) 47 | 48 | def normalize(self, features: Tensor) -> Tensor: 49 | if self.normalization: 50 | features = (features - self.mean)/(self.std + self.eps) 51 | return features 52 | 53 | def unnormalize(self, features: Tensor) -> Tensor: 54 | if self.normalization: 55 | features = features * self.std + self.mean 56 | return features 57 | -------------------------------------------------------------------------------- /mGPT/data/transforms/joints2jfeats/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | 19 | import torch 20 | from torch import Tensor, nn 21 | from pathlib import Path 22 | import os 23 | 24 | 25 | class Joints2Jfeats(nn.Module): 26 | 27 | def __init__(self, 28 | path: Optional[str] = None, 29 | normalization: bool = False, 30 | eps: float = 1e-12, 31 | **kwargs) -> None: 32 | if normalization and path is None: 33 | raise TypeError( 34 | "You should provide a path if normalization is on.") 35 | 36 | super().__init__() 37 | self.normalization = normalization 38 | self.eps = eps 39 | # workaround for cluster local/sync 40 | if path is not None: 41 | # rel_p = path.split('/') 42 | # rel_p = rel_p[rel_p.index('deps'):] 43 | # rel_p = '/'.join(rel_p) 44 | pass 45 | if normalization: 46 | mean_path = Path(path) / "jfeats_mean.pt" 47 | std_path = Path(path) / "jfeats_std.pt" 48 | self.register_buffer('mean', torch.load(mean_path)) 49 | self.register_buffer('std', torch.load(std_path)) 50 | 51 | def normalize(self, features: Tensor) -> Tensor: 52 | if self.normalization: 53 | features = (features - self.mean) / (self.std + self.eps) 54 | return features 55 | 56 | def unnormalize(self, features: Tensor) -> Tensor: 57 | if self.normalization: 58 | features = features * self.std + self.mean 59 | return features 60 | -------------------------------------------------------------------------------- /mGPT/render/video.py: -------------------------------------------------------------------------------- 1 | import moviepy.editor as mp 2 | import moviepy.video.fx.all as vfx 3 | import os 4 | import imageio 5 | 6 | 7 | def mask_png(frames): 8 | for frame in frames: 9 | im = imageio.imread(frame) 10 | im[im[:, :, 3] < 1, :] = 255 11 | imageio.imwrite(frame, im[:, :, 0:3]) 12 | return 13 | 14 | 15 | class Video: 16 | def __init__(self, frame_path: str, fps: float = 12.5, res="high"): 17 | frame_path = str(frame_path) 18 | self.fps = fps 19 | 20 | self._conf = {"codec": "libx264", 21 | "fps": self.fps, 22 | "audio_codec": "aac", 23 | "temp_audiofile": "temp-audio.m4a", 24 | "remove_temp": True} 25 | 26 | if res == "low": 27 | bitrate = "500k" 28 | else: 29 | bitrate = "5000k" 30 | 31 | self._conf = {"bitrate": bitrate, 32 | "fps": self.fps} 33 | 34 | # Load video 35 | # video = mp.VideoFileClip(video1_path, audio=False) 36 | # Load with frames 37 | frames = [os.path.join(frame_path, x) 38 | for x in sorted(os.listdir(frame_path))] 39 | 40 | # mask background white for videos 41 | mask_png(frames) 42 | 43 | video = mp.ImageSequenceClip(frames, fps=fps) 44 | self.video = video 45 | self.duration = video.duration 46 | 47 | def add_text(self, text): 48 | # needs ImageMagick 49 | video_text = mp.TextClip(text, 50 | font='Amiri', 51 | color='white', 52 | method='caption', 53 | align="center", 54 | size=(self.video.w, None), 55 | fontsize=30) 56 | video_text = video_text.on_color(size=(self.video.w, video_text.h + 5), 57 | color=(0, 0, 0), 58 | col_opacity=0.6) 59 | # video_text = video_text.set_pos('bottom') 60 | video_text = video_text.set_pos('top') 61 | 62 | self.video = mp.CompositeVideoClip([self.video, video_text]) 63 | 64 | def save(self, out_path): 65 | out_path = str(out_path) 66 | self.video.subclip(0, self.duration).write_videofile( 67 | out_path, **self._conf) 68 | -------------------------------------------------------------------------------- /mGPT/losses/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 
class BaseLosses(nn.Module): 5 | def __init__(self, cfg, losses, params, losses_func, num_joints, **kwargs): 6 | super().__init__() 7 | 8 | # Save parameters 9 | self.num_joints = num_joints 10 | self._params = params 11 | 12 | # Add total indicator 13 | losses.append("total") if "total" not in losses else None 14 | 15 | # Register losses 16 | for loss in losses: 17 | self.register_buffer(loss, torch.tensor(0.0)) 18 | self.register_buffer("count", torch.tensor(0.0)) 19 | self.losses = losses 20 | 21 | # Instantiate loss functions 22 | self._losses_func = {} 23 | for loss in losses[:-1]: 24 | self._losses_func[loss] = losses_func[loss](reduction='mean') 25 | 26 | def _update_loss(self, loss: str, outputs, inputs): 27 | '''Update the loss and return the weighted loss.''' 28 | # Update the loss 29 | val = self._losses_func[loss](outputs, inputs) 30 | # self.losses_values[loss] += val.detach() 31 | getattr(self, loss).add_(val.detach()) 32 | # Return a weighted sum 33 | weighted_loss = self._params[loss] * val 34 | return weighted_loss 35 | 36 | def reset(self): 37 | '''Reset the losses to 0.''' 38 | for loss in self.losses: 39 | setattr(self, loss, torch.tensor(0.0, device=getattr(self, loss).device)) 40 | setattr(self, "count", torch.tensor(0.0, device=getattr(self, "count").device)) 41 | 42 | def compute(self, split): 43 | '''Compute the losses and return a dictionary with the losses.''' 44 | count = self.count 45 | # Loss dictionary 46 | loss_dict = {loss: getattr(self, loss)/count for loss in self.losses} 47 | # Format the losses for logging 48 | log_dict = { self.loss2logname(loss, split): value.item() 49 | for loss, value in loss_dict.items() if not torch.isnan(value)} 50 | # Reset the losses 51 | self.reset() 52 | return log_dict 53 | 54 | def loss2logname(self, loss: str, split: str): 55 | '''Convert the loss name to a log name.''' 56 | if loss == "total": 57 | log_name = f"{loss}/{split}" 58 | else: 59 | loss_type, name = loss.split("_") 60 | log_name = f"{loss_type}/{name}/{split}" 61 | return log_name 62 | -------------------------------------------------------------------------------- /mGPT/data/transforms/rots2rfeats/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | 19 | import torch 20 | from torch import Tensor, nn 21 | from pathlib import Path 22 | import os 23 | 24 | class Rots2Rfeats(nn.Module): 25 | def __init__(self, path: Optional[str] = None, 26 | normalization: bool = True, 27 | eps: float = 1e-12, 28 | **kwargs) -> None: 29 | if normalization and path is None: 30 | raise TypeError("You should provide a path if normalization is on.") 31 | 32 | super().__init__() 33 | self.normalization = normalization 34 | self.eps = eps 35 | if normalization: 36 | # workaround for cluster local/sync 37 | rel_p = path.split('/') 38 | # superhacky it is for the datatype ugly stuff change it, copy the main stuff to seperate_pairs dict 39 | if rel_p[-1] == 'separate_pairs': 40 | rel_p.remove('separate_pairs') 41 | ######################################################## 42 | # rel_p = rel_p[rel_p.index('deps'):] 43 | rel_p = '/'.join(rel_p) 44 | # path = hydra.utils.get_original_cwd() + '/' + rel_p 45 | path = rel_p 46 | mean_path = Path(path) / "rfeats_mean.pt" 47 | std_path = Path(path) / "rfeats_std.pt" 48 | 49 | self.register_buffer('mean', torch.load(mean_path)) 50 | self.register_buffer('std', torch.load(std_path)) 51 | 52 | def normalize(self, features: Tensor) -> Tensor: 53 | if self.normalization: 54 | features = (features - self.mean)/(self.std + self.eps) 55 | return features 56 | 57 | def unnormalize(self, features: Tensor) -> Tensor: 58 | if self.normalization: 59 | features = features * self.std + self.mean 60 | return features 61 | -------------------------------------------------------------------------------- /mGPT/data/tools/easyconvert.py: -------------------------------------------------------------------------------- 1 | from .geometry import * 2 | 3 | def nfeats_of(rottype): 4 | if rottype in ["rotvec", "axisangle"]: 5 | return 3 6 | elif rottype in ["rotquat", "quaternion"]: 7 | return 4 8 | elif rottype in ["rot6d", "6drot", "rotation6d"]: 9 | return 6 10 | elif rottype in ["rotmat"]: 11 | return 9 12 | else: 13 | return TypeError("This rotation type doesn't have features.") 14 | 15 | 16 | def axis_angle_to(newtype, rotations): 17 | if newtype in ["matrix"]: 18 | rotations = axis_angle_to_matrix(rotations) 19 | return rotations 20 | elif newtype in ["rotmat"]: 21 | rotations = axis_angle_to_matrix(rotations) 22 | rotations = matrix_to("rotmat", rotations) 23 | return rotations 24 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 25 | rotations = axis_angle_to_matrix(rotations) 26 | rotations = matrix_to("rot6d", rotations) 27 | return rotations 28 | elif newtype in ["rotquat", "quaternion"]: 29 | rotations = axis_angle_to_quaternion(rotations) 30 | return rotations 31 | elif newtype in ["rotvec", "axisangle"]: 32 | return rotations 33 | else: 34 | raise NotImplementedError 35 | 36 | 37 | def matrix_to(newtype, rotations): 38 | if newtype in ["matrix"]: 39 | return rotations 40 | if newtype in ["rotmat"]: 41 | rotations = rotations.reshape((*rotations.shape[:-2], 9)) 42 | return rotations 43 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 44 | rotations = matrix_to_rotation_6d(rotations) 45 | return rotations 46 | elif newtype in ["rotquat", "quaternion"]: 47 | rotations = matrix_to_quaternion(rotations) 48 | return rotations 49 | elif newtype in ["rotvec", "axisangle"]: 50 | rotations = matrix_to_axis_angle(rotations) 51 | return rotations 52 | else: 53 | raise NotImplementedError 54 | 55 | 56 | def to_matrix(oldtype, rotations): 57 | if 
oldtype in ["matrix"]: 58 | return rotations 59 | if oldtype in ["rotmat"]: 60 | rotations = rotations.reshape((*rotations.shape[:-2], 3, 3)) 61 | return rotations 62 | elif oldtype in ["rot6d", "6drot", "rotation6d"]: 63 | rotations = rotation_6d_to_matrix(rotations) 64 | return rotations 65 | elif oldtype in ["rotquat", "quaternion"]: 66 | rotations = quaternion_to_matrix(rotations) 67 | return rotations 68 | elif oldtype in ["rotvec", "axisangle"]: 69 | rotations = axis_angle_to_matrix(rotations) 70 | return rotations 71 | else: 72 | raise NotImplementedError 73 | -------------------------------------------------------------------------------- /mGPT/utils/logger.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import time 4 | import logging 5 | from omegaconf import OmegaConf 6 | from pytorch_lightning.utilities.rank_zero import rank_zero_only 7 | 8 | def create_logger(cfg, phase='train'): 9 | # root dir set by cfg 10 | root_output_dir = Path(cfg.FOLDER) 11 | # set up logger 12 | if not root_output_dir.exists(): 13 | print('=> creating {}'.format(root_output_dir)) 14 | root_output_dir.mkdir() 15 | 16 | cfg_name = cfg.NAME 17 | model = cfg.model.target.split('.')[-2] 18 | cfg_name = os.path.basename(cfg_name).split('.')[0] 19 | 20 | final_output_dir = root_output_dir / model / cfg_name 21 | cfg.FOLDER_EXP = str(final_output_dir) 22 | 23 | time_str = time.strftime('%Y-%m-%d-%H-%M-%S') 24 | 25 | new_dir(cfg, phase, time_str, final_output_dir) 26 | 27 | head = '%(asctime)-15s %(message)s' 28 | logger = config_logger(final_output_dir, time_str, phase, head) 29 | if logger is None: 30 | logger = logging.getLogger() 31 | logger.setLevel(logging.CRITICAL) 32 | logging.basicConfig(format=head) 33 | return logger 34 | 35 | 36 | @rank_zero_only 37 | def config_logger(final_output_dir, time_str, phase, head): 38 | log_file = '{}_{}_{}.log'.format('log', time_str, phase) 39 | final_log_file = final_output_dir / log_file 40 | logging.basicConfig(filename=str(final_log_file)) 41 | logger = logging.getLogger() 42 | logger.setLevel(logging.INFO) 43 | console = logging.StreamHandler() 44 | formatter = logging.Formatter(head) 45 | console.setFormatter(formatter) 46 | logging.getLogger('').addHandler(console) 47 | file_handler = logging.FileHandler(final_log_file, 'w') 48 | file_handler.setFormatter(logging.Formatter(head)) 49 | file_handler.setLevel(logging.INFO) 50 | logging.getLogger('').addHandler(file_handler) 51 | return logger 52 | 53 | 54 | @rank_zero_only 55 | def new_dir(cfg, phase, time_str, final_output_dir): 56 | # new experiment folder 57 | cfg.TIME = str(time_str) 58 | if os.path.exists(final_output_dir) and not os.path.exists(cfg.TRAIN.RESUME) and not cfg.DEBUG and phase not in ['test', 'demo']: 59 | file_list = sorted(os.listdir(final_output_dir), reverse=True) 60 | for item in file_list: 61 | if item.endswith('.log'): 62 | os.rename(str(final_output_dir), str(final_output_dir) + '_' + cfg.TIME) 63 | break 64 | final_output_dir.mkdir(parents=True, exist_ok=True) 65 | # write config yaml 66 | config_file = '{}_{}_{}.yaml'.format('config', time_str, phase) 67 | final_config_file = final_output_dir / config_file 68 | OmegaConf.save(config=cfg, f=final_config_file) 69 | -------------------------------------------------------------------------------- /mGPT/utils/tensors.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def lengths_to_mask(lengths): 5 
| max_len = max(lengths) 6 | mask = torch.arange(max_len, device=lengths.device).expand( 7 | len(lengths), max_len) < lengths.unsqueeze(1) 8 | return mask 9 | 10 | 11 | def collate_tensors(batch): 12 | dims = batch[0].dim() 13 | max_size = [max([b.size(i) for b in batch]) for i in range(dims)] 14 | size = (len(batch),) + tuple(max_size) 15 | canvas = batch[0].new_zeros(size=size) 16 | for i, b in enumerate(batch): 17 | sub_tensor = canvas[i] 18 | for d in range(dims): 19 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 20 | sub_tensor.add_(b) 21 | return canvas 22 | 23 | 24 | def collate(batch): 25 | databatch = [b[0] for b in batch] 26 | labelbatch = [b[1] for b in batch] 27 | lenbatch = [len(b[0][0][0]) for b in batch] 28 | 29 | databatchTensor = collate_tensors(databatch) 30 | labelbatchTensor = torch.as_tensor(labelbatch) 31 | lenbatchTensor = torch.as_tensor(lenbatch) 32 | 33 | maskbatchTensor = lengths_to_mask(lenbatchTensor) 34 | # x - [bs, njoints, nfeats, lengths] 35 | # - nfeats, the representation of a joint 36 | # y - [bs] 37 | # mask - [bs, lengths] 38 | # lengths - [bs] 39 | batch = {"x": databatchTensor, "y": labelbatchTensor, 40 | "mask": maskbatchTensor, 'lengths': lenbatchTensor} 41 | return batch 42 | 43 | 44 | # slow version with padding 45 | def collate_data3d_slow(batch): 46 | batchTensor = {} 47 | for key in batch[0].keys(): 48 | databatch = [b[key] for b in batch] 49 | batchTensor[key] = collate_tensors(databatch) 50 | batch = batchTensor 51 | # theta - [bs, lengths, 85], theta shape (85,) 52 | # - (np.array([1., 0., 0.]), pose(72), shape(10)), axis=0) 53 | # kp_2d - [bs, lengths, njoints, nfeats], nfeats (x,y,weight) 54 | # kp_3d - [bs, lengths, njoints, nfeats], nfeats (x,y,z) 55 | # w_smpl - [bs, lengths] zeros 56 | # w_3d - [bs, lengths] zeros 57 | return batch 58 | 59 | def collate_data3d(batch): 60 | batchTensor = {} 61 | for key in batch[0].keys(): 62 | databatch = [b[key] for b in batch] 63 | if key == "paths": 64 | batchTensor[key] = databatch 65 | else: 66 | batchTensor[key] = torch.stack(databatch,axis=0) 67 | batch = batchTensor 68 | # theta - [bs, lengths, 85], theta shape (85,) 69 | # - (np.array([1., 0., 0.]), pose(72), shape(10)), axis=0) 70 | # kp_2d - [bs, lengths, njoints, nfeats], nfeats (x,y,weight) 71 | # kp_3d - [bs, lengths, njoints, nfeats], nfeats (x,y,z) 72 | # w_smpl - [bs, lengths] zeros 73 | # w_3d - [bs, lengths] zeros 74 | return batch 75 | -------------------------------------------------------------------------------- /mGPT/data/transforms/xyz.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | from torch import Tensor 19 | 20 | from .base import Datastruct, dataclass, Transform 21 | from ..tools import collate_tensor_with_padding 22 | 23 | from .joints2jfeats import Joints2Jfeats 24 | 25 | 26 | class XYZTransform(Transform): 27 | def __init__(self, joints2jfeats: Joints2Jfeats, **kwargs): 28 | self.joints2jfeats = joints2jfeats 29 | 30 | def Datastruct(self, **kwargs): 31 | return XYZDatastruct(_joints2jfeats=self.joints2jfeats, 32 | transforms=self, 33 | **kwargs) 34 | 35 | def __repr__(self): 36 | return "XYZTransform()" 37 | 38 | 39 | @dataclass 40 | class XYZDatastruct(Datastruct): 41 | transforms: XYZTransform 42 | _joints2jfeats: Joints2Jfeats 43 | 44 | features: Optional[Tensor] = None 45 | joints_: Optional[Tensor] = None 46 | jfeats_: Optional[Tensor] = None 47 | 48 | def __post_init__(self): 49 | self.datakeys = ["features", "joints_", "jfeats_"] 50 | # starting point 51 | if self.features is not None and self.jfeats_ is None: 52 | self.jfeats_ = self.features 53 | 54 | @property 55 | def joints(self): 56 | # Cached value 57 | if self.joints_ is not None: 58 | return self.joints_ 59 | 60 | # self.jfeats_ should be defined 61 | assert self.jfeats_ is not None 62 | 63 | self._joints2jfeats.to(self.jfeats.device) 64 | self.joints_ = self._joints2jfeats.inverse(self.jfeats) 65 | return self.joints_ 66 | 67 | @property 68 | def jfeats(self): 69 | # Cached value 70 | if self.jfeats_ is not None: 71 | return self.jfeats_ 72 | 73 | # self.joints_ should be defined 74 | assert self.joints_ is not None 75 | 76 | self._joints2jfeats.to(self.joints.device) 77 | self.jfeats_ = self._joints2jfeats(self.joints) 78 | return self.jfeats_ 79 | 80 | def __len__(self): 81 | return len(self.jfeats) 82 | -------------------------------------------------------------------------------- /mGPT/data/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import rich 3 | import pickle 4 | import numpy as np 5 | 6 | 7 | def lengths_to_mask(lengths): 8 | max_len = max(lengths) 9 | mask = torch.arange(max_len, device=lengths.device).expand( 10 | len(lengths), max_len) < lengths.unsqueeze(1) 11 | return mask 12 | 13 | 14 | # padding to max length in one batch 15 | def collate_tensors(batch): 16 | if isinstance(batch[0], np.ndarray): 17 | batch = [torch.tensor(b).float() for b in batch] 18 | 19 | dims = batch[0].dim() 20 | max_size = [max([b.size(i) for b in batch]) for i in range(dims)] 21 | size = (len(batch), ) + tuple(max_size) 22 | canvas = batch[0].new_zeros(size=size) 23 | for i, b in enumerate(batch): 24 | sub_tensor = canvas[i] 25 | for d in range(dims): 26 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 27 | sub_tensor.add_(b) 28 | return canvas 29 | 30 | def humanml3d_collate(batch): 31 | notnone_batches = [b for b in batch if b is not None] 32 | EvalFlag = False if notnone_batches[0][5] is None else True 33 | 34 | # Sort by text length 35 | if EvalFlag: 36 | notnone_batches.sort(key=lambda x: x[5], reverse=True) 37 | 38 | # Motion only 39 | adapted_batch = { 40 | "motion": 41 | collate_tensors([torch.tensor(b[1]).float() for b in notnone_batches]), 42 | "length": [b[2] for b in notnone_batches], 43 | } 44 | 45 | # Text and motion 46 | if notnone_batches[0][0] is not None: 47 | adapted_batch.update({ 48 | "text": [b[0] for b in notnone_batches], 49 | "all_captions": [b[7] for b in notnone_batches], 50 | }) 51 | 52 | # Evaluation related 53 | 
if EvalFlag: 54 | adapted_batch.update({ 55 | "text": [b[0] for b in notnone_batches], 56 | "word_embs": 57 | collate_tensors( 58 | [torch.tensor(b[3]).float() for b in notnone_batches]), 59 | "pos_ohot": 60 | collate_tensors( 61 | [torch.tensor(b[4]).float() for b in notnone_batches]), 62 | "text_len": 63 | collate_tensors([torch.tensor(b[5]) for b in notnone_batches]), 64 | "tokens": [b[6] for b in notnone_batches], 65 | }) 66 | 67 | # Tasks 68 | if len(notnone_batches[0]) == 9: 69 | adapted_batch.update({"tasks": [b[8] for b in notnone_batches]}) 70 | 71 | return adapted_batch 72 | 73 | 74 | def load_pkl(path, description=None, progressBar=False): 75 | if progressBar: 76 | with rich.progress.open(path, 'rb', description=description) as file: 77 | data = pickle.load(file) 78 | else: 79 | with open(path, 'rb') as file: 80 | data = pickle.load(file) 81 | return data 82 | -------------------------------------------------------------------------------- /mGPT/data/transforms/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from dataclasses import dataclass, fields 18 | 19 | 20 | class Transform: 21 | 22 | def collate(self, lst_datastruct): 23 | from ..tools import collate_tensor_with_padding 24 | example = lst_datastruct[0] 25 | 26 | def collate_or_none(key): 27 | if example[key] is None: 28 | return None 29 | key_lst = [x[key] for x in lst_datastruct] 30 | return collate_tensor_with_padding(key_lst) 31 | 32 | kwargs = {key: collate_or_none(key) for key in example.datakeys} 33 | 34 | return self.Datastruct(**kwargs) 35 | 36 | 37 | # Inspired from SMPLX library 38 | # need to define "datakeys" and transforms 39 | @dataclass 40 | class Datastruct: 41 | 42 | def __getitem__(self, key): 43 | return getattr(self, key) 44 | 45 | def __setitem__(self, key, value): 46 | self.__dict__[key] = value 47 | 48 | def get(self, key, default=None): 49 | return getattr(self, key, default) 50 | 51 | def __iter__(self): 52 | return self.keys() 53 | 54 | def keys(self): 55 | keys = [t.name for t in fields(self)] 56 | return iter(keys) 57 | 58 | def values(self): 59 | values = [getattr(self, t.name) for t in fields(self)] 60 | return iter(values) 61 | 62 | def items(self): 63 | data = [(t.name, getattr(self, t.name)) for t in fields(self)] 64 | return iter(data) 65 | 66 | def to(self, *args, **kwargs): 67 | for key in self.datakeys: 68 | if self[key] is not None: 69 | self[key] = self[key].to(*args, **kwargs) 70 | return self 71 | 72 | @property 73 | def device(self): 74 | return self[self.datakeys[0]].device 75 | 76 | def detach(self): 77 | 78 | def detach_or_none(tensor): 79 | if tensor is not None: 80 | return tensor.detach() 81 | return None 82 | 83 | kwargs = {key: detach_or_none(self[key]) for key in self.datakeys} 84 | 
return self.transforms.Datastruct(**kwargs) 85 | -------------------------------------------------------------------------------- /mGPT/data/humanml/dataset_t2m_token.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from torch.utils import data 4 | from .dataset_t2m import Text2MotionDataset 5 | import codecs as cs 6 | from os.path import join as pjoin 7 | 8 | 9 | class Text2MotionDatasetToken(data.Dataset): 10 | 11 | def __init__( 12 | self, 13 | data_root, 14 | split, 15 | mean, 16 | std, 17 | max_motion_length=196, 18 | min_motion_length=40, 19 | unit_length=4, 20 | fps=20, 21 | tmpFile=True, 22 | tiny=False, 23 | debug=False, 24 | **kwargs, 25 | ): 26 | 27 | self.max_motion_length = max_motion_length 28 | self.min_motion_length = min_motion_length 29 | self.unit_length = unit_length 30 | 31 | # Data mean and std 32 | self.mean = mean 33 | self.std = std 34 | 35 | # Data path 36 | split_file = pjoin(data_root, split + '.txt') 37 | motion_dir = pjoin(data_root, 'new_joint_vecs') 38 | text_dir = pjoin(data_root, 'texts') 39 | 40 | # Data id list 41 | self.id_list = [] 42 | with cs.open(split_file, "r") as f: 43 | for line in f.readlines(): 44 | self.id_list.append(line.strip()) 45 | 46 | new_name_list = [] 47 | length_list = [] 48 | data_dict = {} 49 | for name in self.id_list: 50 | try: 51 | motion = np.load(pjoin(motion_dir, name + '.npy')) 52 | if (len(motion)) < self.min_motion_length or (len(motion) >= 200): 53 | continue 54 | 55 | data_dict[name] = {'motion': motion, 56 | 'length': len(motion), 57 | 'name': name} 58 | new_name_list.append(name) 59 | length_list.append(len(motion)) 60 | except: 61 | # Some motion may not exist in KIT dataset 62 | pass 63 | 64 | self.length_arr = np.array(length_list) 65 | self.data_dict = data_dict 66 | self.name_list = new_name_list 67 | self.nfeats = motion.shape[-1] 68 | 69 | 70 | def __len__(self): 71 | return len(self.data_dict) 72 | 73 | def __getitem__(self, item): 74 | name = self.name_list[item] 75 | data = self.data_dict[name] 76 | motion, m_length = data['motion'], data['length'] 77 | 78 | m_length = (m_length // self.unit_length) * self.unit_length 79 | 80 | idx = random.randint(0, len(motion) - m_length) 81 | motion = motion[idx:idx+m_length] 82 | 83 | "Z Normalization" 84 | motion = (motion - self.mean) / self.std 85 | 86 | return name, motion, m_length, True, True, True, True, True, True 87 | -------------------------------------------------------------------------------- /mGPT/models/utils/adain.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class AdaptiveInstanceNorm1d(nn.Module): 6 | def __init__(self, num_features, eps=1e-5, momentum=0.1): 7 | super(AdaptiveInstanceNorm1d, self).__init__() 8 | self.num_features = num_features 9 | self.eps = eps 10 | self.momentum = momentum 11 | self.weight = None 12 | self.bias = None 13 | self.register_buffer('running_mean', torch.zeros(num_features)) 14 | self.register_buffer('running_var', torch.ones(num_features)) 15 | 16 | def forward(self, x, direct_weighting=False, no_std=False): 17 | assert self.weight is not None and \ 18 | self.bias is not None, "Please assign AdaIN weight first" 19 | # (bs, nfeats, nframe) <= (nframe, bs, nfeats) 20 | x = x.permute(1,2,0) 21 | 22 | b, c = x.size(0), x.size(1) # batch size & channels 23 | running_mean = self.running_mean.repeat(b) 24 | running_var = 
self.running_var.repeat(b) 25 | # self.weight = torch.ones_like(self.weight) 26 | 27 | if direct_weighting: 28 | x_reshaped = x.contiguous().view(b * c) 29 | if no_std: 30 | out = x_reshaped + self.bias 31 | else: 32 | out = x_reshaped.mul(self.weight) + self.bias 33 | out = out.view(b, c, *x.size()[2:]) 34 | else: 35 | x_reshaped = x.contiguous().view(1, b * c, *x.size()[2:]) 36 | out = F.batch_norm( 37 | x_reshaped, running_mean, running_var, self.weight, self.bias, 38 | True, self.momentum, self.eps) 39 | out = out.view(b, c, *x.size()[2:]) 40 | 41 | # (nframe, bs, nfeats) <= (bs, nfeats, nframe) 42 | out = out.permute(2,0,1) 43 | return out 44 | 45 | def __repr__(self): 46 | return self.__class__.__name__ + '(' + str(self.num_features) + ')' 47 | 48 | def assign_adain_params(adain_params, model): 49 | # assign the adain_params to the AdaIN layers in model 50 | for m in model.modules(): 51 | if m.__class__.__name__ == "AdaptiveInstanceNorm1d": 52 | mean = adain_params[: , : m.num_features] 53 | std = adain_params[: , m.num_features: 2 * m.num_features] 54 | m.bias = mean.contiguous().view(-1) 55 | m.weight = std.contiguous().view(-1) 56 | if adain_params.size(1) > 2 * m.num_features: 57 | adain_params = adain_params[: , 2 * m.num_features:] 58 | 59 | 60 | def get_num_adain_params(model): 61 | # return the number of AdaIN parameters needed by the model 62 | num_adain_params = 0 63 | for m in model.modules(): 64 | if m.__class__.__name__ == "AdaptiveInstanceNorm1d": 65 | num_adain_params += 2 * m.num_features 66 | return num_adain_params 67 | -------------------------------------------------------------------------------- /mGPT/render/blender/meshes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .materials import body_material 4 | 5 | # green 6 | # GT_SMPL = body_material(0.009, 0.214, 0.029) 7 | GT_SMPL = body_material(0.035, 0.415, 0.122) 8 | 9 | # blue 10 | # GEN_SMPL = body_material(0.022, 0.129, 0.439) 11 | # Blues => cmap(0.87) 12 | # GEN_SMPL = body_material(0.035, 0.322, 0.615) 13 | # Oranges => cmap(0.87) 14 | GEN_SMPL = body_material(0.658, 0.214, 0.0114) 15 | 16 | 17 | class Meshes: 18 | def __init__(self, data, *, gt, mode, faces_path, canonicalize, always_on_floor, oldrender=True, is_smplx=False, **kwargs): 19 | data = prepare_meshes(data, canonicalize=canonicalize, 20 | always_on_floor=always_on_floor, 21 | is_smplx=is_smplx) 22 | 23 | if isinstance(faces_path, str): 24 | self.faces = np.load(faces_path) 25 | else: 26 | self.faces = faces_path 27 | 28 | self.data = data 29 | self.mode = mode 30 | self.oldrender = oldrender 31 | 32 | self.N = len(data) 33 | self.trajectory = data[:, :, [0, 1]].mean(1) 34 | 35 | if gt: 36 | self.mat = GT_SMPL 37 | else: 38 | self.mat = GEN_SMPL 39 | 40 | def get_sequence_mat(self, frac): 41 | import matplotlib 42 | # cmap = matplotlib.cm.get_cmap('Blues') 43 | cmap = matplotlib.cm.get_cmap('Oranges') 44 | # begin = 0.60 45 | # end = 0.90 46 | begin = 0.50 47 | end = 0.90 48 | rgbcolor = cmap(begin + (end-begin)*frac) 49 | mat = body_material(*rgbcolor, oldrender=self.oldrender) 50 | return mat 51 | 52 | def get_root(self, index): 53 | return self.data[index].mean(0) 54 | 55 | def get_mean_root(self): 56 | return self.data.mean((0, 1)) 57 | 58 | def load_in_blender(self, index, mat): 59 | vertices = self.data[index] 60 | faces = self.faces 61 | name = f"{str(index).zfill(4)}" 62 | 63 | from .tools import load_numpy_vertices_into_blender 64 | 
load_numpy_vertices_into_blender(vertices, faces, name, mat) 65 | 66 | return name 67 | 68 | def __len__(self): 69 | return self.N 70 | 71 | 72 | def prepare_meshes(data, canonicalize=True, always_on_floor=False, is_smplx=False): 73 | if canonicalize: 74 | print("No canonicalization for now") 75 | 76 | # fitted mesh do not need fixing axis 77 | # fix axis 78 | if is_smplx: 79 | data[..., 1] = - data[..., 1] 80 | # data[..., 0] = - data[..., 0] 81 | 82 | 83 | # Swap axis (gravity=Z instead of Y) 84 | data = data[..., [2, 0, 1]] 85 | 86 | # Remove the floor 87 | data[..., 2] -= data[..., 2].min() 88 | 89 | # Put all the body on the floor 90 | if always_on_floor: 91 | data[..., 2] -= data[..., 2].min(1)[:, None] 92 | 93 | return data 94 | -------------------------------------------------------------------------------- /mGPT/data/humanml/utils/word_vectorizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | from os.path import join as pjoin 4 | 5 | POS_enumerator = { 6 | 'VERB': 0, 7 | 'NOUN': 1, 8 | 'DET': 2, 9 | 'ADP': 3, 10 | 'NUM': 4, 11 | 'AUX': 5, 12 | 'PRON': 6, 13 | 'ADJ': 7, 14 | 'ADV': 8, 15 | 'Loc_VIP': 9, 16 | 'Body_VIP': 10, 17 | 'Obj_VIP': 11, 18 | 'Act_VIP': 12, 19 | 'Desc_VIP': 13, 20 | 'OTHER': 14, 21 | } 22 | 23 | Loc_list = ('left', 'right', 'clockwise', 'counterclockwise', 'anticlockwise', 'forward', 'back', 'backward', 24 | 'up', 'down', 'straight', 'curve') 25 | 26 | Body_list = ('arm', 'chin', 'foot', 'feet', 'face', 'hand', 'mouth', 'leg', 'waist', 'eye', 'knee', 'shoulder', 'thigh') 27 | 28 | Obj_List = ('stair', 'dumbbell', 'chair', 'window', 'floor', 'car', 'ball', 'handrail', 'baseball', 'basketball') 29 | 30 | Act_list = ('walk', 'run', 'swing', 'pick', 'bring', 'kick', 'put', 'squat', 'throw', 'hop', 'dance', 'jump', 'turn', 31 | 'stumble', 'dance', 'stop', 'sit', 'lift', 'lower', 'raise', 'wash', 'stand', 'kneel', 'stroll', 32 | 'rub', 'bend', 'balance', 'flap', 'jog', 'shuffle', 'lean', 'rotate', 'spin', 'spread', 'climb') 33 | 34 | Desc_list = ('slowly', 'carefully', 'fast', 'careful', 'slow', 'quickly', 'happy', 'angry', 'sad', 'happily', 'angrily', 'sadly') 35 | 36 | VIP_dict = { 37 | 'Loc_VIP': Loc_list, 38 | 'Body_VIP': Body_list, 39 | 'Obj_VIP': Obj_List, 40 | 'Act_VIP': Act_list, 41 | 'Desc_VIP': Desc_list, 42 | } 43 | 44 | 45 | class WordVectorizer(object): 46 | def __init__(self, meta_root, prefix): 47 | vectors = np.load(pjoin(meta_root, '%s_data.npy'%prefix)) 48 | words = pickle.load(open(pjoin(meta_root, '%s_words.pkl'%prefix), 'rb')) 49 | word2idx = pickle.load(open(pjoin(meta_root, '%s_idx.pkl'%prefix), 'rb')) 50 | self.word2vec = {w: vectors[word2idx[w]] for w in words} 51 | 52 | def _get_pos_ohot(self, pos): 53 | pos_vec = np.zeros(len(POS_enumerator)) 54 | if pos in POS_enumerator: 55 | pos_vec[POS_enumerator[pos]] = 1 56 | else: 57 | pos_vec[POS_enumerator['OTHER']] = 1 58 | return pos_vec 59 | 60 | def __len__(self): 61 | return len(self.word2vec) 62 | 63 | def __getitem__(self, item): 64 | word, pos = item.split('/') 65 | if word in self.word2vec: 66 | word_vec = self.word2vec[word] 67 | vip_pos = None 68 | for key, values in VIP_dict.items(): 69 | if word in values: 70 | vip_pos = key 71 | break 72 | if vip_pos is not None: 73 | pos_vec = self._get_pos_ohot(vip_pos) 74 | else: 75 | pos_vec = self._get_pos_ohot(pos) 76 | else: 77 | word_vec = self.word2vec['unk'] 78 | pos_vec = self._get_pos_ohot('OTHER') 79 | return word_vec, pos_vec 80 | 
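# Minimal usage sketch (illustrative only): items are queried as "word/POS" strings and
# return a GloVe word embedding plus a one-hot part-of-speech vector of size
# len(POS_enumerator). The glove folder and the 'our_vab' prefix below are assumed
# placeholder values; the actual ones come from the dataset/evaluator configuration.
if __name__ == '__main__':
    w_vectorizer = WordVectorizer('./deps/glove', 'our_vab')  # hypothetical paths
    word_emb, pos_onehot = w_vectorizer['person/NOUN']
    print(word_emb.shape, pos_onehot.shape)  # e.g. (300,) and (15,)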
-------------------------------------------------------------------------------- /mGPT/archs/tools/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | class nonlinearity(nn.Module): 5 | def __init__(self): 6 | super().__init__() 7 | 8 | def forward(self, x): 9 | # swish 10 | return x * torch.sigmoid(x) 11 | 12 | class ResConv1DBlock(nn.Module): 13 | def __init__(self, n_in, n_state, dilation=1, activation='silu', norm=None, dropout=None): 14 | super().__init__() 15 | padding = dilation 16 | self.norm = norm 17 | if norm == "LN": 18 | self.norm1 = nn.LayerNorm(n_in) 19 | self.norm2 = nn.LayerNorm(n_in) 20 | elif norm == "GN": 21 | self.norm1 = nn.GroupNorm(num_groups=32, num_channels=n_in, eps=1e-6, affine=True) 22 | self.norm2 = nn.GroupNorm(num_groups=32, num_channels=n_in, eps=1e-6, affine=True) 23 | elif norm == "BN": 24 | self.norm1 = nn.BatchNorm1d(num_features=n_in, eps=1e-6, affine=True) 25 | self.norm2 = nn.BatchNorm1d(num_features=n_in, eps=1e-6, affine=True) 26 | 27 | else: 28 | self.norm1 = nn.Identity() 29 | self.norm2 = nn.Identity() 30 | 31 | if activation == "relu": 32 | self.activation1 = nn.ReLU() 33 | self.activation2 = nn.ReLU() 34 | 35 | elif activation == "silu": 36 | self.activation1 = nonlinearity() 37 | self.activation2 = nonlinearity() 38 | 39 | elif activation == "gelu": 40 | self.activation1 = nn.GELU() 41 | self.activation2 = nn.GELU() 42 | 43 | 44 | 45 | self.conv1 = nn.Conv1d(n_in, n_state, 3, 1, padding, dilation) 46 | self.conv2 = nn.Conv1d(n_state, n_in, 1, 1, 0,) 47 | 48 | 49 | def forward(self, x): 50 | x_orig = x 51 | if self.norm == "LN": 52 | x = self.norm1(x.transpose(-2, -1)) 53 | x = self.activation1(x.transpose(-2, -1)) 54 | else: 55 | x = self.norm1(x) 56 | x = self.activation1(x) 57 | 58 | x = self.conv1(x) 59 | 60 | if self.norm == "LN": 61 | x = self.norm2(x.transpose(-2, -1)) 62 | x = self.activation2(x.transpose(-2, -1)) 63 | else: 64 | x = self.norm2(x) 65 | x = self.activation2(x) 66 | 67 | x = self.conv2(x) 68 | x = x + x_orig 69 | return x 70 | 71 | class Resnet1D(nn.Module): 72 | def __init__(self, n_in, n_depth, dilation_growth_rate=1, reverse_dilation=True, activation='relu', norm=None): 73 | super().__init__() 74 | 75 | blocks = [ResConv1DBlock(n_in, n_in, dilation=dilation_growth_rate ** depth, activation=activation, norm=norm) for depth in range(n_depth)] 76 | if reverse_dilation: 77 | blocks = blocks[::-1] 78 | 79 | self.model = nn.Sequential(*blocks) 80 | 81 | def forward(self, x): 82 | return self.model(x) -------------------------------------------------------------------------------- /mGPT/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | 5 | # load example data 6 | def load_example_input(txt_path): 7 | file = open(txt_path, "r") 8 | Lines = file.readlines() 9 | count = 0 10 | texts, lens = [], [] 11 | # Strips the newline character 12 | for line in Lines: 13 | count += 1 14 | s = line.strip() 15 | s_l = s.split(" ")[0] 16 | s_t = s[(len(s_l) + 1):] 17 | lens.append(int(s_l)) 18 | texts.append(s_t) 19 | print("Length-{}: {}".format(s_l, s_t)) 20 | return texts, lens 21 | 22 | 23 | # render batch 24 | def render_batch(npy_dir, execute_python="./scripts/visualize_motion.sh", mode="sequence"): 25 | os.system(f"{execute_python} {npy_dir} {mode}") 26 | 27 | 28 | # render 29 | def render(execute_python, npy_path, jointtype, cfg_path): 30 | # 
execute_python = "/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender" 31 | # execute_python = "/apdcephfs/share_1227775/mingzhenzhu/jiangbiao/libs/blender-2.93.2-linux-x64/blender" 32 | export_scripts = "render.py" 33 | 34 | os.system( 35 | f"{execute_python} --background --python {export_scripts} -- --cfg={cfg_path} --npy={npy_path}" 36 | ) 37 | 38 | fig_path = Path(str(npy_path).replace(".npy", ".png")) 39 | return fig_path 40 | 41 | 42 | # origin render 43 | # def render(npy_path, jointtype): 44 | # execute_python = '/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender' 45 | # export_scripts = 'render.py' 46 | 47 | # os.system(f"{execute_python} --background --python {export_scripts} -- npy={npy_path} jointstype={jointtype}") 48 | 49 | # fig_path = Path(str(npy_path).replace(".npy",".png")) 50 | # return fig_path 51 | 52 | # export fbx with hand params from pkl files 53 | # refer to /apdcephfs/share_1227775/shingxchen/AIMotion/TMOST/scripts/fbx_output_smplx.py 54 | def export_fbx_hand(pkl_path): 55 | input = pkl_path 56 | output = pkl_path.replace(".pkl", ".fbx") 57 | 58 | execute_python = "/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender" 59 | export_scripts = "./scripts/fbx_output_smplx.py" 60 | os.system( 61 | f"{execute_python} -noaudio --background --python {export_scripts}\ 62 | --input {input} \ 63 | --output {output}" 64 | ) 65 | 66 | 67 | # export fbx without hand params from pkl files 68 | # refer to /apdcephfs/share_1227775/shingxchen/AIMotion/TMOST/scripts/fbx_output.py 69 | def export_fbx(pkl_path): 70 | input = pkl_path 71 | output = pkl_path.replace(".pkl", ".fbx") 72 | 73 | execute_python = "/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender" 74 | export_scripts = "./scripts/fbx_output.py" 75 | os.system( 76 | f"{execute_python} -noaudio --background --python {export_scripts}\ 77 | --input {input} \ 78 | --output {output}" 79 | ) 80 | -------------------------------------------------------------------------------- /mGPT/utils/easyconvert.py: -------------------------------------------------------------------------------- 1 | from .geometry_tools import * 2 | 3 | 4 | def rep_to_rep(oldtype, newtype, rotations): 5 | if newtype in ["matrix"]: 6 | return to_matrix(oldtype, rotations) 7 | 8 | if oldtype in ["rotvec", "axisangle"]: 9 | return axis_angle_to(newtype, rotations) 10 | elif oldtype in ["matrix"]: 11 | return matrix_to(newtype, rotations) 12 | else: 13 | raise NotImplementedError("Only rotvec and matrix are supported.") 14 | 15 | def nfeats_of(rottype): 16 | if rottype in ["rotvec", "axisangle"]: 17 | return 3 18 | elif rottype in ["rotquat", "quaternion"]: 19 | return 4 20 | elif rottype in ["rot6d", "6drot", "rotation6d"]: 21 | return 6 22 | elif rottype in ["rotmat"]: 23 | return 9 24 | else: 25 | return TypeError("This rotation type doesn't have features.") 26 | 27 | 28 | def axis_angle_to(newtype, rotations): 29 | if newtype in ["matrix"]: 30 | rotations = axis_angle_to_matrix(rotations) 31 | return rotations 32 | elif newtype in ["rotmat"]: 33 | rotations = axis_angle_to_matrix(rotations) 34 | rotations = matrix_to("rotmat", rotations) 35 | return rotations 36 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 37 | rotations = axis_angle_to_matrix(rotations) 38 | rotations = matrix_to("rot6d", rotations) 39 | return rotations 40 | elif newtype in ["rotquat", "quaternion"]: 41 | rotations = 
axis_angle_to_quaternion(rotations) 42 | return rotations 43 | elif newtype in ["rotvec", "axisangle"]: 44 | return rotations 45 | else: 46 | raise NotImplementedError 47 | 48 | 49 | def matrix_to(newtype, rotations): 50 | if newtype in ["matrix"]: 51 | return rotations 52 | if newtype in ["rotmat"]: 53 | rotations = rotations.reshape((*rotations.shape[:-2], 9)) 54 | return rotations 55 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 56 | rotations = matrix_to_rotation_6d(rotations) 57 | return rotations 58 | elif newtype in ["rotquat", "quaternion"]: 59 | rotations = matrix_to_quaternion(rotations) 60 | return rotations 61 | elif newtype in ["rotvec", "axisangle"]: 62 | rotations = matrix_to_axis_angle(rotations) 63 | return rotations 64 | else: 65 | raise NotImplementedError 66 | 67 | 68 | def to_matrix(oldtype, rotations): 69 | if oldtype in ["matrix"]: 70 | return rotations 71 | if oldtype in ["rotmat"]: 72 | rotations = rotations.reshape((*rotations.shape[:-2], 3, 3)) 73 | return rotations 74 | elif oldtype in ["rot6d", "6drot", "rotation6d"]: 75 | rotations = rotation_6d_to_matrix(rotations) 76 | return rotations 77 | elif oldtype in ["rotquat", "quaternion"]: 78 | rotations = quaternion_to_matrix(rotations) 79 | return rotations 80 | elif oldtype in ["rotvec", "axisangle"]: 81 | rotations = axis_angle_to_matrix(rotations) 82 | return rotations 83 | else: 84 | raise NotImplementedError 85 | -------------------------------------------------------------------------------- /mGPT/render/blender/floor.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | from .materials import floor_mat 3 | 4 | 5 | def get_trajectory(data, is_mesh): 6 | if is_mesh: 7 | # mean of the vertices 8 | trajectory = data[:, :, [0, 1]].mean(1) 9 | else: 10 | # get the root joint 11 | trajectory = data[:, 0, [0, 1]] 12 | return trajectory 13 | 14 | 15 | def plot_floor(data, big_plane=True): 16 | # Create a floor 17 | minx, miny, _ = data.min(axis=(0, 1)) 18 | maxx, maxy, _ = data.max(axis=(0, 1)) 19 | minz = 0 20 | 21 | location = ((maxx + minx)/2, (maxy + miny)/2, 0) 22 | # a little bit bigger 23 | scale = (1.08*(maxx - minx)/2, 1.08*(maxy - miny)/2, 1) 24 | 25 | bpy.ops.mesh.primitive_plane_add(size=2, enter_editmode=False, align='WORLD', location=location, scale=(1, 1, 1)) 26 | 27 | bpy.ops.transform.resize(value=scale, orient_type='GLOBAL', orient_matrix=((1, 0, 0), (0, 1, 0), (0, 0, 1)), orient_matrix_type='GLOBAL', 28 | constraint_axis=(False, True, False), mirror=True, use_proportional_edit=False, 29 | proportional_edit_falloff='SMOOTH', proportional_size=1, use_proportional_connected=False, 30 | use_proportional_projected=False, release_confirm=True) 31 | obj = bpy.data.objects["Plane"] 32 | obj.name = "SmallPlane" 33 | obj.data.name = "SmallPlane" 34 | 35 | if not big_plane: 36 | obj.active_material = floor_mat(color=(0.2, 0.2, 0.2, 1)) 37 | else: 38 | obj.active_material = floor_mat(color=(0.1, 0.1, 0.1, 1)) 39 | 40 | if big_plane: 41 | location = ((maxx + minx)/2, (maxy + miny)/2, -0.01) 42 | bpy.ops.mesh.primitive_plane_add(size=2, enter_editmode=False, align='WORLD', location=location, scale=(1, 1, 1)) 43 | 44 | bpy.ops.transform.resize(value=[2*x for x in scale], orient_type='GLOBAL', orient_matrix=((1, 0, 0), (0, 1, 0), (0, 0, 1)), orient_matrix_type='GLOBAL', 45 | constraint_axis=(False, True, False), mirror=True, use_proportional_edit=False, 46 | proportional_edit_falloff='SMOOTH', proportional_size=1, use_proportional_connected=False, 47 | 
use_proportional_projected=False, release_confirm=True) 48 | 49 | obj = bpy.data.objects["Plane"] 50 | obj.name = "BigPlane" 51 | obj.data.name = "BigPlane" 52 | obj.active_material = floor_mat(color=(0.2, 0.2, 0.2, 1)) 53 | 54 | 55 | def show_traj(coords): 56 | pass 57 | # create the Curve Datablock 58 | # curveData = bpy.data.curves.new('myCurve', type='CURVE') 59 | # curveData.dimensions = '3D' 60 | # curveData.resolution_u = 2 61 | 62 | # # map coords to spline 63 | # polyline = curveData.splines.new('POLY') 64 | # polyline.points.add(len(coords)-1) 65 | # for i, coord in enumerate(coords): 66 | # x, y = coord 67 | # polyline.points[i].co = (x, y, 0.001, 1) 68 | 69 | # # create Object 70 | # curveOB = bpy.data.objects.new('myCurve', curveData) 71 | # curveData.bevel_depth = 0.01 72 | 73 | # bpy.context.collection.objects.link(curveOB) 74 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import torch 4 | import pytorch_lightning as pl 5 | from omegaconf import OmegaConf 6 | from mGPT.callback import build_callbacks 7 | from mGPT.config import parse_args, instantiate_from_config 8 | from mGPT.data.build_data import build_data 9 | from mGPT.models.build_model import build_model 10 | from mGPT.utils.logger import create_logger 11 | from mGPT.utils.load_checkpoint import load_pretrained, load_pretrained_vae 12 | 13 | def main(): 14 | # Configs 15 | cfg = parse_args(phase="train") # parse config file 16 | 17 | # Logger 18 | logger = create_logger(cfg, phase="train") # create logger 19 | logger.info(OmegaConf.to_yaml(cfg)) # print config file 20 | 21 | # Seed 22 | pl.seed_everything(cfg.SEED_VALUE) 23 | 24 | # Environment Variables 25 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 26 | 27 | # Metric Logger 28 | pl_loggers = [] 29 | for loggerName in cfg.LOGGER.TYPE: 30 | if loggerName == 'tensorboard' or cfg.LOGGER.WANDB.params.project: 31 | pl_logger = instantiate_from_config( 32 | eval(f'cfg.LOGGER.{loggerName.upper()}')) 33 | pl_loggers.append(pl_logger) 34 | 35 | # Callbacks 36 | callbacks = build_callbacks(cfg, logger=logger, phase='train') 37 | logger.info("Callbacks initialized") 38 | 39 | # Dataset 40 | datamodule = build_data(cfg) 41 | logger.info("datasets module {} initialized".format("".join( 42 | cfg.DATASET.target.split('.')[-2]))) 43 | 44 | # Model 45 | model = build_model(cfg, datamodule) 46 | logger.info("model {} loaded".format(cfg.model.target)) 47 | 48 | # Lightning Trainer 49 | trainer = pl.Trainer( 50 | default_root_dir=cfg.FOLDER_EXP, 51 | max_epochs=cfg.TRAIN.END_EPOCH, 52 | # precision='16', 53 | logger=pl_loggers, 54 | callbacks=callbacks, 55 | check_val_every_n_epoch=cfg.LOGGER.VAL_EVERY_STEPS, 56 | accelerator=cfg.ACCELERATOR, 57 | devices=cfg.DEVICE, 58 | num_nodes=cfg.NUM_NODES, 59 | strategy="ddp_find_unused_parameters_true" 60 | if len(cfg.DEVICE) > 1 else 'auto', 61 | benchmark=False, 62 | deterministic=False, 63 | ) 64 | logger.info("Trainer initialized") 65 | 66 | # Strict load pretrained model 67 | if cfg.TRAIN.PRETRAINED: 68 | load_pretrained(cfg, model, logger) 69 | 70 | # Strict load vae model 71 | if cfg.TRAIN.PRETRAINED_VAE: 72 | load_pretrained_vae(cfg, model, logger) 73 | 74 | # Pytorch 2.0 Compile 75 | # if torch.__version__ >= "2.0.0": 76 | # model = torch.compile(model, mode="reduce-overhead") 77 | # model = torch.compile(model) 78 | 79 | # Lightning Fitting 80 | if cfg.TRAIN.RESUME: 81 |
trainer.fit(model, 82 | datamodule=datamodule, 83 | ckpt_path=cfg.TRAIN.PRETRAINED) 84 | else: 85 | trainer.fit(model, datamodule=datamodule) 86 | 87 | # Training ends 88 | logger.info( 89 | f"The outputs of this experiment are stored in {cfg.FOLDER_EXP}") 90 | logger.info("Training ends!") 91 | 92 | 93 | if __name__ == "__main__": 94 | main() 95 | -------------------------------------------------------------------------------- /scripts/get_code_visual.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pytorch_lightning as pl 4 | import torch 5 | from pathlib import Path 6 | from tqdm import tqdm 7 | from mGPT.config import parse_args 8 | from mGPT.data.build_data import build_data 9 | from mGPT.models.build_model import build_model 10 | from mGPT.utils.load_checkpoint import load_pretrained, load_pretrained_vae 11 | 12 | 13 | def main(): 14 | 15 | # parse options 16 | cfg = parse_args(phase="test") # parse config file 17 | cfg.TRAIN.STAGE = "token" 18 | cfg.TRAIN.BATCH_SIZE = 1 19 | 20 | model_name = cfg.model.target.split('.')[-2].lower() 21 | output_dir = Path( 22 | os.path.join(cfg.FOLDER, model_name, cfg.NAME, 23 | "tokens_visual_" + cfg.TIME)) 24 | 25 | # set seed 26 | pl.seed_everything(cfg.SEED_VALUE) 27 | 28 | # gpu setting 29 | if cfg.ACCELERATOR == "gpu": 30 | os.environ["PYTHONWARNINGS"] = "ignore" 31 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 32 | 33 | # create dataset 34 | datamodule = build_data(cfg, phase="test") 35 | print("datasets module {} initialized".format("".join(cfg.TRAIN.DATASETS))) 36 | 37 | os.makedirs(output_dir, exist_ok=True) 38 | 39 | # create model 40 | model = build_model(cfg, datamodule) 41 | print("model {} loaded".format(cfg.model.target)) 42 | 43 | # Strict load vae model 44 | if cfg.TRAIN.PRETRAINED_VAE: 45 | load_pretrained_vae(cfg, model) 46 | 47 | # loading state dict 48 | if cfg.TEST.CHECKPOINTS: 49 | load_pretrained(cfg, model, phase="test") 50 | 51 | if cfg.ACCELERATOR == "gpu": 52 | model = model.cuda() 53 | 54 | model.eval() 55 | codes = cfg.model.params.codebook_size 56 | with torch.no_grad(): 57 | for i in tqdm(range(codes)): 58 | 59 | # Generate motion from token 60 | m_token = torch.LongTensor(1, 1).fill_(i).to(model.device) 61 | # vq_latent = model.vae.quantizer.dequantize(m_token) 62 | gen_motion = model.vae.decode(m_token) 63 | gen_motion = model.feats2joints(gen_motion).to('cpu').numpy() 64 | 65 | # Generate translation from token 66 | texts = [ 67 | f'Generate text: ' 68 | ] 69 | # texts = [f'Use only one word to describe: '] 70 | batch = {"text": texts, "length": [0]} 71 | 72 | # out_text = model(batch)['texts'] 73 | # print(out_text) 74 | # out_text_path = os.path.join(output_dir, f'{i}.txt') 75 | # Path(out_text_path).parent.mkdir(parents=True, exist_ok=True) 76 | # with open(out_text_path, 'w') as f: 77 | # f.write(out_text[0]) 78 | 79 | target_path = os.path.join(output_dir, f'{i}.npy') 80 | Path(target_path).parent.mkdir(parents=True, exist_ok=True) 81 | 82 | np.save(target_path, gen_motion) 83 | 84 | print( 85 | f'Motion tokenization done, the motion tokens are saved to {output_dir}' 86 | ) 87 | 88 | 89 | if __name__ == "__main__": 90 | main() 91 | -------------------------------------------------------------------------------- /mGPT/data/transforms/joints2rots/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mGPT.utils.joints import mmm_joints, smplh2mmm_indexes 3 | 
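# Overview: this module collects the joint-index bookkeeping used by the joints2rots
# fitting -- name-to-index maps for the SMPL and AMASS skeletons, the MMM <-> SMPL
# joint-name correspondence, and the derived smpl2mmm_indexes lookup
# (MMM joint name -> SMPL joint name -> SMPL joint index). The SMPL_*/GMM_*/Part_Seg
# paths near the bottom are hard-coded dependency locations and should be adapted to
# the local setup.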
4 | # Map joints Name to SMPL joints idx 5 | JOINT_MAP = { 6 | 'MidHip': 0, 7 | 'LHip': 1, 8 | 'LKnee': 4, 9 | 'LAnkle': 7, 10 | 'LFoot': 10, 11 | 'RHip': 2, 12 | 'RKnee': 5, 13 | 'RAnkle': 8, 14 | 'RFoot': 11, 15 | 'LShoulder': 16, 16 | 'LElbow': 18, 17 | 'LWrist': 20, 18 | 'LHand': 22, 19 | 'RShoulder': 17, 20 | 'RElbow': 19, 21 | 'RWrist': 21, 22 | 'RHand': 23, 23 | 'spine1': 3, 24 | 'spine2': 6, 25 | 'spine3': 9, 26 | 'Neck': 12, 27 | 'Head': 15, 28 | 'LCollar': 13, 29 | 'Rcollar': 14, 30 | 'Nose': 24, 31 | 'REye': 26, 32 | 'LEye': 26, 33 | 'REar': 27, 34 | 'LEar': 28, 35 | 'LHeel': 31, 36 | 'RHeel': 34, 37 | 'OP RShoulder': 17, 38 | 'OP LShoulder': 16, 39 | 'OP RHip': 2, 40 | 'OP LHip': 1, 41 | 'OP Neck': 12, 42 | } 43 | 44 | mmm2smpl_correspondence = { 45 | "root": "MidHip", 46 | "BP": "spine1", 47 | "BT": "spine3", 48 | "BLN": "Neck", 49 | "BUN": "Head", 50 | "LS": "LShoulder", 51 | "LE": "LElbow", 52 | "LW": "LWrist", 53 | "RS": "RShoulder", 54 | "RE": "RElbow", 55 | "RW": "RWrist", 56 | "LH": "LHip", 57 | "LK": "LKnee", 58 | "LA": "LAnkle", 59 | "LMrot": "LHeel", 60 | "LF": "LFoot", 61 | "RH": "RHip", 62 | "RK": "RKnee", 63 | "RA": "RAnkle", 64 | "RMrot": "RHeel", 65 | "RF": "RFoot" 66 | } 67 | 68 | full_smpl_idx = range(24) 69 | key_smpl_idx = [0, 1, 4, 7, 2, 5, 8, 17, 19, 21, 16, 18, 20] 70 | 71 | AMASS_JOINT_MAP = { 72 | 'MidHip': 0, 73 | 'LHip': 1, 74 | 'LKnee': 4, 75 | 'LAnkle': 7, 76 | 'LFoot': 10, 77 | 'RHip': 2, 78 | 'RKnee': 5, 79 | 'RAnkle': 8, 80 | 'RFoot': 11, 81 | 'LShoulder': 16, 82 | 'LElbow': 18, 83 | 'LWrist': 20, 84 | 'RShoulder': 17, 85 | 'RElbow': 19, 86 | 'RWrist': 21, 87 | 'spine1': 3, 88 | 'spine2': 6, 89 | 'spine3': 9, 90 | 'Neck': 12, 91 | 'Head': 15, 92 | 'LCollar': 13, 93 | 'Rcollar': 14, 94 | } 95 | amass_idx = range(22) 96 | amass_smpl_idx = range(22) 97 | 98 | # cal mmm in smpl index 99 | smpl2mmm_correspondence = { 100 | val: key 101 | for key, val in mmm2smpl_correspondence.items() 102 | } 103 | smpl2mmm_indexes = [JOINT_MAP[mmm2smpl_correspondence[x]] for x in mmm_joints] 104 | 105 | # cal mmm joints map 106 | MMM_JOINT_MAP = { 107 | val: JOINT_MAP[val] 108 | for key, val in mmm2smpl_correspondence.items() 109 | } 110 | 111 | # mmm_idx = range(21) 112 | # mmm_smpl_dix = smpl2mmm_indexes 113 | # mmm_smpl_dix = smplh2mmm_indexes 114 | # todo - configable 115 | SMPL_MODEL_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/" 116 | GMM_MODEL_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/" 117 | SMPL_MEAN_FILE = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/neutral_smpl_mean_params.h5" 118 | # for collsion 119 | Part_Seg_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/smplx_parts_segm.pkl" 120 | -------------------------------------------------------------------------------- /mGPT/data/humanml/dataset_t2m_eval.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from .dataset_t2m import Text2MotionDataset 4 | 5 | 6 | class Text2MotionDatasetEval(Text2MotionDataset): 7 | 8 | def __init__( 9 | self, 10 | data_root, 11 | split, 12 | mean, 13 | std, 14 | w_vectorizer, 15 | max_motion_length=196, 16 | min_motion_length=40, 17 | unit_length=4, 18 | fps=20, 19 | tmpFile=True, 20 | tiny=False, 21 | debug=False, 22 | **kwargs, 23 | ): 24 | super().__init__(data_root, split, mean, std, max_motion_length, 25 | min_motion_length, unit_length, fps, tmpFile, tiny, 26 | debug, 
**kwargs) 27 | 28 | self.w_vectorizer = w_vectorizer 29 | 30 | 31 | def __getitem__(self, item): 32 | # Get text data 33 | idx = self.pointer + item 34 | data = self.data_dict[self.name_list[idx]] 35 | motion, m_length, text_list = data["motion"], data["length"], data["text"] 36 | 37 | all_captions = [ 38 | ' '.join([token.split('/')[0] for token in text_dic['tokens']]) 39 | for text_dic in text_list 40 | ] 41 | 42 | if len(all_captions) > 3: 43 | all_captions = all_captions[:3] 44 | elif len(all_captions) == 2: 45 | all_captions = all_captions + all_captions[0:1] 46 | elif len(all_captions) == 1: 47 | all_captions = all_captions * 3 48 | 49 | # Randomly select a caption 50 | text_data = random.choice(text_list) 51 | caption, tokens = text_data["caption"], text_data["tokens"] 52 | 53 | # Text 54 | max_text_len = 20 55 | if len(tokens) < max_text_len: 56 | # pad with "unk" 57 | tokens = ["sos/OTHER"] + tokens + ["eos/OTHER"] 58 | sent_len = len(tokens) 59 | tokens = tokens + ["unk/OTHER"] * (max_text_len + 2 - sent_len) 60 | else: 61 | # crop 62 | tokens = tokens[:max_text_len] 63 | tokens = ["sos/OTHER"] + tokens + ["eos/OTHER"] 64 | sent_len = len(tokens) 65 | pos_one_hots = [] 66 | word_embeddings = [] 67 | for token in tokens: 68 | word_emb, pos_oh = self.w_vectorizer[token] 69 | pos_one_hots.append(pos_oh[None, :]) 70 | word_embeddings.append(word_emb[None, :]) 71 | pos_one_hots = np.concatenate(pos_one_hots, axis=0) 72 | word_embeddings = np.concatenate(word_embeddings, axis=0) 73 | 74 | # Random crop 75 | if self.unit_length < 10: 76 | coin2 = np.random.choice(["single", "single", "double"]) 77 | else: 78 | coin2 = "single" 79 | 80 | if coin2 == "double": 81 | m_length = (m_length // self.unit_length - 1) * self.unit_length 82 | elif coin2 == "single": 83 | m_length = (m_length // self.unit_length) * self.unit_length 84 | 85 | idx = random.randint(0, len(motion) - m_length) 86 | motion = motion[idx:idx + m_length] 87 | 88 | # Z Normalization 89 | motion = (motion - self.mean) / self.std 90 | 91 | return caption, motion, m_length, word_embeddings, pos_one_hots, sent_len, "_".join( 92 | tokens), all_captions 93 | -------------------------------------------------------------------------------- /mGPT/metrics/m2m.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torchmetrics import Metric 6 | 7 | from .utils import * 8 | 9 | 10 | # motion prediction metric 11 | class PredMetrics(Metric): 12 | 13 | def __init__(self, 14 | cfg, 15 | njoints: int = 22, 16 | jointstype: str = "mmm", 17 | force_in_meter: bool = True, 18 | align_root: bool = True, 19 | dist_sync_on_step=True, 20 | task: str = "pred", 21 | **kwargs): 22 | super().__init__(dist_sync_on_step=dist_sync_on_step) 23 | 24 | self.name = 'Motion Prediction' 25 | self.cfg = cfg 26 | self.jointstype = jointstype 27 | self.align_root = align_root 28 | self.task = task 29 | self.force_in_meter = force_in_meter 30 | 31 | self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum") 32 | self.add_state("count_seq", 33 | default=torch.tensor(0), 34 | dist_reduce_fx="sum") 35 | 36 | self.add_state("APD", 37 | default=torch.tensor([0.0]), 38 | dist_reduce_fx="sum") 39 | self.add_state("ADE", 40 | default=torch.tensor([0.0]), 41 | dist_reduce_fx="sum") 42 | self.add_state("FDE", 43 | default=torch.tensor([0.0]), 44 | dist_reduce_fx="sum") 45 | 46 | self.MR_metrics = ["APD", "ADE", "FDE"] 47 | 48 | # All metric 49 |
self.metrics = self.MR_metrics 50 | 51 | def compute(self, sanity_flag): 52 | 53 | count = self.count 54 | count_seq = self.count_seq 55 | mr_metrics = {} 56 | mr_metrics["APD"] = self.APD / count_seq 57 | mr_metrics["ADE"] = self.ADE / count_seq 58 | mr_metrics["FDE"] = self.FDE / count_seq 59 | 60 | # Reset 61 | self.reset() 62 | 63 | return mr_metrics 64 | 65 | def update(self, joints_rst: Tensor, joints_ref: Tensor, 66 | lengths: List[int]): 67 | 68 | assert joints_rst.shape == joints_ref.shape 69 | assert joints_rst.dim() == 4 70 | # (bs, seq, njoint=22, 3) 71 | 72 | self.count += sum(lengths) 73 | self.count_seq += len(lengths) 74 | 75 | rst = torch.flatten(joints_rst, start_dim=2) 76 | ref = torch.flatten(joints_ref, start_dim=2) 77 | 78 | for i, l in enumerate(lengths): 79 | if self.task == "pred": 80 | pred_start = int(l*self.cfg.ABLATION.predict_ratio) 81 | diff = rst[i,pred_start:] - ref[i,pred_start:] 82 | elif self.task == "inbetween": 83 | inbetween_start = int(l*self.cfg.ABLATION.inbetween_ratio) 84 | inbetween_end = l - int(l*self.cfg.ABLATION.inbetween_ratio) 85 | diff = rst[i,inbetween_start:inbetween_end] - ref[i,inbetween_start:inbetween_end] 86 | else: 87 | print(f"Task {self.task} not implemented.") 88 | diff = rst - ref 89 | 90 | dist = torch.linalg.norm(diff, dim=-1)[None] 91 | 92 | ade = dist.mean(dim=1) 93 | fde = dist[:,-1] 94 | self.ADE = self.ADE + ade 95 | self.FDE = self.FDE + fde 96 | -------------------------------------------------------------------------------- /mGPT/archs/tools/token_emb.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor, nn 3 | 4 | class NewTokenEmb(nn.Module): 5 | """ 6 | For adding new tokens to a pretrained model 7 | """ 8 | 9 | def __init__(self, 10 | old_embeddings: nn.Embedding, 11 | new_num_tokens: int = None) -> None: 12 | 13 | super().__init__() 14 | 15 | self.num_tokens = old_embeddings.num_embeddings + new_num_tokens 16 | self.old_num_tokens = old_embeddings.num_embeddings 17 | self.new_num_tokens = new_num_tokens 18 | self.embedding_dim = old_embeddings.embedding_dim 19 | 20 | # For text embeddings 21 | self.text_embeddings = nn.Embedding( 22 | self.num_tokens, 23 | self.embedding_dim, 24 | device=old_embeddings.weight.device, 25 | dtype=old_embeddings.weight.dtype) 26 | with torch.no_grad(): 27 | self.text_embeddings.weight.data[:old_embeddings. 28 | num_embeddings] = old_embeddings.weight.data 29 | self.text_embeddings.weight.data[ 30 | self.old_num_tokens:] = torch.zeros( 31 | self.new_num_tokens, 32 | self.embedding_dim, 33 | dtype=old_embeddings.weight.dtype, 34 | device=old_embeddings.weight.device) 35 | self.text_embeddings.weight.requires_grad_(False) 36 | 37 | # For motion embeddings 38 | self.motion_embeddings = nn.Embedding( 39 | new_num_tokens, 40 | self.embedding_dim, 41 | device=old_embeddings.weight.device, 42 | dtype=old_embeddings.weight.dtype) 43 | with torch.no_grad(): 44 | self.motion_embeddings.weight.data[:self. 45 | old_num_tokens] = torch.zeros( 46 | new_num_tokens, 47 | self.embedding_dim, 48 | dtype=old_embeddings.weight. 49 | dtype, 50 | device=old_embeddings. 51 | weight.device) 52 | self.word2motionProj = nn.Linear(self.old_num_tokens, new_num_tokens) 53 | 54 | def forward(self, input: Tensor) -> Tensor: 55 | 56 | with torch.no_grad(): 57 | self.motion_embeddings.weight.data[:self.
58 | old_num_tokens] = torch.zeros( 59 | self.new_num_tokens, 60 | self.embedding_dim, 61 | dtype=self.motion_embeddings 62 | .weight.dtype, 63 | device=self. 64 | motion_embeddings.weight. 65 | device) 66 | 67 | self.motion_embeddings.weight.data[ 68 | self.old_num_tokens:] = self.word2motionProj( 69 | self.text_embeddings.weight.data[:self.old_num_tokens].permute( 70 | 1, 0)).permute(1, 0) 71 | 72 | return self.text_embeddings(input) + self.motion_embeddings(input) 73 | 74 | -------------------------------------------------------------------------------- /mGPT/metrics/mr.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torchmetrics import Metric 6 | 7 | from .utils import * 8 | 9 | 10 | # motion reconstruction metric 11 | class MRMetrics(Metric): 12 | 13 | def __init__(self, 14 | njoints, 15 | jointstype: str = "mmm", 16 | force_in_meter: bool = True, 17 | align_root: bool = True, 18 | dist_sync_on_step=True, 19 | **kwargs): 20 | super().__init__(dist_sync_on_step=dist_sync_on_step) 21 | 22 | self.name = 'Motion Reconstructions' 23 | self.jointstype = jointstype 24 | self.align_root = align_root 25 | self.force_in_meter = force_in_meter 26 | 27 | self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum") 28 | self.add_state("count_seq", 29 | default=torch.tensor(0), 30 | dist_reduce_fx="sum") 31 | 32 | self.add_state("MPJPE", 33 | default=torch.tensor([0.0]), 34 | dist_reduce_fx="sum") 35 | self.add_state("PAMPJPE", 36 | default=torch.tensor([0.0]), 37 | dist_reduce_fx="sum") 38 | self.add_state("ACCEL", 39 | default=torch.tensor([0.0]), 40 | dist_reduce_fx="sum") 41 | # todo 42 | # self.add_state("ROOT", default=torch.tensor([0.0]), dist_reduce_fx="sum") 43 | 44 | self.MR_metrics = ["MPJPE", "PAMPJPE", "ACCEL"] 45 | 46 | # All metric 47 | self.metrics = self.MR_metrics 48 | 49 | def compute(self, sanity_flag): 50 | if self.force_in_meter: 51 | # different jointstypes have different scale factors 52 | # if self.jointstype == 'mmm': 53 | # factor = 1000.0 54 | # elif self.jointstype == 'humanml3d': 55 | # factor = 1000.0 * 0.75 / 480 56 | factor = 1000.0 57 | else: 58 | factor = 1.0 59 | 60 | count = self.count 61 | count_seq = self.count_seq 62 | mr_metrics = {} 63 | mr_metrics["MPJPE"] = self.MPJPE / count * factor 64 | mr_metrics["PAMPJPE"] = self.PAMPJPE / count * factor 65 | # accel error: joints_gt[:-2] - 2 * joints_gt[1:-1] + joints_gt[2:] 66 | # n-2 for each sequences 67 | mr_metrics["ACCEL"] = self.ACCEL / (count - 2 * count_seq) * factor 68 | 69 | # Reset 70 | self.reset() 71 | 72 | return mr_metrics 73 | 74 | def update(self, joints_rst: Tensor, joints_ref: Tensor, 75 | lengths: List[int]): 76 | assert joints_rst.shape == joints_ref.shape 77 | assert joints_rst.dim() == 4 78 | # (bs, seq, njoint=22, 3) 79 | 80 | self.count += sum(lengths) 81 | self.count_seq += len(lengths) 82 | 83 | # avoid cuda error of DDP in pampjpe 84 | rst = joints_rst.detach().cpu() 85 | ref = joints_ref.detach().cpu() 86 | 87 | # align root joints index 88 | if self.align_root and self.jointstype in ['mmm', 'humanml3d']: 89 | align_inds = [0] 90 | else: 91 | align_inds = None 92 | 93 | for i in range(len(lengths)): 94 | self.MPJPE += torch.sum( 95 | calc_mpjpe(rst[i], ref[i], align_inds=align_inds)) 96 | self.PAMPJPE += torch.sum(calc_pampjpe(rst[i], ref[i])) 97 | self.ACCEL += torch.sum(calc_accel(rst[i], ref[i])) 98 | 
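A minimal sketch of driving the reconstruction metric defined above outside a Lightning loop; the tensor shapes and the `compute(sanity_flag)` signature follow the class as written, while the random inputs are only illustrative:

```python
import torch
from mGPT.metrics.mr import MRMetrics

# Stand-in (bs, seq, njoints=22, 3) joints for the reconstruction and the reference motion.
joints_ref = torch.randn(2, 60, 22, 3)
joints_rst = joints_ref + 0.01 * torch.randn_like(joints_ref)

metric = MRMetrics(njoints=22, jointstype="humanml3d", force_in_meter=True)
metric.update(joints_rst, joints_ref, lengths=[60, 60])
# Returns a dict with MPJPE / PAMPJPE / ACCEL, scaled by 1000 when force_in_meter=True.
print(metric.compute(sanity_flag=False))
```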
-------------------------------------------------------------------------------- /mGPT/losses/mgpt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .base import BaseLosses 4 | 5 | 6 | class CommitLoss(nn.Module): 7 | """ 8 | Useless Wrapper 9 | """ 10 | def __init__(self, **kwargs): 11 | super().__init__() 12 | 13 | def forward(self, commit, commit2, **kwargs): 14 | return commit 15 | 16 | 17 | class GPTLosses(BaseLosses): 18 | 19 | def __init__(self, cfg, stage, num_joints, **kwargs): 20 | # Save parameters 21 | self.stage = stage 22 | recons_loss = cfg.LOSS.ABLATION.RECONS_LOSS 23 | 24 | # Define losses 25 | losses = [] 26 | params = {} 27 | if stage == "vae": 28 | losses.append("recons_feature") 29 | params['recons_feature'] = cfg.LOSS.LAMBDA_FEATURE 30 | 31 | losses.append("recons_velocity") 32 | params['recons_velocity'] = cfg.LOSS.LAMBDA_VELOCITY 33 | 34 | losses.append("vq_commit") 35 | params['vq_commit'] = cfg.LOSS.LAMBDA_COMMIT 36 | elif stage in ["lm_pretrain", "lm_instruct"]: 37 | losses.append("gpt_loss") 38 | params['gpt_loss'] = cfg.LOSS.LAMBDA_CLS 39 | 40 | # Define loss functions & weights 41 | losses_func = {} 42 | for loss in losses: 43 | if loss.split('_')[0] == 'recons': 44 | if recons_loss == "l1": 45 | losses_func[loss] = nn.L1Loss 46 | elif recons_loss == "l2": 47 | losses_func[loss] = nn.MSELoss 48 | elif recons_loss == "l1_smooth": 49 | losses_func[loss] = nn.SmoothL1Loss 50 | elif loss.split('_')[1] in [ 51 | 'commit', 'loss', 'gpt', 'm2t2m', 't2m2t' 52 | ]: 53 | losses_func[loss] = CommitLoss 54 | elif loss.split('_')[1] in ['cls', 'lm']: 55 | losses_func[loss] = nn.CrossEntropyLoss 56 | else: 57 | raise NotImplementedError(f"Loss {loss} not implemented.") 58 | 59 | super().__init__(cfg, losses, params, losses_func, num_joints, 60 | **kwargs) 61 | 62 | def update(self, rs_set): 63 | '''Update the losses''' 64 | total: float = 0.0 65 | 66 | if self.stage in ["vae"]: 67 | total += self._update_loss("recons_feature", rs_set['m_rst'], 68 | rs_set['m_ref']) 69 | # total += self._update_loss("recons_joints", rs_set['joints_rst'], rs_set['joints_ref']) 70 | nfeats = rs_set['m_rst'].shape[-1] 71 | if nfeats in [263, 135 + 263]: 72 | if nfeats == 135 + 263: 73 | vel_start = 135 + 4 74 | elif nfeats == 263: 75 | vel_start = 4 76 | total += self._update_loss( 77 | "recons_velocity", 78 | rs_set['m_rst'][..., vel_start:(self.num_joints - 1) * 3 + 79 | vel_start], 80 | rs_set['m_ref'][..., vel_start:(self.num_joints - 1) * 3 + 81 | vel_start]) 82 | else: 83 | if self._params['recons_velocity'] != 0.0: 84 | raise NotImplementedError( 85 | "Velocity not implemented for nfeats = {})".format(nfeats)) 86 | total += self._update_loss("vq_commit", rs_set['loss_commit'], 87 | rs_set['loss_commit']) 88 | 89 | if self.stage in ["lm_pretrain", "lm_instruct"]: 90 | total += self._update_loss("gpt_loss", rs_set['outputs'].loss, 91 | rs_set['outputs'].loss) 92 | 93 | # Update the total loss 94 | self.total += total.detach() 95 | self.count += 1 96 | 97 | return total 98 | -------------------------------------------------------------------------------- /mGPT/render/blender/scene.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | from .materials import plane_mat # noqa 3 | 4 | 5 | def setup_renderer(denoising=True, oldrender=True, accelerator="gpu", device=[0]): 6 | bpy.context.scene.render.engine = "CYCLES" 7 | bpy.data.scenes[0].render.engine = 
"CYCLES" 8 | if accelerator.lower() == "gpu": 9 | bpy.context.preferences.addons[ 10 | "cycles" 11 | ].preferences.compute_device_type = "CUDA" 12 | bpy.context.scene.cycles.device = "GPU" 13 | i = 0 14 | bpy.context.preferences.addons["cycles"].preferences.get_devices() 15 | for d in bpy.context.preferences.addons["cycles"].preferences.devices: 16 | if i in device: # gpu id 17 | d["use"] = 1 18 | print(d["name"], "".join(str(i) for i in device)) 19 | else: 20 | d["use"] = 0 21 | i += 1 22 | 23 | if denoising: 24 | bpy.context.scene.cycles.use_denoising = True 25 | 26 | try: 27 | bpy.context.scene.render.tile_x = 256 28 | bpy.context.scene.render.tile_y = 256 29 | except AttributeError as e: 30 | print(e) 31 | bpy.context.scene.cycles.tile_size = 256 32 | bpy.context.scene.cycles.samples = 64 33 | # bpy.context.scene.cycles.denoiser = 'OPTIX' 34 | 35 | if not oldrender: 36 | bpy.context.scene.view_settings.view_transform = "Standard" 37 | bpy.context.scene.render.film_transparent = True 38 | bpy.context.scene.display_settings.display_device = "sRGB" 39 | bpy.context.scene.view_settings.gamma = 1.2 40 | bpy.context.scene.view_settings.exposure = -0.75 41 | 42 | 43 | # Setup scene 44 | def setup_scene( 45 | res="high", denoising=True, oldrender=True, accelerator="gpu", device=[0] 46 | ): 47 | scene = bpy.data.scenes["Scene"] 48 | assert res in ["ultra", "high", "med", "low"] 49 | if res == "high": 50 | scene.render.resolution_x = 1280 51 | scene.render.resolution_y = 1024 52 | elif res == "med": 53 | scene.render.resolution_x = 1280 // 2 54 | scene.render.resolution_y = 1024 // 2 55 | elif res == "low": 56 | scene.render.resolution_x = 1280 // 4 57 | scene.render.resolution_y = 1024 // 4 58 | elif res == "ultra": 59 | scene.render.resolution_x = 1280 * 2 60 | scene.render.resolution_y = 1024 * 2 61 | 62 | scene.render.film_transparent= True 63 | world = bpy.data.worlds["World"] 64 | world.use_nodes = True 65 | bg = world.node_tree.nodes["Background"] 66 | bg.inputs[0].default_value[:3] = (1.0, 1.0, 1.0) 67 | bg.inputs[1].default_value = 1.0 68 | 69 | # Remove default cube 70 | if "Cube" in bpy.data.objects: 71 | bpy.data.objects["Cube"].select_set(True) 72 | bpy.ops.object.delete() 73 | 74 | bpy.ops.object.light_add( 75 | type="SUN", align="WORLD", location=(0, 0, 0), scale=(1, 1, 1) 76 | ) 77 | bpy.data.objects["Sun"].data.energy = 1.5 78 | 79 | # rotate camera 80 | bpy.ops.object.empty_add( 81 | type="PLAIN_AXES", align="WORLD", location=(0, 0, 0), scale=(1, 1, 1) 82 | ) 83 | bpy.ops.transform.resize( 84 | value=(10, 10, 10), 85 | orient_type="GLOBAL", 86 | orient_matrix=((1, 0, 0), (0, 1, 0), (0, 0, 1)), 87 | orient_matrix_type="GLOBAL", 88 | mirror=True, 89 | use_proportional_edit=False, 90 | proportional_edit_falloff="SMOOTH", 91 | proportional_size=1, 92 | use_proportional_connected=False, 93 | use_proportional_projected=False, 94 | ) 95 | bpy.ops.object.select_all(action="DESELECT") 96 | 97 | setup_renderer( 98 | denoising=denoising, oldrender=oldrender, accelerator=accelerator, device=device 99 | ) 100 | return scene 101 | -------------------------------------------------------------------------------- /mGPT/data/Kit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import os 4 | from os.path import join as pjoin 5 | from .humanml.utils.word_vectorizer import WordVectorizer 6 | from .humanml.scripts.motion_process import (process_file, recover_from_ric) 7 | from .HumanML3D import HumanML3DDataModule 8 | 
from .humanml import Text2MotionDatasetEval, Text2MotionDataset, Text2MotionDatasetCB, MotionDataset, MotionDatasetVQ, Text2MotionDatasetToken 9 | 10 | 11 | class KitDataModule(HumanML3DDataModule): 12 | def __init__(self, cfg, **kwargs): 13 | 14 | super().__init__(cfg, **kwargs) 15 | 16 | # Basic info of the dataset 17 | self.name = "kit" 18 | self.njoints = 21 19 | 20 | # Path to the dataset 21 | data_root = cfg.DATASET.KIT.ROOT 22 | self.hparams.data_root = data_root 23 | self.hparams.text_dir = pjoin(data_root, "texts") 24 | self.hparams.motion_dir = pjoin(data_root, 'new_joint_vecs') 25 | 26 | # Mean and std of the dataset 27 | dis_data_root = pjoin(cfg.DATASET.KIT.MEAN_STD_PATH, 'kit', 28 | "VQVAEV3_CB1024_CMT_H1024_NRES3", "meta") 29 | self.hparams.mean = np.load(pjoin(dis_data_root, "mean.npy")) 30 | self.hparams.std = np.load(pjoin(dis_data_root, "std.npy")) 31 | 32 | # Mean and std for fair evaluation 33 | dis_data_root_eval = pjoin(cfg.DATASET.KIT.MEAN_STD_PATH, 't2m', 34 | "Comp_v6_KLD005", "meta") 35 | self.hparams.mean_eval = np.load(pjoin(dis_data_root_eval, "mean.npy")) 36 | self.hparams.std_eval = np.load(pjoin(dis_data_root_eval, "std.npy")) 37 | 38 | # Length of the dataset 39 | self.hparams.max_motion_length = cfg.DATASET.KIT.MAX_MOTION_LEN 40 | self.hparams.min_motion_length = cfg.DATASET.KIT.MIN_MOTION_LEN 41 | self.hparams.max_text_len = cfg.DATASET.KIT.MAX_TEXT_LEN 42 | self.hparams.unit_length = cfg.DATASET.KIT.UNIT_LEN 43 | 44 | # Get additional info of the dataset 45 | self._sample_set = self.get_sample_set(overrides={"split": "test", "tiny": True}) 46 | self.nfeats = self._sample_set.nfeats 47 | cfg.DATASET.NFEATS = self.nfeats 48 | 49 | def joints2feats(self, features): 50 | example_data = np.load(os.path.join(self.hparams.data_root, 'joints', '03950_gt.npy')) 51 | example_data = example_data.reshape(len(example_data), -1, 3) 52 | example_data = torch.from_numpy(example_data) 53 | features = process_file(features, self.njoints, example_data, 'kit')[0] 54 | return features 55 | 56 | def normalize(self, features): 57 | mean = torch.tensor(self.hparams.mean).to(features) 58 | std = torch.tensor(self.hparams.std).to(features) 59 | features = (features - mean) / std 60 | return features 61 | 62 | def renorm4t2m(self, features): 63 | # renorm to t2m norms for using t2m evaluators 64 | ori_mean = torch.tensor(self.hparams.mean).to(features) 65 | ori_std = torch.tensor(self.hparams.std).to(features) 66 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 67 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 68 | features = features * ori_std + ori_mean 69 | features = (features - eval_mean) / eval_std 70 | return features 71 | 72 | def mm_mode(self, mm_on=True): 73 | # random select samples for mm 74 | if mm_on: 75 | self.is_mm = True 76 | self.name_list = self.test_dataset.name_list 77 | self.mm_list = np.random.choice(self.name_list, 78 | self.cfg.METRIC.MM_NUM_SAMPLES, 79 | replace=False) 80 | self.test_dataset.name_list = self.mm_list 81 | else: 82 | self.is_mm = False 83 | self.test_dataset.name_list = self.name_list 84 | -------------------------------------------------------------------------------- /mGPT/data/transforms/joints2jfeats/tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 
5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import torch 18 | import torch.nn.functional as F 19 | 20 | from mGPT.utils.joints import mmm_joints 21 | 22 | # Get the indexes of particular body part SMPLH case 23 | # Feet 24 | # LM, RM = smplh_joints.index("left_ankle"), smplh_joints.index("right_ankle") 25 | # LF, RF = smplh_joints.index("left_foot"), smplh_joints.index("right_foot") 26 | # # Shoulders 27 | # LS, RS = smplh_joints.index("left_shoulder"), smplh_joints.index("right_shoulder") 28 | # # Hips 29 | # LH, RH = smplh_joints.index("left_hip"), smplh_joints.index("right_hip") 30 | 31 | # Get the indexes of particular body part 32 | # Feet 33 | LM, RM = mmm_joints.index("LMrot"), mmm_joints.index("RMrot") 34 | LF, RF = mmm_joints.index("LF"), mmm_joints.index("RF") 35 | # Shoulders 36 | LS, RS = mmm_joints.index("LS"), mmm_joints.index("RS") 37 | # Hips 38 | LH, RH = mmm_joints.index("LH"), mmm_joints.index("RH") 39 | 40 | 41 | def get_forward_direction(poses, jointstype="mmm"): 42 | # assert jointstype == 'mmm' 43 | across = poses[..., RH, :] - poses[..., LH, :] + poses[..., RS, :] - poses[ 44 | ..., LS, :] 45 | forward = torch.stack((-across[..., 2], across[..., 0]), axis=-1) 46 | forward = torch.nn.functional.normalize(forward, dim=-1) 47 | return forward 48 | 49 | 50 | def get_floor(poses, jointstype="mmm"): 51 | # assert jointstype == 'mmm' 52 | ndim = len(poses.shape) 53 | foot_heights = poses[..., (LM, LF, RM, RF), 1].min(-1).values 54 | floor_height = softmin(foot_heights, softness=0.5, dim=-1) 55 | # changed this thing Mathis version 1.11 pytorch 56 | return floor_height[(ndim - 2) * [None]].transpose(0, -1) 57 | 58 | 59 | def softmax(x, softness=1.0, dim=None): 60 | maxi, mini = x.max(dim=dim).values, x.min(dim=dim).values 61 | return maxi + torch.log(softness + torch.exp(mini - maxi)) 62 | 63 | 64 | def softmin(x, softness=1.0, dim=0): 65 | return -softmax(-x, softness=softness, dim=dim) 66 | 67 | 68 | def gaussian_filter1d(_inputs, sigma, truncate=4.0): 69 | # Code adapted/mixed from scipy library into pytorch 70 | # https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/ndimage/filters.py#L211 71 | # and gaussian kernel 72 | # https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/ndimage/filters.py#L179 73 | # Correspond to mode="nearest" and order = 0 74 | # But works batched 75 | if len(_inputs.shape) == 2: 76 | inputs = _inputs[None] 77 | else: 78 | inputs = _inputs 79 | 80 | sd = float(sigma) 81 | radius = int(truncate * sd + 0.5) 82 | sigma2 = sigma * sigma 83 | x = torch.arange(-radius, 84 | radius + 1, 85 | device=inputs.device, 86 | dtype=inputs.dtype) 87 | phi_x = torch.exp(-0.5 / sigma2 * x**2) 88 | phi_x = phi_x / phi_x.sum() 89 | 90 | # Conv1d weights 91 | groups = inputs.shape[-1] 92 | weights = torch.tile(phi_x, (groups, 1, 1)) 93 | inputs = inputs.transpose(-1, -2) 94 | outputs = F.conv1d(inputs, weights, padding="same", 95 | groups=groups).transpose(-1, -2) 96 | 97 | 
return outputs.reshape(_inputs.shape) 98 | -------------------------------------------------------------------------------- /mGPT/data/__init__.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | from torch.utils.data import DataLoader 3 | 4 | 5 | class BASEDataModule(pl.LightningDataModule): 6 | def __init__(self, collate_fn): 7 | super().__init__() 8 | 9 | self.dataloader_options = {"collate_fn": collate_fn} 10 | self.persistent_workers = True 11 | self.is_mm = False 12 | 13 | self._train_dataset = None 14 | self._val_dataset = None 15 | self._test_dataset = None 16 | 17 | def get_sample_set(self, overrides={}): 18 | sample_params = self.hparams.copy() 19 | sample_params.update(overrides) 20 | return self.DatasetEval(**sample_params) 21 | 22 | @property 23 | def train_dataset(self): 24 | if self._train_dataset is None: 25 | self._train_dataset = self.Dataset(split=self.cfg.TRAIN.SPLIT, 26 | **self.hparams) 27 | return self._train_dataset 28 | 29 | @property 30 | def val_dataset(self): 31 | if self._val_dataset is None: 32 | params = self.hparams.copy() 33 | params['code_path'] = None 34 | params['split'] = self.cfg.EVAL.SPLIT 35 | self._val_dataset = self.DatasetEval(**params) 36 | return self._val_dataset 37 | 38 | @property 39 | def test_dataset(self): 40 | if self._test_dataset is None: 41 | # self._test_dataset = self.DatasetEval(split=self.cfg.TEST.SPLIT, 42 | # **self.hparams) 43 | params = self.hparams.copy() 44 | params['code_path'] = None 45 | params['split'] = self.cfg.TEST.SPLIT 46 | self._test_dataset = self.DatasetEval( **params) 47 | return self._test_dataset 48 | 49 | def setup(self, stage=None): 50 | # Use the getter the first time to load the data 51 | if stage in (None, "fit"): 52 | _ = self.train_dataset 53 | _ = self.val_dataset 54 | if stage in (None, "test"): 55 | _ = self.test_dataset 56 | 57 | def train_dataloader(self): 58 | dataloader_options = self.dataloader_options.copy() 59 | dataloader_options["batch_size"] = self.cfg.TRAIN.BATCH_SIZE 60 | dataloader_options["num_workers"] = self.cfg.TRAIN.NUM_WORKERS 61 | return DataLoader( 62 | self.train_dataset, 63 | shuffle=False, 64 | persistent_workers=True, 65 | **dataloader_options, 66 | ) 67 | 68 | def predict_dataloader(self): 69 | dataloader_options = self.dataloader_options.copy() 70 | dataloader_options[ 71 | "batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE 72 | dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS 73 | dataloader_options["shuffle"] = False 74 | return DataLoader( 75 | self.test_dataset, 76 | persistent_workers=True, 77 | **dataloader_options, 78 | ) 79 | 80 | def val_dataloader(self): 81 | # overrides batch_size and num_workers 82 | dataloader_options = self.dataloader_options.copy() 83 | dataloader_options["batch_size"] = self.cfg.EVAL.BATCH_SIZE 84 | dataloader_options["num_workers"] = self.cfg.EVAL.NUM_WORKERS 85 | dataloader_options["shuffle"] = False 86 | return DataLoader( 87 | self.val_dataset, 88 | persistent_workers=True, 89 | **dataloader_options, 90 | ) 91 | 92 | def test_dataloader(self): 93 | # overrides batch_size and num_workers 94 | dataloader_options = self.dataloader_options.copy() 95 | dataloader_options[ 96 | "batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE 97 | dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS 98 | dataloader_options["shuffle"] = False 99 | return DataLoader( 100 | self.test_dataset, 101 | persistent_workers=True, 102 | 
**dataloader_options, 103 | ) 104 | -------------------------------------------------------------------------------- /mGPT/archs/tm2t_evaluator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.utils.rnn import pack_padded_sequence 4 | 5 | 6 | class MovementConvEncoder(nn.Module): 7 | def __init__(self, input_size, hidden_size, output_size): 8 | super(MovementConvEncoder, self).__init__() 9 | self.main = nn.Sequential( 10 | nn.Conv1d(input_size, hidden_size, 4, 2, 1), 11 | nn.Dropout(0.2, inplace=True), 12 | nn.LeakyReLU(0.2, inplace=True), 13 | nn.Conv1d(hidden_size, output_size, 4, 2, 1), 14 | nn.Dropout(0.2, inplace=True), 15 | nn.LeakyReLU(0.2, inplace=True), 16 | ) 17 | self.out_net = nn.Linear(output_size, output_size) 18 | # self.main.apply(init_weight) 19 | # self.out_net.apply(init_weight) 20 | 21 | def forward(self, inputs): 22 | inputs = inputs.permute(0, 2, 1) 23 | outputs = self.main(inputs).permute(0, 2, 1) 24 | # print(outputs.shape) 25 | return self.out_net(outputs) 26 | 27 | 28 | class MotionEncoderBiGRUCo(nn.Module): 29 | def __init__(self, input_size, hidden_size, output_size): 30 | super(MotionEncoderBiGRUCo, self).__init__() 31 | 32 | self.input_emb = nn.Linear(input_size, hidden_size) 33 | self.gru = nn.GRU( 34 | hidden_size, hidden_size, batch_first=True, bidirectional=True 35 | ) 36 | self.output_net = nn.Sequential( 37 | nn.Linear(hidden_size * 2, hidden_size), 38 | nn.LayerNorm(hidden_size), 39 | nn.LeakyReLU(0.2, inplace=True), 40 | nn.Linear(hidden_size, output_size), 41 | ) 42 | 43 | # self.input_emb.apply(init_weight) 44 | # self.output_net.apply(init_weight) 45 | self.hidden_size = hidden_size 46 | self.hidden = nn.Parameter( 47 | torch.randn((2, 1, self.hidden_size), requires_grad=True) 48 | ) 49 | 50 | # input(batch_size, seq_len, dim) 51 | def forward(self, inputs, m_lens): 52 | num_samples = inputs.shape[0] 53 | 54 | input_embs = self.input_emb(inputs) 55 | hidden = self.hidden.repeat(1, num_samples, 1) 56 | 57 | cap_lens = m_lens.data.tolist() 58 | 59 | # emb = pack_padded_sequence(input=input_embs, lengths=cap_lens, batch_first=True) 60 | emb = input_embs 61 | 62 | gru_seq, gru_last = self.gru(emb, hidden) 63 | 64 | gru_last = torch.cat([gru_last[0], gru_last[1]], dim=-1) 65 | 66 | return self.output_net(gru_last) 67 | 68 | 69 | class TextEncoderBiGRUCo(nn.Module): 70 | def __init__(self, word_size, pos_size, hidden_size, output_size): 71 | super(TextEncoderBiGRUCo, self).__init__() 72 | 73 | self.pos_emb = nn.Linear(pos_size, word_size) 74 | self.input_emb = nn.Linear(word_size, hidden_size) 75 | self.gru = nn.GRU( 76 | hidden_size, hidden_size, batch_first=True, bidirectional=True 77 | ) 78 | self.output_net = nn.Sequential( 79 | nn.Linear(hidden_size * 2, hidden_size), 80 | nn.LayerNorm(hidden_size), 81 | nn.LeakyReLU(0.2, inplace=True), 82 | nn.Linear(hidden_size, output_size), 83 | ) 84 | 85 | # self.input_emb.apply(init_weight) 86 | # self.pos_emb.apply(init_weight) 87 | # self.output_net.apply(init_weight) 88 | # self.linear2.apply(init_weight) 89 | # self.batch_size = batch_size 90 | self.hidden_size = hidden_size 91 | self.hidden = nn.Parameter( 92 | torch.randn((2, 1, self.hidden_size), requires_grad=True) 93 | ) 94 | 95 | # input(batch_size, seq_len, dim) 96 | def forward(self, word_embs, pos_onehot, cap_lens): 97 | num_samples = word_embs.shape[0] 98 | 99 | pos_embs = self.pos_emb(pos_onehot) 100 | inputs = word_embs + pos_embs 101 | input_embs = 
self.input_emb(inputs) 102 | hidden = self.hidden.repeat(1, num_samples, 1) 103 | 104 | cap_lens = cap_lens.data.tolist() 105 | emb = pack_padded_sequence(input=input_embs, lengths=cap_lens, batch_first=True) 106 | 107 | gru_seq, gru_last = self.gru(emb, hidden) 108 | 109 | gru_last = torch.cat([gru_last[0], gru_last[1]], dim=-1) 110 | 111 | return self.output_net(gru_last) 112 | -------------------------------------------------------------------------------- /configs/default.yaml: -------------------------------------------------------------------------------- 1 | SEED_VALUE: 1234 # Seed value 2 | DEBUG: True # Debug mode 3 | FULL_CONFIG: false 4 | 5 | TRAIN: 6 | SPLIT: 'train' # Training split name 7 | NUM_WORKERS: 8 # Number of workers 8 | BATCH_SIZE: 8 # Size of batches 9 | END_EPOCH: 2000 # End epoch 10 | 11 | RESUME: '' # Experiment path to be resumed training 12 | PRETRAINED_VAE: '' # Pretrained vae/vqvae model path 13 | PRETRAINED: '' # Pretrained model path 14 | 15 | OPTIM: 16 | target: AdamW 17 | params: 18 | lr: 2e-4 19 | betas: [0.9, 0.99] 20 | weight_decay: 0.0 21 | 22 | LR_SCHEDULER: 23 | target: CosineAnnealingLR 24 | params: 25 | T_max: ${eval:${LOGGER.VAL_EVERY_STEPS} * 100} 26 | eta_min: 1e-6 27 | 28 | EVAL: 29 | SPLIT: 'val' # Validation split name 30 | BATCH_SIZE: 16 # Validation Batch size 31 | NUM_WORKERS: 8 # Validation Batch size 32 | 33 | TEST: 34 | CHECKPOINTS: '' # Pretrained model path 35 | SPLIT: 'test' # Testing split name 36 | BATCH_SIZE: 16 # Testing Batch size 37 | NUM_WORKERS: 8 # Testing Batch size 38 | 39 | SAVE_PREDICTIONS: False # Weather to save predictions 40 | COUNT_TIME: False # Weather to count time during test 41 | REPLICATION_TIMES: 20 # Number of times to replicate the test 42 | REP_I: 0 # For counting replication times 43 | 44 | model: 45 | target: mGPT.models.mgpt.MotionGPT 46 | params: 47 | condition: 'text' 48 | task: 't2m' 49 | lm: ${lm.default} 50 | motion_vae: ${vq.default} 51 | 52 | # Related parameters 53 | stage: ${TRAIN.STAGE} 54 | debug: ${DEBUG} 55 | codebook_size: ${model.params.motion_vae.params.code_num} 56 | metrics_dict: ${METRIC.TYPE} 57 | 58 | LOSS: 59 | LAMBDA_REC: 1.0 # Lambda for reconstruction losses 60 | LAMBDA_JOINT: 1.0 # Lambda for joint losses 61 | 62 | LAMBDA_LATENT: 1e-5 # Lambda for latent losses 63 | LAMBDA_KL: 1e-5 # Lambda for kl losses 64 | LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses 65 | LAMBDA_CROSS: 1.0 # Lambda for cross-reconstruction losses 66 | LAMBDA_CYCLE: 1.0 # Lambda for cycle losses 67 | LAMBDA_PRIOR: 0.0 # Lambda for diffusion prior losses 68 | 69 | LAMBDA_VELOCITY: 0.5 # Lambda for velocity losses 70 | LAMBDA_COMMIT: 0.02 # Lambda for commitment losses 71 | 72 | ABLATION: 73 | RECONS_LOSS: 'l1_smooth' 74 | 75 | METRIC: 76 | TASK: 't2m' 77 | FORCE_IN_METER: True 78 | DIST_SYNC_ON_STEP: True 79 | MM_NUM_SAMPLES: 100 # Number of samples for multimodal test 80 | MM_NUM_REPEATS: 30 # Number of repeats for multimodal test 81 | MM_NUM_TIMES: 10 # Number of times to repeat the multimodal test 82 | DIVERSITY_TIMES: 300 # Number of times to repeat the diversity test 83 | TM2T: ${evaluator.tm2t} 84 | 85 | DATASET: 86 | target: mGPT.data.HumanML3D.HumanML3DDataModule 87 | CODE_PATH: 'VQVAE' 88 | TASK_ROOT: '' 89 | TASK_PATH: '' 90 | NFEATS: 263 91 | KIT: 92 | MAX_MOTION_LEN: 196 93 | MIN_MOTION_LEN: 24 94 | MAX_TEXT_LEN: 20 95 | PICK_ONE_TEXT: true 96 | FRAME_RATE: 12.5 97 | UNIT_LEN: 4 98 | HUMANML3D: 99 | MAX_MOTION_LEN: 196 100 | MIN_MOTION_LEN: 40 101 | MAX_TEXT_LEN: 20 102 | 
PICK_ONE_TEXT: true 103 | FRAME_RATE: 20.0 104 | UNIT_LEN: 4 105 | STD_TEXT: False 106 | 107 | ABLATION: 108 | # For MotionGPT 109 | use_length: False 110 | predict_ratio: 0.2 111 | inbetween_ratio: 0.25 112 | image_size: 256 113 | 114 | # For Motion-latent-diffusion 115 | VAE_TYPE: 'actor' # vae ablation: actor or mcross 116 | VAE_ARCH: 'encoder_decoder' # mdiffusion vae architecture 117 | PE_TYPE: 'actor' # mdiffusion mld or actor 118 | DIFF_PE_TYPE: 'actor' # mdiffusion mld or actor 119 | SKIP_CONNECT: False # skip connection for denoiser va 120 | MLP_DIST: False # use linear to expand mean and std rather expand token nums 121 | IS_DIST: False # Mcross distribution kl 122 | PREDICT_EPSILON: True # noise or motion 123 | 124 | DEMO: 125 | EXAMPLE: null 126 | TASK: t2m 127 | 128 | LOGGER: 129 | VAL_EVERY_STEPS: 10 130 | LOGGERS: ['tensorboard', 'wandb'] 131 | TENSORBOARD: 132 | target: pytorch_lightning.loggers.TensorBoardLogger 133 | params: 134 | save_dir: ${FOLDER_EXP} 135 | name: 'tensorboard' 136 | version: '' 137 | WANDB: 138 | target: pytorch_lightning.loggers.WandbLogger 139 | params: 140 | project: null 141 | offline: False 142 | id: null 143 | version: '' 144 | name: ${NAME} 145 | save_dir: ${FOLDER_EXP} 146 | -------------------------------------------------------------------------------- /mGPT/data/tools/collate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import List, Dict 18 | from torch import Tensor 19 | 20 | 21 | def collate_tensor_with_padding(batch: List[Tensor]) -> Tensor: 22 | dims = batch[0].dim() 23 | max_size = [max([b.size(i) for b in batch]) for i in range(dims)] 24 | size = (len(batch),) + tuple(max_size) 25 | canvas = batch[0].new_zeros(size=size) 26 | for i, b in enumerate(batch): 27 | sub_tensor = canvas[i] 28 | for d in range(dims): 29 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 30 | sub_tensor.add_(b) 31 | return canvas 32 | 33 | 34 | def collate_datastruct_and_text(lst_elements: List) -> Dict: 35 | collate_datastruct = lst_elements[0]["datastruct"].transforms.collate 36 | 37 | batch = { 38 | # Collate with padding for the datastruct 39 | "datastruct": collate_datastruct([x["datastruct"] for x in lst_elements]), 40 | # Collate normally for the length 41 | "length": [x["length"] for x in lst_elements], 42 | # Collate the text 43 | "text": [x["text"] for x in lst_elements]} 44 | 45 | # add keyid for example 46 | otherkeys = [x for x in lst_elements[0].keys() if x not in batch] 47 | for key in otherkeys: 48 | batch[key] = [x[key] for x in lst_elements] 49 | 50 | return batch 51 | 52 | def collate_length_and_text(lst_elements: List) -> Dict: 53 | 54 | batch = { 55 | "length_0": [x["length_0"] for x in lst_elements], 56 | "length_1": [x["length_1"] for x in lst_elements], 57 | "length_transition": [x["length_transition"] for x in lst_elements], 58 | "length_1_with_transition": [x["length_1_with_transition"] for x in lst_elements], 59 | "text_0": [x["text_0"] for x in lst_elements], 60 | "text_1": [x["text_1"] for x in lst_elements] 61 | } 62 | 63 | return batch 64 | 65 | def collate_pairs_and_text(lst_elements: List, ) -> Dict: 66 | if 'features_0' not in lst_elements[0]: # test set 67 | collate_datastruct = lst_elements[0]["datastruct"].transforms.collate 68 | batch = {"datastruct": collate_datastruct([x["datastruct"] for x in lst_elements]), 69 | "length_0": [x["length_0"] for x in lst_elements], 70 | "length_1": [x["length_1"] for x in lst_elements], 71 | "length_transition": [x["length_transition"] for x in lst_elements], 72 | "length_1_with_transition": [x["length_1_with_transition"] for x in lst_elements], 73 | "text_0": [x["text_0"] for x in lst_elements], 74 | "text_1": [x["text_1"] for x in lst_elements] 75 | } 76 | 77 | else: 78 | batch = {"motion_feats_0": collate_tensor_with_padding([el["features_0"] for el in lst_elements]), 79 | "motion_feats_1": collate_tensor_with_padding([el["features_1"] for el in lst_elements]), 80 | "motion_feats_1_with_transition": collate_tensor_with_padding([el["features_1_with_transition"] for el in lst_elements]), 81 | "length_0": [x["length_0"] for x in lst_elements], 82 | "length_1": [x["length_1"] for x in lst_elements], 83 | "length_transition": [x["length_transition"] for x in lst_elements], 84 | "length_1_with_transition": [x["length_1_with_transition"] for x in lst_elements], 85 | "text_0": [x["text_0"] for x in lst_elements], 86 | "text_1": [x["text_1"] for x in lst_elements] 87 | } 88 | return batch 89 | 90 | 91 | def collate_text_and_length(lst_elements: Dict) -> Dict: 92 | batch = {"length": [x["length"] for x in lst_elements], 93 | "text": [x["text"] for x in lst_elements]} 94 | 95 | # add keyid for example 96 | otherkeys = [x for x in lst_elements[0].keys() if x not in batch and x != "datastruct"] 97 | for key in otherkeys: 98 | batch[key] = [x[key] for x in lst_elements] 99 | return batch 100 | 
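A quick sketch of the padding collator defined above, using two variable-length HumanML3D-style feature clips (the feature width 263 is just an example):

```python
import torch
from mGPT.data.tools.collate import collate_tensor_with_padding

feats = [torch.ones(48, 263), torch.ones(64, 263)]  # two clips of different lengths
batch = collate_tensor_with_padding(feats)
print(batch.shape)          # torch.Size([2, 64, 263])
print(batch[0, 48:].sum())  # tensor(0.) -- the shorter clip is zero-padded at the end
```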
-------------------------------------------------------------------------------- /mGPT/data/humanml/dataset_t2m_m2t.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from torch.utils import data 4 | from .dataset_t2m import Text2MotionDataset 5 | import codecs as cs 6 | from os.path import join as pjoin 7 | 8 | 9 | class Text2MotionDatasetM2T(data.Dataset): 10 | 11 | def __init__( 12 | self, 13 | data_root, 14 | split, 15 | mean, 16 | std, 17 | max_motion_length=196, 18 | min_motion_length=40, 19 | unit_length=4, 20 | fps=20, 21 | tmpFile=True, 22 | tiny=False, 23 | debug=False, 24 | **kwargs, 25 | ): 26 | 27 | self.max_motion_length = max_motion_length 28 | self.min_motion_length = min_motion_length 29 | self.unit_length = unit_length 30 | 31 | # Data mean and std 32 | self.mean = mean 33 | self.std = std 34 | 35 | # Data path 36 | split_file = pjoin(data_root, split + '.txt') 37 | motion_dir = pjoin(data_root, 'new_joint_vecs') 38 | text_dir = pjoin(data_root, 'texts') 39 | 40 | # Data id list 41 | self.id_list = [] 42 | with cs.open(split_file, "r") as f: 43 | for line in f.readlines(): 44 | self.id_list.append(line.strip()) 45 | 46 | new_name_list = [] 47 | length_list = [] 48 | data_dict = {} 49 | for name in self.id_list: 50 | # try: 51 | motion = np.load(pjoin(motion_dir, name + '.npy')) 52 | if (len(motion)) < self.min_motion_length or (len(motion) >= 200): 53 | continue 54 | 55 | 56 | text_data = [] 57 | flag = False 58 | 59 | with cs.open(pjoin(text_dir, name + '.txt')) as f: 60 | for line in f.readlines(): 61 | text_dict = {} 62 | line_split = line.strip().split('#') 63 | caption = line_split[0] 64 | tokens = line_split[1].split(' ') 65 | f_tag = float(line_split[2]) 66 | to_tag = float(line_split[3]) 67 | f_tag = 0.0 if np.isnan(f_tag) else f_tag 68 | to_tag = 0.0 if np.isnan(to_tag) else to_tag 69 | 70 | text_dict['caption'] = caption 71 | text_dict['tokens'] = tokens 72 | if f_tag == 0.0 and to_tag == 0.0: 73 | flag = True 74 | text_data.append(text_dict) 75 | else: 76 | try: 77 | n_motion = motion[int(f_tag*20) : int(to_tag*20)] 78 | 79 | if (len(n_motion)) < min_motion_length or (len(n_motion) >= 200): 80 | continue 81 | 82 | new_name = "%s_%f_%f"%(name, f_tag, to_tag) 83 | data_dict[new_name] = {'motion': n_motion, 84 | 'length': len(n_motion), 85 | 'text':[text_dict]} 86 | new_name_list.append(new_name) 87 | except: 88 | print(line_split) 89 | print(line_split[2], line_split[3], f_tag, to_tag, name) 90 | if flag: 91 | data_dict[name] = {'motion': motion, 92 | 'length': len(motion), 93 | 'name': name, 94 | 'text': text_data} 95 | 96 | new_name_list.append(name) 97 | length_list.append(len(motion)) 98 | # except: 99 | # # Some motion may not exist in KIT dataset 100 | # pass 101 | 102 | self.length_arr = np.array(length_list) 103 | self.data_dict = data_dict 104 | self.name_list = new_name_list 105 | self.nfeats = motion.shape[-1] 106 | 107 | 108 | def __len__(self): 109 | return len(self.data_dict) 110 | 111 | def __getitem__(self, item): 112 | name = self.name_list[item] 113 | data = self.data_dict[name] 114 | motion, m_length = data['motion'], data['length'] 115 | 116 | "Z Normalization" 117 | motion = (motion - self.mean) / self.std 118 | 119 | return name, motion, m_length, True, True, True, True, True, True 120 | -------------------------------------------------------------------------------- /mGPT/utils/temos_utils.py: 
-------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import numpy as np 4 | import torch 5 | from torch import Tensor 6 | 7 | import mGPT.utils.geometry_conver as geometry_conver 8 | 9 | 10 | def lengths_to_mask(lengths: List[int], 11 | device: torch.device, 12 | max_len: int = None) -> Tensor: 13 | lengths = torch.tensor(lengths, device=device) 14 | max_len = max_len if max_len else max(lengths) 15 | mask = torch.arange(max_len, device=device).expand( 16 | len(lengths), max_len) < lengths.unsqueeze(1) 17 | return mask 18 | 19 | 20 | def detach_to_numpy(tensor): 21 | return tensor.detach().cpu().numpy() 22 | 23 | 24 | def remove_padding(tensors, lengths): 25 | return [ 26 | tensor[:tensor_length] 27 | for tensor, tensor_length in zip(tensors, lengths) 28 | ] 29 | 30 | 31 | def nfeats_of(rottype): 32 | if rottype in ["rotvec", "axisangle"]: 33 | return 3 34 | elif rottype in ["rotquat", "quaternion"]: 35 | return 4 36 | elif rottype in ["rot6d", "6drot", "rotation6d"]: 37 | return 6 38 | elif rottype in ["rotmat"]: 39 | return 9 40 | else: 41 | return TypeError("This rotation type doesn't have features.") 42 | 43 | 44 | def axis_angle_to(newtype, rotations): 45 | if newtype in ["matrix"]: 46 | rotations = geometry_conver.axis_angle_to_matrix(rotations) 47 | return rotations 48 | elif newtype in ["rotmat"]: 49 | rotations = geometry_conver.axis_angle_to_matrix(rotations) 50 | rotations = matrix_to("rotmat", rotations) 51 | return rotations 52 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 53 | rotations = geometry_conver.axis_angle_to_matrix(rotations) 54 | rotations = matrix_to("rot6d", rotations) 55 | return rotations 56 | elif newtype in ["rotquat", "quaternion"]: 57 | rotations = geometry_conver.axis_angle_to_quaternion(rotations) 58 | return rotations 59 | elif newtype in ["rotvec", "axisangle"]: 60 | return rotations 61 | else: 62 | raise NotImplementedError 63 | 64 | 65 | def matrix_to(newtype, rotations): 66 | if newtype in ["matrix"]: 67 | return rotations 68 | if newtype in ["rotmat"]: 69 | rotations = rotations.reshape((*rotations.shape[:-2], 9)) 70 | return rotations 71 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 72 | rotations = geometry_conver.matrix_to_rotation_6d(rotations) 73 | return rotations 74 | elif newtype in ["rotquat", "quaternion"]: 75 | rotations = geometry_conver.matrix_to_quaternion(rotations) 76 | return rotations 77 | elif newtype in ["rotvec", "axisangle"]: 78 | rotations = geometry_conver.matrix_to_axis_angle(rotations) 79 | return rotations 80 | else: 81 | raise NotImplementedError 82 | 83 | 84 | def to_matrix(oldtype, rotations): 85 | if oldtype in ["matrix"]: 86 | return rotations 87 | if oldtype in ["rotmat"]: 88 | rotations = rotations.reshape((*rotations.shape[:-2], 3, 3)) 89 | return rotations 90 | elif oldtype in ["rot6d", "6drot", "rotation6d"]: 91 | rotations = geometry_conver.rotation_6d_to_matrix(rotations) 92 | return rotations 93 | elif oldtype in ["rotquat", "quaternion"]: 94 | rotations = geometry_conver.quaternion_to_matrix(rotations) 95 | return rotations 96 | elif oldtype in ["rotvec", "axisangle"]: 97 | rotations = geometry_conver.axis_angle_to_matrix(rotations) 98 | return rotations 99 | else: 100 | raise NotImplementedError 101 | 102 | 103 | # TODO: use a real subsampler.. 
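# In other words: keep every `step`-th frame, so going from 100 fps to 12.5 fps
# keeps frames 0, 8, 16, ... (step = int(100 / 12.5) = 8).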
104 | def subsample(num_frames, last_framerate, new_framerate): 105 | step = int(last_framerate / new_framerate) 106 | assert step >= 1 107 | frames = np.arange(0, num_frames, step) 108 | return frames 109 | 110 | 111 | # TODO: use a real upsampler.. 112 | def upsample(motion, last_framerate, new_framerate): 113 | step = int(new_framerate / last_framerate) 114 | assert step >= 1 115 | 116 | # Alpha blending => interpolation 117 | alpha = np.linspace(0, 1, step + 1) 118 | last = np.einsum("l,...->l...", 1 - alpha, motion[:-1]) 119 | new = np.einsum("l,...->l...", alpha, motion[1:]) 120 | 121 | chuncks = (last + new)[:-1] 122 | output = np.concatenate(chuncks.swapaxes(1, 0)) 123 | # Don't forget the last one 124 | output = np.concatenate((output, motion[[-1]])) 125 | return output 126 | 127 | 128 | if __name__ == "__main__": 129 | motion = np.arange(105) 130 | submotion = motion[subsample(len(motion), 100.0, 12.5)] 131 | newmotion = upsample(submotion, 12.5, 100) 132 | 133 | print(newmotion) 134 | -------------------------------------------------------------------------------- /mGPT/models/utils/blocks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mGPT.models.notused import AdaptiveInstanceNorm1d 5 | 6 | 7 | class MLP(nn.Module): 8 | 9 | def __init__(self, cfg, out_dim, is_init): 10 | super(MLP, self).__init__() 11 | dims = cfg.MODEL.MOTION_DECODER.MLP_DIM 12 | n_blk = len(dims) 13 | norm = 'none' 14 | acti = 'lrelu' 15 | 16 | layers = [] 17 | for i in range(n_blk - 1): 18 | layers += LinearBlock(dims[i], dims[i + 1], norm=norm, acti=acti) 19 | layers += LinearBlock(dims[-1], out_dim, norm='none', acti='none') 20 | self.model = nn.Sequential(*layers) 21 | 22 | if is_init: 23 | for m in self.modules(): 24 | if isinstance(m, nn.Linear): 25 | #nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 26 | nn.init.constant_(m.weight, 1) 27 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 28 | nn.init.constant_(m.weight, 1) 29 | nn.init.constant_(m.bias, 0) 30 | 31 | def forward(self, x): 32 | return self.model(x.view(x.size(0), -1)) 33 | 34 | 35 | def ZeroPad1d(sizes): 36 | return nn.ConstantPad1d(sizes, 0) 37 | 38 | 39 | def get_acti_layer(acti='relu', inplace=True): 40 | 41 | if acti == 'relu': 42 | return [nn.ReLU(inplace=inplace)] 43 | elif acti == 'lrelu': 44 | return [nn.LeakyReLU(0.2, inplace=inplace)] 45 | elif acti == 'tanh': 46 | return [nn.Tanh()] 47 | elif acti == 'none': 48 | return [] 49 | else: 50 | assert 0, "Unsupported activation: {}".format(acti) 51 | 52 | 53 | def get_norm_layer(norm='none', norm_dim=None): 54 | 55 | if norm == 'bn': 56 | return [nn.BatchNorm1d(norm_dim)] 57 | elif norm == 'in': 58 | # return [nn.InstanceNorm1d(norm_dim, affine=False)] # for rt42! 
59 | return [nn.InstanceNorm1d(norm_dim, affine=True)] 60 | elif norm == 'adain': 61 | return [AdaptiveInstanceNorm1d(norm_dim)] 62 | elif norm == 'none': 63 | return [] 64 | else: 65 | assert 0, "Unsupported normalization: {}".format(norm) 66 | 67 | 68 | def get_dropout_layer(dropout=None): 69 | if dropout is not None: 70 | return [nn.Dropout(p=dropout)] 71 | else: 72 | return [] 73 | 74 | 75 | def ConvLayers(kernel_size, 76 | in_channels, 77 | out_channels, 78 | stride=1, 79 | pad_type='reflect', 80 | use_bias=True): 81 | """ 82 | returns a list of [pad, conv] => should be += to some list, then apply sequential 83 | """ 84 | 85 | if pad_type == 'reflect': 86 | pad = nn.ReflectionPad1d 87 | elif pad_type == 'replicate': 88 | pad = nn.ReplicationPad1d 89 | elif pad_type == 'zero': 90 | pad = ZeroPad1d 91 | else: 92 | assert 0, "Unsupported padding type: {}".format(pad_type) 93 | 94 | pad_l = (kernel_size - 1) // 2 95 | pad_r = kernel_size - 1 - pad_l 96 | return [ 97 | pad((pad_l, pad_r)), 98 | nn.Conv1d(in_channels, 99 | out_channels, 100 | kernel_size=kernel_size, 101 | stride=stride, 102 | bias=use_bias) 103 | ] 104 | 105 | 106 | def ConvBlock(kernel_size, 107 | in_channels, 108 | out_channels, 109 | stride=1, 110 | pad_type='reflect', 111 | dropout=None, 112 | norm='none', 113 | acti='lrelu', 114 | acti_first=False, 115 | use_bias=True, 116 | inplace=True): 117 | """ 118 | returns a list of [pad, conv, norm, acti] or [acti, pad, conv, norm] 119 | """ 120 | 121 | layers = ConvLayers(kernel_size, 122 | in_channels, 123 | out_channels, 124 | stride=stride, 125 | pad_type=pad_type, 126 | use_bias=use_bias) 127 | layers += get_dropout_layer(dropout) 128 | layers += get_norm_layer(norm, norm_dim=out_channels) 129 | acti_layers = get_acti_layer(acti, inplace=inplace) 130 | 131 | if acti_first: 132 | return acti_layers + layers 133 | else: 134 | return layers + acti_layers 135 | 136 | 137 | def LinearBlock(in_dim, out_dim, dropout=None, norm='none', acti='relu'): 138 | 139 | use_bias = True 140 | layers = [] 141 | layers.append(nn.Linear(in_dim, out_dim, bias=use_bias)) 142 | layers += get_dropout_layer(dropout) 143 | layers += get_norm_layer(norm, norm_dim=out_dim) 144 | layers += get_acti_layer(acti) 145 | 146 | return layers 147 | -------------------------------------------------------------------------------- /prepare/merge_smplh_mano.py: -------------------------------------------------------------------------------- 1 | # inspired and modified by Mathis Petrovich from 2 | # https://github.com/vchoutas/smplx/tree/master/tools 3 | 4 | # -*- coding: utf-8 -*- 5 | 6 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 7 | # holder of all proprietary rights on this computer program. 8 | # You can only use this computer program if you have closed 9 | # a license agreement with MPG or you get the right to use the computer 10 | # program from someone who is authorized to grant you that right. 11 | # Any use of the computer program without a valid license is prohibited and 12 | # liable to prosecution. 13 | # 14 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 15 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 16 | # for Intelligent Systems and the Max Planck Institute for Biological 17 | # Cybernetics. All rights reserved. 
18 | # 19 | # Contact: ps-license@tuebingen.mpg.de 20 | 21 | import os 22 | import os.path as osp 23 | import pickle 24 | 25 | import argparse 26 | 27 | import numpy as np 28 | 29 | 30 | def remove_chumpy_dep(dico): 31 | output_dict = {} 32 | for key, val in dico.items(): 33 | if 'chumpy' in str(type(val)): 34 | output_dict[key] = np.array(val) 35 | else: 36 | output_dict[key] = val 37 | return output_dict 38 | 39 | 40 | def load_and_remove_chumpy_dep(path): 41 | with open(path, 'rb') as pkl_file: 42 | import warnings 43 | warnings.filterwarnings("ignore", category=DeprecationWarning) 44 | data = pickle.load(pkl_file, encoding="latin1") 45 | 46 | data = remove_chumpy_dep(data) 47 | return data 48 | 49 | 50 | def load_npz_into_dict(path): 51 | data = {key: val for key, val in np.load(path).items()} 52 | data = remove_chumpy_dep(data) 53 | return data 54 | 55 | 56 | def load_and_clean_data(path): 57 | ext = os.path.splitext(path)[-1] 58 | if ext == ".npz": 59 | data = load_npz_into_dict(path) 60 | elif ext == ".pkl": 61 | data = load_and_remove_chumpy_dep(path) 62 | else: 63 | raise TypeError("The format should be pkl or npz") 64 | return data 65 | 66 | 67 | def merge_models(smplh_fn, mano_left_fn, mano_right_fn, 68 | output_folder='output'): 69 | 70 | body_data = load_and_clean_data(smplh_fn) 71 | lhand_data = load_and_clean_data(mano_left_fn) 72 | rhand_data = load_and_clean_data(mano_right_fn) 73 | 74 | modelname = osp.split(smplh_fn)[1] 75 | parent_folder = osp.split(osp.split(smplh_fn)[0])[1] 76 | if "female" in parent_folder + "_" + modelname.lower(): 77 | out_fn = "SMPLH_FEMALE.npz" 78 | elif "male" in parent_folder + "_" + modelname.lower(): 79 | out_fn = "SMPLH_MALE.npz" 80 | elif "neutral" in parent_folder + "_" + modelname.lower(): 81 | out_fn = "SMPLH_NEUTRAL.npz" 82 | else: 83 | out_fn = modelname 84 | 85 | output_data = body_data.copy() 86 | output_data['hands_componentsl'] = lhand_data['hands_components'] 87 | output_data['hands_componentsr'] = rhand_data['hands_components'] 88 | 89 | output_data['hands_coeffsl'] = lhand_data['hands_coeffs'] 90 | output_data['hands_coeffsr'] = rhand_data['hands_coeffs'] 91 | 92 | output_data['hands_meanl'] = lhand_data['hands_mean'] 93 | output_data['hands_meanr'] = rhand_data['hands_mean'] 94 | 95 | # Just in case 96 | output_data = remove_chumpy_dep(output_data) 97 | 98 | out_path = osp.join(output_folder, out_fn) 99 | print('Saving to {}'.format(out_path)) 100 | 101 | # np.savez(out_path, output_data) 102 | np.savez_compressed(out_path, **output_data) 103 | # with open(out_path, 'wb') as output_file: 104 | # pickle.dump(output_data, output_file) 105 | 106 | 107 | if __name__ == '__main__': 108 | parser = argparse.ArgumentParser() 109 | parser.add_argument('--smplh-fn', dest='smplh_fn', required=True, 110 | type=str, help='The path to the SMPLH model') 111 | parser.add_argument('--mano-left-fn', dest='mano_left_fn', required=True, 112 | type=str, help='The path to the left hand MANO model') 113 | parser.add_argument('--mano-right-fn', dest='mano_right_fn', required=True, 114 | type=str, help='The path to the right hand MANO model') 115 | parser.add_argument('--output-folder', dest='output_folder', 116 | required=True, type=str, 117 | help='The path to the output folder') 118 | 119 | args = parser.parse_args() 120 | 121 | smplh_fn = args.smplh_fn 122 | mano_left_fn = args.mano_left_fn 123 | mano_right_fn = args.mano_right_fn 124 | output_folder = args.output_folder 125 | 126 | if not osp.exists(output_folder): 127 | print('Creating 
directory: {}'.format(output_folder)) 128 | os.makedirs(output_folder) 129 | 130 | merge_models(smplh_fn, mano_left_fn, mano_right_fn, output_folder) 131 | -------------------------------------------------------------------------------- /mGPT/metrics/mm.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torchmetrics import Metric 6 | from torchmetrics.functional import pairwise_euclidean_distance 7 | from .utils import * 8 | import os 9 | from mGPT.config import instantiate_from_config 10 | 11 | class MMMetrics(Metric): 12 | full_state_update = True 13 | 14 | def __init__(self, cfg, dataname='humanml3d', mm_num_times=10, dist_sync_on_step=True, **kwargs): 15 | super().__init__(dist_sync_on_step=dist_sync_on_step) 16 | 17 | self.name = "MultiModality scores" 18 | self.cfg = cfg 19 | self.dataname = dataname 20 | self.mm_num_times = mm_num_times 21 | 22 | self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum") 23 | self.add_state("count_seq", 24 | default=torch.tensor(0), 25 | dist_reduce_fx="sum") 26 | 27 | self.metrics = ["MultiModality"] 28 | self.add_state("MultiModality", 29 | default=torch.tensor(0.), 30 | dist_reduce_fx="sum") 31 | 32 | # chached batches 33 | self.add_state("mm_motion_embeddings", default=[], dist_reduce_fx=None) 34 | 35 | # T2M Evaluator 36 | self._get_t2m_evaluator(cfg) 37 | 38 | def _get_t2m_evaluator(self, cfg): 39 | """ 40 | load T2M text encoder and motion encoder for evaluating 41 | """ 42 | # init module 43 | self.t2m_textencoder = instantiate_from_config(cfg.METRIC.TM2T.t2m_textencoder) 44 | self.t2m_moveencoder = instantiate_from_config(cfg.METRIC.TM2T.t2m_moveencoder) 45 | self.t2m_motionencoder = instantiate_from_config(cfg.METRIC.TM2T.t2m_motionencoder) 46 | 47 | # load pretrianed 48 | if self.dataname == "kit": 49 | dataname = "kit" 50 | else: 51 | dataname = "t2m" 52 | t2m_checkpoint = torch.load(os.path.join( 53 | cfg.METRIC.TM2T.t2m_path, dataname, 54 | "text_mot_match/model/finest.tar"), 55 | map_location="cpu") 56 | 57 | self.t2m_textencoder.load_state_dict(t2m_checkpoint["text_encoder"]) 58 | self.t2m_moveencoder.load_state_dict( 59 | t2m_checkpoint["movement_encoder"]) 60 | self.t2m_motionencoder.load_state_dict( 61 | t2m_checkpoint["motion_encoder"]) 62 | 63 | # freeze params 64 | self.t2m_textencoder.eval() 65 | self.t2m_moveencoder.eval() 66 | self.t2m_motionencoder.eval() 67 | for p in self.t2m_textencoder.parameters(): 68 | p.requires_grad = False 69 | for p in self.t2m_moveencoder.parameters(): 70 | p.requires_grad = False 71 | for p in self.t2m_motionencoder.parameters(): 72 | p.requires_grad = False 73 | 74 | def compute(self, sanity_flag): 75 | count = self.count.item() 76 | count_seq = self.count_seq.item() 77 | 78 | # init metrics 79 | metrics = {metric: getattr(self, metric) for metric in self.metrics} 80 | 81 | # if in sanity check stage then jump 82 | if sanity_flag: 83 | return metrics 84 | 85 | # cat all embeddings 86 | all_mm_motions = torch.cat(self.mm_motion_embeddings, 87 | axis=0).cpu().numpy() 88 | metrics['MultiModality'] = calculate_multimodality_np( 89 | all_mm_motions, self.mm_num_times) 90 | 91 | # Reset 92 | self.reset() 93 | 94 | return {**metrics} 95 | 96 | def update( 97 | self, 98 | feats_rst: Tensor, 99 | lengths_rst: List[int], 100 | ): 101 | self.count += sum(lengths_rst) 102 | self.count_seq += len(lengths_rst) 103 | 104 | align_idx = np.argsort(lengths_rst)[::-1].copy() 105 | 
feats_rst = feats_rst[align_idx] 106 | lengths_rst = np.array(lengths_rst)[align_idx] 107 | recmotion_embeddings = self.get_motion_embeddings( 108 | feats_rst, lengths_rst) 109 | cache = [0] * len(lengths_rst) 110 | for i in range(len(lengths_rst)): 111 | cache[align_idx[i]] = recmotion_embeddings[i:i + 1] 112 | 113 | mm_motion_embeddings = torch.cat(cache, axis=0).unsqueeze(0) 114 | # self.mm_motion_embeddings.extend(cache) 115 | # print(mm_motion_embeddings.shape) 116 | # # store all mm motion embeddings 117 | self.mm_motion_embeddings.append(mm_motion_embeddings) 118 | 119 | def get_motion_embeddings(self, feats: Tensor, lengths: List[int]): 120 | m_lens = torch.tensor(lengths) 121 | m_lens = torch.div(m_lens, 122 | self.cfg.DATASET.HUMANML3D.UNIT_LEN, 123 | rounding_mode="floor") 124 | 125 | mov = self.t2m_moveencoder(feats[..., :-4]).detach() 126 | emb = self.t2m_motionencoder(mov, m_lens) 127 | 128 | # [bs, nlatent*ndim] <= [bs, nlatent, ndim] 129 | return torch.flatten(emb, start_dim=1).detach() 130 | -------------------------------------------------------------------------------- /demos/inbetween.txt: -------------------------------------------------------------------------------- 1 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/012657.npy 2 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M003137.npy 3 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M011458.npy 4 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/009410.npy 5 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M001298.npy 6 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M010526.npy 7 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M001632.npy 8 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M009521.npy 9 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/000086.npy 10 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M005152.npy 11 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M008910.npy 12 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M010563.npy 13 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/000307.npy 14 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/008588.npy 15 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M011731.npy 16 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/000921.npy 17 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M004975.npy 18 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/010698.npy 19 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/P_M008159.npy 20 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/005413.npy 21 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/004867.npy 22 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M002246.npy 23 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M010043.npy 24 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M014536.npy 25 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/J_M002982.npy 26 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M011785.npy 27 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/002093.npy 28 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M013476.npy 29 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M000343.npy 30 | Complete the masked motion: 
#datasets/humanml3d/new_joint_vecs/M012561.npy 31 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M000472.npy 32 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M006819.npy 33 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/000363.npy 34 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M014253.npy 35 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/013112.npy 36 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M000379.npy 37 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/008567.npy 38 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/013514.npy 39 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M003365.npy 40 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/002550.npy 41 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/011095.npy 42 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/R_000889.npy 43 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/006236.npy 44 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/009031.npy 45 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/001676.npy 46 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M013314.npy 47 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M012611.npy 48 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M005468.npy 49 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/008357.npy 50 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/V_M007878.npy 51 | -------------------------------------------------------------------------------- /mGPT/data/transforms/rots2rfeats/globvelandy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | 19 | import torch 20 | from torch import Tensor 21 | from einops import rearrange 22 | 23 | from mGPT.utils.easyconvert import rep_to_rep, nfeats_of, to_matrix 24 | import mGPT.utils.geometry_tools as geometry_tools 25 | 26 | from .base import Rots2Rfeats 27 | 28 | 29 | class Globalvelandy(Rots2Rfeats): 30 | def __init__(self, 31 | path: Optional[str] = None, 32 | normalization: bool = False, 33 | pose_rep: str = "rot6d", 34 | canonicalize: bool = False, 35 | offset: bool = True, 36 | **kwargs) -> None: 37 | super().__init__(path=path, normalization=normalization) 38 | 39 | self.canonicalize = canonicalize 40 | self.pose_rep = pose_rep 41 | self.nfeats = nfeats_of(pose_rep) 42 | self.offset = offset 43 | 44 | def forward(self, data, data_rep='matrix', first_frame=None) -> Tensor: 45 | 46 | poses, trans = data.rots, data.trans 47 | 48 | # extract the root gravity axis 49 | # for smpl it is the last coordinate 50 | root_y = trans[..., 2] 51 | trajectory = trans[..., [0, 1]] 52 | 53 | # Compute the difference of trajectory 54 | vel_trajectory = torch.diff(trajectory, dim=-2) 55 | 56 | # 0 for the first one => keep the dimentionality 57 | if first_frame is None: 58 | first_frame = 0 * vel_trajectory[..., [0], :] 59 | 60 | vel_trajectory = torch.cat((first_frame, vel_trajectory), dim=-2) 61 | 62 | # first normalize the data 63 | if self.canonicalize: 64 | 65 | matrix_poses = rep_to_rep(data_rep, 'matrix', poses) 66 | global_orient = matrix_poses[..., 0, :, :] 67 | 68 | # remove the rotation 69 | rot2d = rep_to_rep(data_rep, 'rotvec', poses[0, 0, ...]) 70 | 71 | # Remove the fist rotation along the vertical axis 72 | rot2d[..., :2] = 0 73 | 74 | if self.offset: 75 | # add a bit more rotation 76 | rot2d[..., 2] += torch.pi / 2 77 | 78 | rot2d = rep_to_rep('rotvec', 'matrix', rot2d) 79 | 80 | # turn with the same amount all the rotations 81 | global_orient = torch.einsum("...kj,...kl->...jl", rot2d, 82 | global_orient) 83 | 84 | matrix_poses = torch.cat( 85 | (global_orient[..., None, :, :], matrix_poses[..., 1:, :, :]), 86 | dim=-3) 87 | 88 | poses = rep_to_rep('matrix', data_rep, matrix_poses) 89 | 90 | # Turn the trajectory as well 91 | vel_trajectory = torch.einsum("...kj,...lk->...lj", 92 | rot2d[..., :2, :2], vel_trajectory) 93 | 94 | poses = rep_to_rep(data_rep, self.pose_rep, poses) 95 | features = torch.cat( 96 | (root_y[..., None], vel_trajectory, 97 | rearrange(poses, "... joints rot -> ... (joints rot)")), 98 | dim=-1) 99 | features = self.normalize(features) 100 | 101 | return features 102 | 103 | def extract(self, features): 104 | root_y = features[..., 0] 105 | vel_trajectory = features[..., 1:3] 106 | poses_features = features[..., 3:] 107 | poses = rearrange(poses_features, 108 | "... (joints rot) -> ... 
joints rot", 109 | rot=self.nfeats) 110 | return root_y, vel_trajectory, poses 111 | 112 | def inverse(self, features, last_frame=None): 113 | features = self.unnormalize(features) 114 | root_y, vel_trajectory, poses = self.extract(features) 115 | 116 | # integrate the trajectory 117 | trajectory = torch.cumsum(vel_trajectory, dim=-2) 118 | if last_frame is None: 119 | pass 120 | # First frame should be 0, but if infered it is better to ensure it 121 | trajectory = trajectory - trajectory[..., [0], :] 122 | 123 | # Get back the translation 124 | trans = torch.cat([trajectory, root_y[..., None]], dim=-1) 125 | matrix_poses = rep_to_rep(self.pose_rep, 'matrix', poses) 126 | 127 | from ..smpl import RotTransDatastruct 128 | return RotTransDatastruct(rots=matrix_poses, trans=trans) 129 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | import pytorch_lightning as pl 5 | import torch 6 | from pathlib import Path 7 | from rich import get_console 8 | from rich.table import Table 9 | from omegaconf import OmegaConf 10 | from mGPT.callback import build_callbacks 11 | from mGPT.config import parse_args 12 | from mGPT.data.build_data import build_data 13 | from mGPT.models.build_model import build_model 14 | from mGPT.utils.logger import create_logger 15 | from mGPT.utils.load_checkpoint import load_pretrained, load_pretrained_vae 16 | 17 | 18 | def print_table(title, metrics, logger=None): 19 | table = Table(title=title) 20 | 21 | table.add_column("Metrics", style="cyan", no_wrap=True) 22 | table.add_column("Value", style="magenta") 23 | 24 | for key, value in metrics.items(): 25 | table.add_row(key, str(value)) 26 | 27 | console = get_console() 28 | console.print(table, justify="center") 29 | 30 | logger.info(metrics) if logger else None 31 | 32 | 33 | def get_metric_statistics(values, replication_times): 34 | mean = np.mean(values, axis=0) 35 | std = np.std(values, axis=0) 36 | conf_interval = 1.96 * std / np.sqrt(replication_times) 37 | return mean, conf_interval 38 | 39 | 40 | def main(): 41 | # parse options 42 | cfg = parse_args(phase="test") # parse config file 43 | cfg.FOLDER = cfg.TEST.FOLDER 44 | 45 | # Logger 46 | logger = create_logger(cfg, phase="test") 47 | logger.info(OmegaConf.to_yaml(cfg)) 48 | 49 | # Output dir 50 | model_name = cfg.model.target.split('.')[-2].lower() 51 | output_dir = Path( 52 | os.path.join(cfg.FOLDER, model_name, cfg.NAME, "samples_" + cfg.TIME)) 53 | if cfg.TEST.SAVE_PREDICTIONS: 54 | output_dir.mkdir(parents=True, exist_ok=True) 55 | logger.info(f"Saving predictions to {str(output_dir)}") 56 | 57 | # Seed 58 | pl.seed_everything(cfg.SEED_VALUE) 59 | 60 | # Environment Variables 61 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 62 | 63 | # Callbacks 64 | callbacks = build_callbacks(cfg, logger=logger, phase="test") 65 | logger.info("Callbacks initialized") 66 | 67 | # Dataset 68 | datamodule = build_data(cfg) 69 | logger.info("datasets module {} initialized".format("".join( 70 | cfg.DATASET.target.split('.')[-2]))) 71 | 72 | # Model 73 | model = build_model(cfg, datamodule) 74 | logger.info("model {} loaded".format(cfg.model.target)) 75 | 76 | # Lightning Trainer 77 | trainer = pl.Trainer( 78 | benchmark=False, 79 | max_epochs=cfg.TRAIN.END_EPOCH, 80 | accelerator=cfg.ACCELERATOR, 81 | devices=list(range(len(cfg.DEVICE))), 82 | default_root_dir=cfg.FOLDER_EXP, 83 | 
reload_dataloaders_every_n_epochs=1, 84 | deterministic=False, 85 | detect_anomaly=False, 86 | enable_progress_bar=True, 87 | logger=None, 88 | callbacks=callbacks, 89 | ) 90 | 91 | # Strict load vae model 92 | if cfg.TRAIN.PRETRAINED_VAE: 93 | load_pretrained_vae(cfg, model, logger) 94 | 95 | # loading state dict 96 | if cfg.TEST.CHECKPOINTS: 97 | load_pretrained(cfg, model, logger, phase="test") 98 | else: 99 | logger.warning("No checkpoints provided!!!") 100 | 101 | # Calculate metrics 102 | all_metrics = {} 103 | replication_times = cfg.TEST.REPLICATION_TIMES 104 | 105 | for i in range(replication_times): 106 | metrics_type = ", ".join(cfg.METRIC.TYPE) 107 | logger.info(f"Evaluating {metrics_type} - Replication {i}") 108 | metrics = trainer.test(model, datamodule=datamodule)[0] 109 | if "TM2TMetrics" in metrics_type and cfg.model.params.task == "t2m" and cfg.model.params.stage != 'vae': 110 | # mm meteics 111 | logger.info(f"Evaluating MultiModality - Replication {i}") 112 | datamodule.mm_mode(True) 113 | mm_metrics = trainer.test(model, datamodule=datamodule)[0] 114 | # metrics.update(mm_metrics) 115 | metrics.update(mm_metrics) 116 | datamodule.mm_mode(False) 117 | for key, item in metrics.items(): 118 | if key not in all_metrics: 119 | all_metrics[key] = [item] 120 | else: 121 | all_metrics[key] += [item] 122 | 123 | all_metrics_new = {} 124 | 125 | for key, item in all_metrics.items(): 126 | mean, conf_interval = get_metric_statistics(np.array(item), 127 | replication_times) 128 | all_metrics_new[key + "/mean"] = mean 129 | all_metrics_new[key + "/conf_interval"] = conf_interval 130 | 131 | print_table(f"Mean Metrics", all_metrics_new, logger=logger) 132 | all_metrics_new.update(all_metrics) 133 | 134 | # Save metrics to file 135 | metric_file = output_dir.parent / f"metrics_{cfg.TIME}.json" 136 | with open(metric_file, "w", encoding="utf-8") as f: 137 | json.dump(all_metrics_new, f, indent=4) 138 | logger.info(f"Testing done, the metrics are saved to {str(metric_file)}") 139 | 140 | 141 | if __name__ == "__main__": 142 | main() 143 | -------------------------------------------------------------------------------- /mGPT/data/webui.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from os.path import join as pjoin 4 | from .humanml.scripts.motion_process import (process_file, recover_from_ric) 5 | from . 
import BASEDataModule 6 | from .humanml import Text2MotionDatasetEval, Text2MotionDataset, Text2MotionDatasetCB, MotionDataset, MotionDatasetVQ, Text2MotionDatasetToken, Text2MotionDatasetM2T 7 | from .utils import humanml3d_collate 8 | 9 | 10 | class HumanML3DDataModule(BASEDataModule): 11 | def __init__(self, cfg, **kwargs): 12 | 13 | super().__init__(collate_fn=humanml3d_collate) 14 | self.cfg = cfg 15 | self.save_hyperparameters(logger=False) 16 | 17 | # Basic info of the dataset 18 | cfg.DATASET.JOINT_TYPE = 'humanml3d' 19 | self.name = "humanml3d" 20 | self.njoints = 22 21 | 22 | # Path to the dataset 23 | data_root = cfg.DATASET.HUMANML3D.ROOT 24 | self.hparams.data_root = data_root 25 | self.hparams.text_dir = pjoin(data_root, "texts") 26 | self.hparams.motion_dir = pjoin(data_root, 'new_joint_vecs') 27 | 28 | # Mean and std of the dataset 29 | self.hparams.mean = np.load(pjoin('assets/meta', "mean.npy")) 30 | self.hparams.std = np.load(pjoin('assets/meta', "std.npy")) 31 | 32 | # Mean and std for fair evaluation 33 | self.hparams.mean_eval = np.load(pjoin('assets/meta', "mean_eval.npy")) 34 | self.hparams.std_eval = np.load(pjoin('assets/meta', "std_eval.npy")) 35 | 36 | # Length of the dataset 37 | self.hparams.max_motion_length = cfg.DATASET.HUMANML3D.MAX_MOTION_LEN 38 | self.hparams.min_motion_length = cfg.DATASET.HUMANML3D.MIN_MOTION_LEN 39 | self.hparams.max_text_len = cfg.DATASET.HUMANML3D.MAX_TEXT_LEN 40 | self.hparams.unit_length = cfg.DATASET.HUMANML3D.UNIT_LEN 41 | 42 | # Additional parameters 43 | self.hparams.debug = cfg.DEBUG 44 | self.hparams.stage = cfg.TRAIN.STAGE 45 | 46 | # Dataset switch 47 | self.DatasetEval = Text2MotionDatasetEval 48 | 49 | if cfg.TRAIN.STAGE == "vae": 50 | if cfg.model.params.motion_vae.target.split('.')[-1].lower() == "vqvae": 51 | self.hparams.win_size = 64 52 | self.Dataset = MotionDatasetVQ 53 | else: 54 | self.Dataset = MotionDataset 55 | elif 'lm' in cfg.TRAIN.STAGE: 56 | self.hparams.code_path = cfg.DATASET.CODE_PATH 57 | self.hparams.task_path = cfg.DATASET.TASK_PATH 58 | self.hparams.std_text = cfg.DATASET.HUMANML3D.STD_TEXT 59 | self.Dataset = Text2MotionDatasetCB 60 | elif cfg.TRAIN.STAGE == "token": 61 | self.Dataset = Text2MotionDatasetToken 62 | self.DatasetEval = Text2MotionDatasetToken 63 | elif cfg.TRAIN.STAGE == "m2t": 64 | self.Dataset = Text2MotionDatasetM2T 65 | self.DatasetEval = Text2MotionDatasetM2T 66 | else: 67 | self.Dataset = Text2MotionDataset 68 | 69 | # Get additional info of the dataset 70 | self.nfeats = 263 71 | cfg.DATASET.NFEATS = self.nfeats 72 | 73 | 74 | def feats2joints(self, features): 75 | mean = torch.tensor(self.hparams.mean).to(features) 76 | std = torch.tensor(self.hparams.std).to(features) 77 | features = features * std + mean 78 | return recover_from_ric(features, self.njoints) 79 | 80 | def joints2feats(self, features): 81 | features = process_file(features, self.njoints)[0] 82 | return features 83 | 84 | def normalize(self, features): 85 | mean = torch.tensor(self.hparams.mean).to(features) 86 | std = torch.tensor(self.hparams.std).to(features) 87 | features = (features - mean) / std 88 | return features 89 | 90 | def denormalize(self, features): 91 | mean = torch.tensor(self.hparams.mean).to(features) 92 | std = torch.tensor(self.hparams.std).to(features) 93 | features = features * std + mean 94 | return features 95 | 96 | def renorm4t2m(self, features): 97 | # renorm to t2m norms for using t2m evaluators 98 | ori_mean = torch.tensor(self.hparams.mean).to(features) 99 | ori_std = 
torch.tensor(self.hparams.std).to(features) 100 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 101 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 102 | features = features * ori_std + ori_mean 103 | features = (features - eval_mean) / eval_std 104 | return features 105 | 106 | def mm_mode(self, mm_on=True): 107 | if mm_on: 108 | self.is_mm = True 109 | self.name_list = self.test_dataset.name_list 110 | self.mm_list = np.random.choice(self.name_list, 111 | self.cfg.METRIC.MM_NUM_SAMPLES, 112 | replace=False) 113 | self.test_dataset.name_list = self.mm_list 114 | else: 115 | self.is_mm = False 116 | self.test_dataset.name_list = self.name_list 117 | -------------------------------------------------------------------------------- /mGPT/render/rendermotion.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import imageio 3 | import os 4 | import argparse 5 | from tqdm import tqdm 6 | from .renderer import get_renderer 7 | 8 | 9 | def get_rotation(theta=np.pi / 3): 10 | import mGPT.utils.rotation_conversions as geometry 11 | import torch 12 | axis = torch.tensor([0, 1, 0], dtype=torch.float) 13 | axisangle = theta * axis 14 | matrix = geometry.axis_angle_to_matrix(axisangle) 15 | return matrix.numpy() 16 | 17 | 18 | def render_video(meshes, 19 | key, 20 | action, 21 | renderer, 22 | savepath, 23 | backgrounds, 24 | cam_pose, 25 | cams=(0.75, 0.75, 0, 0.10), 26 | color=[0.11, 0.53, 0.8]): 27 | # cams=(0.75, 0.75, 0, 0.10), color=[165.0/255,112/255,140/255]): 28 | # center the first frame 29 | if key not in ["real", "ntf", "side"]: 30 | w = int(key) / 6.0 31 | # purpole to green 32 | # color = w*np.array([0.9,102/255,120/255]) + (1-w)*np.array([0.11, 0.9, 0.11]) 33 | # color = (1-w)*np.array([165.0/255,112/255,140/255]) + w*np.array([0.11, 0.8, 0.11]) 34 | color = (1 - w) * np.array([0.75, 0.13, 0.7]) + w * np.array( 35 | [0.12, 0.7, 0.14]) 36 | 37 | meshes = meshes - meshes[0].mean(axis=0) 38 | imgs = [] 39 | idx = 0 40 | # for mesh in meshes: 41 | for mesh in tqdm(meshes, desc=f"Visualize {key}, action {action}"): 42 | # file_name = '3dpw_rot-90_glob_trimesh.ply' mesh_filename=file_name, 43 | # prepare background 44 | if len(backgrounds.shape) == 3: 45 | background = backgrounds 46 | cam = cams 47 | elif len(backgrounds.shape) == 4: 48 | background = backgrounds[idx] 49 | cam = cams[idx] 50 | idx += 1 51 | # prepare cams 52 | img = renderer.render(background, 53 | mesh, 54 | cam, 55 | color=color, 56 | cam_pose=cam_pose) 57 | imgs.append(img) 58 | # show(img) 59 | 60 | imgs = np.array(imgs) 61 | # masks = ~(imgs/255. 
> 0.96).all(-1) 62 | # coords = np.argwhere(masks.sum(axis=0)) 63 | # y1, x1 = coords.min(axis=0) 64 | # y2, x2 = coords.max(axis=0) 65 | # writer = imageio.get_writer(savepath, fps=30) 66 | # for cimg in imgs[:, y1:y2, x1:x2]: 67 | # writer.append_data(cimg) 68 | # writer.close() 69 | 70 | # from mld.utils.uicap_utils import write_rgba_seqs 71 | # write_rgba_seqs(imgs, savepath) 72 | 73 | writer = imageio.get_writer(savepath, fps=30) 74 | for cimg in imgs: 75 | writer.append_data(cimg) 76 | writer.close() 77 | 78 | 79 | def main(): 80 | parser = argparse.ArgumentParser() 81 | parser.add_argument("filename") 82 | opt = parser.parse_args() 83 | filename = opt.filename 84 | savefolder = os.path.splitext(filename)[0] 85 | os.makedirs(savefolder, exist_ok=True) 86 | 87 | output = np.load(filename) 88 | 89 | if output.shape[0] == 3: 90 | visualization, generation, reconstruction = output 91 | output = { 92 | "visualization": visualization, 93 | "generation": generation, 94 | "reconstruction": reconstruction 95 | } 96 | else: 97 | # output = {f"generation_{key}": output[key] for key in range(2)} # len(output))} 98 | # output = {f"generation_{key}": output[key] for key in range(len(output))} 99 | output = { 100 | f"generation_{key}": output[key] 101 | for key in range(len(output)) 102 | } 103 | 104 | width = 1024 105 | height = 1024 106 | 107 | background = np.zeros((height, width, 3)) 108 | renderer = get_renderer(width, height) 109 | 110 | # if duration mode, put back durations 111 | if output["generation_3"].shape[-1] == 100: 112 | output["generation_0"] = output["generation_0"][:, :, :, :40] 113 | output["generation_1"] = output["generation_1"][:, :, :, :60] 114 | output["generation_2"] = output["generation_2"][:, :, :, :80] 115 | output["generation_3"] = output["generation_3"][:, :, :, :100] 116 | elif output["generation_3"].shape[-1] == 160: 117 | print("160 mode") 118 | output["generation_0"] = output["generation_0"][:, :, :, :100] 119 | output["generation_1"] = output["generation_1"][:, :, :, :120] 120 | output["generation_2"] = output["generation_2"][:, :, :, :140] 121 | output["generation_3"] = output["generation_3"][:, :, :, :160] 122 | 123 | # if str(action) == str(1) and str(key) == "generation_4": 124 | for key in output: 125 | vidmeshes = output[key] 126 | for action in range(len(vidmeshes)): 127 | meshes = vidmeshes[action].transpose(2, 0, 1) 128 | path = os.path.join(savefolder, 129 | "action{}_{}.mp4".format(action, key)) 130 | render_video(meshes, key, action, renderer, path, background, cam_pose=None)  # cam_pose has no default in render_video and was missing here; None is an assumed placeholder 131 | 132 | 133 | if __name__ == "__main__": 134 | main() 135 | -------------------------------------------------------------------------------- /render.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import shutil 4 | import sys 5 | import natsort 6 | from pathlib import Path 7 | from argparse import ArgumentParser 8 | 9 | try: 10 | import bpy 11 | 12 | sys.path.append(os.path.dirname(bpy.data.filepath)) 13 | 14 | # local packages 15 | sys.path.append(os.path.expanduser("~/.local/lib/python3.9/site-packages")) 16 | except ImportError: 17 | raise ImportError( 18 | "Blender is not properly installed or not launched properly. See README.md for instructions on how to install and use Blender." 
19 | ) 20 | 21 | 22 | # Monkey patch argparse such that 23 | # blender / python / hydra parsing works 24 | def parse_args(self, args=None, namespace=None): 25 | if args is not None: 26 | return self.parse_args_bak(args=args, namespace=namespace) 27 | try: 28 | idx = sys.argv.index("--") 29 | args = sys.argv[idx + 1:] # the list after '--' 30 | except ValueError as e: # '--' not in the list: 31 | args = [] 32 | return self.parse_args_bak(args=args, namespace=namespace) 33 | 34 | 35 | setattr(ArgumentParser, 'parse_args_bak', ArgumentParser.parse_args) 36 | setattr(ArgumentParser, 'parse_args', parse_args) 37 | 38 | from mGPT.config import parse_args 39 | 40 | 41 | def render_cli() -> None: 42 | # parse options 43 | cfg = parse_args(phase="render") # parse config file 44 | cfg.FOLDER = cfg.RENDER.FOLDER 45 | 46 | if cfg.RENDER.INPUT_MODE.lower() == "npy": 47 | output_dir = Path(os.path.dirname(cfg.RENDER.NPY)) 48 | paths = [cfg.RENDER.NPY] 49 | elif cfg.RENDER.INPUT_MODE.lower() == "dir": 50 | output_dir = Path(cfg.RENDER.DIR) 51 | paths = [] 52 | file_list = natsort.natsorted(os.listdir(cfg.RENDER.DIR)) 53 | begin_id = random.randrange(0, len(file_list)) 54 | file_list = file_list[begin_id:] + file_list[:begin_id] 55 | 56 | # render mesh npy first 57 | for item in file_list: 58 | if item.endswith("_mesh.npy"): 59 | paths.append(os.path.join(cfg.RENDER.DIR, item)) 60 | 61 | # then render joint npy 62 | for item in file_list: 63 | if item.endswith(".npy") and not item.endswith("_mesh.npy"): 64 | paths.append(os.path.join(cfg.RENDER.DIR, item)) 65 | 66 | print(f"begin to render for {paths[0]}") 67 | 68 | import numpy as np 69 | 70 | from mGPT.render.blender import render 71 | from mGPT.render.video import Video 72 | 73 | init = True 74 | for path in paths: 75 | # check existed mp4 or under rendering 76 | if cfg.RENDER.MODE == "video": 77 | if os.path.exists(path.replace(".npy", ".mp4")) or os.path.exists( 78 | path.replace(".npy", "_frames")): 79 | print(f"npy is rendered or under rendering {path}") 80 | continue 81 | else: 82 | # check existed png 83 | if os.path.exists(path.replace(".npy", ".png")): 84 | print(f"npy is rendered or under rendering {path}") 85 | continue 86 | 87 | if cfg.RENDER.MODE == "video": 88 | frames_folder = os.path.join( 89 | output_dir, 90 | path.replace(".npy", "_frames").split('/')[-1]) 91 | os.makedirs(frames_folder, exist_ok=True) 92 | else: 93 | frames_folder = os.path.join( 94 | output_dir, 95 | path.replace(".npy", ".png").split('/')[-1]) 96 | 97 | try: 98 | data = np.load(path) 99 | if data.shape[0] == 1: 100 | data = data[0] 101 | except FileNotFoundError: 102 | print(f"{path} not found") 103 | continue 104 | 105 | if cfg.RENDER.MODE == "video": 106 | frames_folder = os.path.join( 107 | output_dir, 108 | path.replace(".npy", "_frames").split("/")[-1]) 109 | else: 110 | frames_folder = os.path.join( 111 | output_dir, 112 | path.replace(".npy", ".png").split("/")[-1]) 113 | 114 | out = render( 115 | data, 116 | frames_folder, 117 | canonicalize=cfg.RENDER.CANONICALIZE, 118 | exact_frame=cfg.RENDER.EXACT_FRAME, 119 | num=cfg.RENDER.NUM, 120 | mode=cfg.RENDER.MODE, 121 | model_path=cfg.RENDER.MODEL_PATH, 122 | faces_path=cfg.RENDER.FACES_PATH, 123 | downsample=cfg.RENDER.DOWNSAMPLE, 124 | always_on_floor=cfg.RENDER.ALWAYS_ON_FLOOR, 125 | oldrender=cfg.RENDER.OLDRENDER, 126 | res=cfg.RENDER.RES, 127 | init=init, 128 | gt=cfg.RENDER.GT, 129 | accelerator=cfg.ACCELERATOR, 130 | device=cfg.DEVICE, 131 | ) 132 | 133 | init = False 134 | 135 | if cfg.RENDER.MODE == 
"video": 136 | shutil.copytree(frames_folder, frames_folder+'_img') 137 | if cfg.RENDER.DOWNSAMPLE: 138 | video = Video(frames_folder, fps=cfg.RENDER.FPS) 139 | else: 140 | video = Video(frames_folder, fps=cfg.RENDER.FPS) 141 | 142 | vid_path = frames_folder.replace("_frames", ".mp4") 143 | video.save(out_path=vid_path) 144 | shutil.rmtree(frames_folder) 145 | print(f"remove tmp fig folder and save video in {vid_path}") 146 | 147 | else: 148 | print(f"Frame generated at: {out}") 149 | 150 | 151 | if __name__ == "__main__": 152 | render_cli() 153 | --------------------------------------------------------------------------------