├── mGPT ├── __init__.py ├── archs │ ├── __init__.py │ ├── tools │ │ ├── resnet.py │ │ └── token_emb.py │ └── tm2t_evaluator.py ├── models │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── position_encoding_layer.py │ │ ├── tools.py │ │ ├── adain.py │ │ └── blocks.py │ └── build_model.py ├── render │ ├── __init__.py │ ├── blender │ │ ├── __init__.py │ │ ├── data.py │ │ ├── vertices.py │ │ ├── sampler.py │ │ ├── camera.py │ │ ├── tools.py │ │ ├── meshes.py │ │ ├── floor.py │ │ └── scene.py │ ├── pyrender │ │ └── j3ds_render_smpl.py │ ├── video.py │ └── rendermotion.py ├── utils │ ├── __init__.py │ ├── fixseed.py │ ├── misc.py │ ├── sample_utils.py │ ├── load_checkpoint.py │ ├── logger.py │ ├── tensors.py │ ├── demo_utils.py │ ├── easyconvert.py │ └── temos_utils.py ├── losses │ ├── __init__.py │ ├── base.py │ └── mgpt.py ├── metrics │ ├── __init__.py │ ├── base.py │ ├── m2m.py │ ├── mr.py │ └── mm.py └── data │ ├── humanml │ ├── README.md │ ├── __init__.py │ ├── dataset_m_vq.py │ ├── utils │ │ ├── paramUtil.py │ │ └── word_vectorizer.py │ ├── dataset_t2m_token.py │ ├── dataset_t2m_eval.py │ └── dataset_t2m_m2t.py │ ├── transforms │ ├── joints2jfeats │ │ ├── __init__.py │ │ ├── base.py │ │ └── tools.py │ ├── rots2joints │ │ ├── __init__.py │ │ └── base.py │ ├── rots2rfeats │ │ ├── __init__.py │ │ ├── base.py │ │ └── globvelandy.py │ ├── __init__.py │ ├── identity.py │ ├── xyz.py │ ├── base.py │ └── joints2rots │ │ └── config.py │ ├── tools │ ├── __init__.py │ ├── tensors.py │ ├── easyconvert.py │ └── collate.py │ ├── build_data.py │ ├── utils.py │ ├── Kit.py │ ├── __init__.py │ └── webui.py ├── scripts ├── fit_motion.sh ├── visualize_motion.sh ├── visualize_motion_parallel.sh ├── fit_motion_parallel.sh ├── plys2npy.py ├── get_motion_code.py └── get_code_visual.py ├── assets ├── meta │ ├── std.npy │ ├── mean.npy │ ├── std_eval.npy │ └── mean_eval.npy ├── images │ ├── table7.png │ ├── table8.png │ ├── figure10.png │ ├── figure12.png │ ├── figure13.png │ ├── pipeline.png │ ├── table15.png │ ├── avatar_bot.jpg │ └── avatar_user.png └── videos │ ├── example0.mp4 │ ├── example1.mp4 │ ├── example2.mp4 │ ├── example3.mp4 │ ├── example4.mp4 │ ├── example5.mp4 │ ├── example6.mp4 │ ├── example7.mp4 │ ├── example8.mp4 │ ├── example0_fast.mp4 │ └── example0_blender.mp4 ├── prepare ├── prepare_t5.sh ├── requirements_render.txt ├── download_pretrained_models.sh ├── download_smpl_model.sh ├── download_t2m_evaluators.sh ├── instructions │ └── template_pretrain.json ├── smplh.sh └── merge_smplh_mano.py ├── configs ├── lm │ ├── default.yaml │ ├── gpt2_medium.yaml │ ├── t5_large.yaml │ └── t5_small.yaml ├── vq │ └── default.yaml ├── evaluator │ └── tm2t.yaml ├── render.yaml ├── assets.yaml ├── config_h3d_stage1.yaml ├── config_h3d_stage2.yaml ├── webui.yaml ├── config_h3d_stage3.yaml └── default.yaml ├── requirements.txt ├── setup.py ├── demos ├── pred.txt └── inbetween.txt ├── LICENSE ├── train.py ├── test.py └── render.py /mGPT/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mGPT/archs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mGPT/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
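As a quick orientation before the per-file listing continues, the following is a minimal sketch of how the packages in the tree above are typically wired together. It is not a file from the repository: it assumes the mGPT.config.parse_args entry point used in scripts/get_motion_code.py and reuses the build_data, build_model, and load_pretrained helpers whose sources appear later in this dump; the PyTorch Lightning Trainer call reflects common usage rather than the repository's own train.py / test.py, which are not reproduced here.
# Hedged sketch only -- not part of the repository. Shows how the listed packages typically fit together.
import pytorch_lightning as pl
from mGPT.config import parse_args                      # config entry point (as used in scripts/get_motion_code.py)
from mGPT.data.build_data import build_data             # builds the datamodule from cfg.DATASET (mGPT/data/build_data.py)
from mGPT.models.build_model import build_model         # builds the model from cfg.model (mGPT/models/build_model.py)
from mGPT.utils.load_checkpoint import load_pretrained  # restores weights from cfg.TEST.CHECKPOINTS (mGPT/utils/load_checkpoint.py)

cfg = parse_args(phase="test")              # e.g. pointing at configs/config_h3d_stage3.yaml
datamodule = build_data(cfg, phase="test")  # phase="test" is an assumption; scripts/get_motion_code.py uses phase="token"
model = build_model(cfg, datamodule)
model = load_pretrained(cfg, model, phase="test")
pl.Trainer(accelerator=cfg.ACCELERATOR, devices=cfg.DEVICE).test(model, datamodule=datamodule)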
/mGPT/render/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mGPT/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mGPT/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mGPT/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseLosses 2 | -------------------------------------------------------------------------------- /mGPT/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseMetrics 2 | -------------------------------------------------------------------------------- /mGPT/render/blender/__init__.py: -------------------------------------------------------------------------------- 1 | from .render import render 2 | -------------------------------------------------------------------------------- /scripts/fit_motion.sh: -------------------------------------------------------------------------------- 1 | python -m fit --dir $1 --save_folder $2 --cuda True -------------------------------------------------------------------------------- /assets/meta/std.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/meta/std.npy -------------------------------------------------------------------------------- /mGPT/data/humanml/README.md: -------------------------------------------------------------------------------- 1 | This code is based on https://github.com/EricGuo5513/text-to-motion.git -------------------------------------------------------------------------------- /mGPT/render/blender/data.py: -------------------------------------------------------------------------------- 1 | class Data: 2 | def __len__(self): 3 | return self.N 4 | -------------------------------------------------------------------------------- /assets/meta/mean.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/meta/mean.npy -------------------------------------------------------------------------------- /assets/images/table7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/table7.png -------------------------------------------------------------------------------- /assets/images/table8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/table8.png -------------------------------------------------------------------------------- /assets/meta/std_eval.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/meta/std_eval.npy -------------------------------------------------------------------------------- /assets/images/figure10.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/figure10.png -------------------------------------------------------------------------------- /assets/images/figure12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/figure12.png -------------------------------------------------------------------------------- /assets/images/figure13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/figure13.png -------------------------------------------------------------------------------- /assets/images/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/pipeline.png -------------------------------------------------------------------------------- /assets/images/table15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/table15.png -------------------------------------------------------------------------------- /assets/meta/mean_eval.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/meta/mean_eval.npy -------------------------------------------------------------------------------- /assets/videos/example0.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example0.mp4 -------------------------------------------------------------------------------- /assets/videos/example1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example1.mp4 -------------------------------------------------------------------------------- /assets/videos/example2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example2.mp4 -------------------------------------------------------------------------------- /assets/videos/example3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example3.mp4 -------------------------------------------------------------------------------- /assets/videos/example4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example4.mp4 -------------------------------------------------------------------------------- /assets/videos/example5.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example5.mp4 -------------------------------------------------------------------------------- /assets/videos/example6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example6.mp4 -------------------------------------------------------------------------------- 
/assets/videos/example7.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example7.mp4 -------------------------------------------------------------------------------- /assets/videos/example8.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example8.mp4 -------------------------------------------------------------------------------- /mGPT/data/transforms/joints2jfeats/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Joints2Jfeats 2 | from .rifke import Rifke 3 | -------------------------------------------------------------------------------- /assets/images/avatar_bot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/avatar_bot.jpg -------------------------------------------------------------------------------- /assets/images/avatar_user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/images/avatar_user.png -------------------------------------------------------------------------------- /assets/videos/example0_fast.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example0_fast.mp4 -------------------------------------------------------------------------------- /prepare/prepare_t5.sh: -------------------------------------------------------------------------------- 1 | cd deps/ 2 | git lfs install 3 | git clone https://huggingface.co/google/flan-t5-base 4 | cd .. 
5 | -------------------------------------------------------------------------------- /assets/videos/example0_blender.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMotionLab/MotionGPT/HEAD/assets/videos/example0_blender.mp4 -------------------------------------------------------------------------------- /mGPT/data/transforms/rots2joints/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Rots2Joints 2 | from .smplh import SMPLH 3 | from .smplx import SMPLX 4 | -------------------------------------------------------------------------------- /prepare/requirements_render.txt: -------------------------------------------------------------------------------- 1 | # for rendering in blender python 2 | pytest-shutil 3 | matplotlib 4 | tqdm 5 | hydra-core 6 | six 7 | natsort 8 | smplx 9 | moviepy -------------------------------------------------------------------------------- /mGPT/data/transforms/rots2rfeats/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Rots2Rfeats 2 | # from .globvel import Globalvel 3 | 4 | from .globvelandy import Globalvelandy 5 | # from .rifeats import Rifeats 6 | -------------------------------------------------------------------------------- /mGPT/data/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .tensors import lengths_to_mask 2 | from .collate import collate_text_and_length, collate_pairs_and_text, collate_datastruct_and_text, collate_tensor_with_padding 3 | -------------------------------------------------------------------------------- /configs/lm/default.yaml: -------------------------------------------------------------------------------- 1 | target: mGPT.archs.mgpt_lm.MLM 2 | params: 3 | model_type: t5 4 | model_path: ./deps/flan-t5-base 5 | stage: ${TRAIN.STAGE} 6 | motion_codebook_size: ${model.params.codebook_size} 7 | ablation: ${ABLATION} 8 | -------------------------------------------------------------------------------- /configs/lm/gpt2_medium.yaml: -------------------------------------------------------------------------------- 1 | target: mGPT.archs.mgpt_lm.MLM 2 | params: 3 | model_type: gpt2 4 | model_path: openai/gpt2-medium 5 | stage: ${TRAIN.STAGE} 6 | motion_codebook_size: ${model.params.codebook_size} 7 | ablation: ${ABLATION} 8 | -------------------------------------------------------------------------------- /configs/lm/t5_large.yaml: -------------------------------------------------------------------------------- 1 | target: mGPT.archs.mgpt_lm.MLM 2 | params: 3 | model_type: t5 4 | model_path: google/flan-t5-large 5 | stage: ${TRAIN.STAGE} 6 | motion_codebook_size: ${model.params.codebook_size} 7 | ablation: ${ABLATION} 8 | -------------------------------------------------------------------------------- /configs/lm/t5_small.yaml: -------------------------------------------------------------------------------- 1 | target: mGPT.archs.mgpt_lm.MLM 2 | params: 3 | model_type: t5 4 | model_path: google/flan-t5-small 5 | stage: ${TRAIN.STAGE} 6 | motion_codebook_size: ${model.params.codebook_size} 7 | ablation: ${ABLATION} 8 | -------------------------------------------------------------------------------- /prepare/download_pretrained_models.sh: -------------------------------------------------------------------------------- 1 | mkdir -p checkpoints/ 2 | cd checkpoints/ 3 | echo -e "The 
pretrained models will be stored in the 'checkpoints' folder\n" 4 | mkdir -p mld_humanml3d_checkpoint/ 5 | 6 | git lfs install 7 | git clone https://huggingface.co/OpenMotionLab/MotionGPT-base 8 | 9 | echo -e "Downloading done!" 10 | -------------------------------------------------------------------------------- /configs/vq/default.yaml: -------------------------------------------------------------------------------- 1 | target: mGPT.archs.mgpt_vq.VQVae 2 | params: 3 | quantizer: 'ema_reset' 4 | code_num: 512 5 | code_dim: 512 6 | output_emb_width: 512 7 | down_t: 2 8 | stride_t: 2 9 | width: 512 10 | depth: 3 11 | dilation_growth_rate: 3 12 | norm: None 13 | activation: 'relu' 14 | nfeats: ${DATASET.NFEATS} 15 | ablation: ${ABLATION} 16 | -------------------------------------------------------------------------------- /mGPT/models/build_model.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | from mGPT.config import instantiate_from_config 3 | 4 | def build_model(cfg, datamodule): 5 | model_config = OmegaConf.to_container(cfg.model, resolve=True) 6 | model_config['params']['cfg'] = cfg 7 | model_config['params']['datamodule'] = datamodule 8 | return instantiate_from_config(model_config) 9 | -------------------------------------------------------------------------------- /mGPT/data/humanml/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset_t2m import Text2MotionDataset 2 | from .dataset_t2m_eval import Text2MotionDatasetEval 3 | from .dataset_t2m_cb import Text2MotionDatasetCB 4 | from .dataset_t2m_token import Text2MotionDatasetToken 5 | from .dataset_t2m_m2t import Text2MotionDatasetM2T 6 | from .dataset_m import MotionDataset 7 | from .dataset_m_vq import MotionDatasetVQ 8 | -------------------------------------------------------------------------------- /prepare/download_smpl_model.sh: -------------------------------------------------------------------------------- 1 | mkdir -p deps/ 2 | cd deps/ 3 | 4 | echo "The smpl model will be stored in the './deps' folder" 5 | 6 | # SMPL Models 7 | echo "Downloading" 8 | gdown "https://drive.google.com/uc?id=1qrFkPZyRwRGd0Q3EY76K8oJaIgs_WK9i" 9 | echo "Extracting" 10 | tar xfzv smpl.tar.gz 11 | echo "Cleaning" 12 | rm smpl.tar.gz 13 | 14 | echo "Downloading done!" 15 | -------------------------------------------------------------------------------- /prepare/download_t2m_evaluators.sh: -------------------------------------------------------------------------------- 1 | mkdir -p deps/ 2 | cd deps/ 3 | 4 | echo "The t2m evaluators will be stored in the './deps' folder" 5 | 6 | # T2M evaluators 7 | echo "Downloading" 8 | gdown "https://drive.google.com/uc?id=1AYsmEG8I3fAAoraT4vau0GnesWBWyeT8" 9 | echo "Extracting" 10 | tar xfzv t2m.tar.gz 11 | echo "Cleaning" 12 | rm t2m.tar.gz 13 | 14 | echo "Downloading done!" 
15 | -------------------------------------------------------------------------------- /mGPT/utils/fixseed.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import random 4 | 5 | 6 | def fixseed(seed): 7 | random.seed(seed) 8 | np.random.seed(seed) 9 | torch.manual_seed(seed) 10 | 11 | 12 | SEED = 10 13 | EVALSEED = 0 14 | # Provokes a warning: not fully functional yet 15 | # torch.set_deterministic(True) 16 | torch.backends.cudnn.benchmark = False 17 | 18 | fixseed(SEED) 19 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | pytorch_lightning 3 | torchmetrics 4 | omegaconf 5 | shortuuid 6 | chumpy 7 | transformers 8 | diffusers 9 | einops 10 | wandb 11 | rich 12 | matplotlib 13 | 14 | # for visualization 15 | smplx==0.1.28 16 | trimesh==3.9.24 17 | joblib==1.2.0 18 | h5py 19 | scikit-image 20 | spacy 21 | ftfy 22 | more-itertools 23 | natsort 24 | pyrender 25 | moviepy 26 | librosa 27 | triangle==20220202 28 | -------------------------------------------------------------------------------- /mGPT/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Transform 2 | from .smpl import SMPLTransform 3 | from .xyz import XYZTransform 4 | 5 | # rots2rfeats 6 | from .rots2rfeats import Rots2Rfeats 7 | from .rots2rfeats import Globalvelandy 8 | 9 | # rots2joints 10 | from .rots2joints import Rots2Joints 11 | from .rots2joints import SMPLH, SMPLX 12 | 13 | # joints2jfeats 14 | from .joints2jfeats import Joints2Jfeats 15 | from .joints2jfeats import Rifke 16 | -------------------------------------------------------------------------------- /mGPT/render/blender/vertices.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def prepare_vertices(vertices, canonicalize=True): 5 | data = vertices 6 | # Swap axis (gravity=Z instead of Y) 7 | # data = data[..., [2, 0, 1]] 8 | 9 | # Make left/right correct 10 | # data[..., [1]] = -data[..., [1]] 11 | 12 | # Center the first root to the first frame 13 | data -= data[[0], [0], :] 14 | 15 | # Remove the floor 16 | data[..., 2] -= np.min(data[..., 2]) 17 | return data 18 | -------------------------------------------------------------------------------- /mGPT/render/blender/sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def get_frameidx(*, mode, nframes, exact_frame, frames_to_keep): 4 | if mode == "sequence": 5 | frameidx = np.linspace(0, nframes - 1, frames_to_keep) 6 | frameidx = np.round(frameidx).astype(int) 7 | frameidx = list(frameidx) 8 | elif mode == "frame": 9 | index_frame = int(exact_frame*nframes) 10 | frameidx = [index_frame] 11 | elif mode == "video": 12 | frameidx = range(0, nframes) 13 | else: 14 | raise ValueError(f"Unsupported render mode: {mode}") 15 | return frameidx 16 | -------------------------------------------------------------------------------- /configs/evaluator/tm2t.yaml: -------------------------------------------------------------------------------- 1 | t2m_textencoder: 2 | target: mGPT.archs.tm2t_evaluator.TextEncoderBiGRUCo 3 | params: 4 | word_size: 300 5 | pos_size: 15 6 | hidden_size: 512 7 | output_size: 512 8 | t2m_moveencoder: 9 | target: mGPT.archs.tm2t_evaluator.MovementConvEncoder 10 
| params: 11 | input_size: ${eval:${DATASET.NFEATS} - 4} 12 | hidden_size: 512 13 | output_size: 512 14 | t2m_motionencoder: 15 | target: mGPT.archs.tm2t_evaluator.MotionEncoderBiGRUCo 16 | params: 17 | input_size: ${evaluator.tm2t.t2m_moveencoder.params.output_size} 18 | hidden_size: 1024 19 | output_size: 512 20 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | import numpy as np 6 | 7 | setup( 8 | name="MotionGPT", 9 | version="0.1.0", 10 | author="Biao Jiang and Xin Chen", 11 | author_email="jiangb22@m.fudan.edu.cn", 12 | description="MotionGPT: Human motion as a foreign language.", 13 | packages=find_packages(exclude=("configs", "deps")), 14 | python_requires=">=3.8", 15 | install_requires=[ 16 | "torch", 17 | "numpy", 18 | "tqdm", 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /configs/render.yaml: -------------------------------------------------------------------------------- 1 | NAME: '___render_do_not_need_name__' # Experiment name 2 | ACCELERATOR: 'gpu' # Devices optional: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps”, “auto” 3 | DEVICE: [0] # Index of gpus eg. [0] or [0,1,2,3] 4 | 5 | RENDER: 6 | FOLDER: '___no_need__' 7 | INPUT_MODE: 'npy' 8 | DIR: '' 9 | NPY: '___no_need__' 10 | DENOISING: True 11 | OLDRENDER: True 12 | # ["ultra", "high", "med", "low"] 13 | # RES: 'high' 14 | RES: 'med' 15 | DOWNSAMPLE: False 16 | FPS: 20.0 17 | CANONICALIZE: True 18 | EXACT_FRAME: 0.5 19 | NUM: 8 20 | MODE: '___no_need__' #sequence frame video 21 | VID_EXT: mp4 22 | ALWAYS_ON_FLOOR: false 23 | GT: false 24 | -------------------------------------------------------------------------------- /scripts/visualize_motion.sh: -------------------------------------------------------------------------------- 1 | # for npy folder 2 | # CUDA_VISIBLE_DEVICES=0 /apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender --background --python render.py -- --cfg=./configs/render.yaml --dir=$1 --mode=$2 3 | 4 | for j in `seq 0 2` 5 | do 6 | CUDA_VISIBLE_DEVICES=0 /apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender --background --python render.py -- --dir=$1 --mode=$2 7 | done 8 | 9 | # for single npy 10 | # /apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender --background --python render.py -- --cfg=./configs/render_cx.yaml --npy=$1 --joint_type=HumanML3D 11 | -------------------------------------------------------------------------------- /scripts/visualize_motion_parallel.sh: -------------------------------------------------------------------------------- 1 | # # parallel fit 2 | # for i in `seq 0 7` 3 | # do 4 | # for j in `seq 0 2` 5 | # do 6 | # CUDA_VISIBLE_DEVICES=$i python -m fit --dir $1 --save_folder $2 --cuda True & 7 | # echo $j & 8 | # done 9 | # done 10 | # wait 11 | # echo "all done" 12 | 13 | 14 | # parallel render 15 | for i in `seq 0 7` 16 | do 17 | for j in `seq 0 2` 18 | do 19 | sleep 1 & 20 | CUDA_VISIBLE_DEVICES=$i /apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender --background --python render.py -- --dir=$1 --mode=$2 & 21 | echo $i 22 | done 23 | done 24 | wait 25 | echo "all done" 
26 | -------------------------------------------------------------------------------- /mGPT/data/build_data.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | from os.path import join as pjoin 3 | from mGPT.config import instantiate_from_config 4 | 5 | 6 | def build_data(cfg, phase="train"): 7 | data_config = OmegaConf.to_container(cfg.DATASET, resolve=True) 8 | data_config['params'] = {'cfg': cfg, 'phase': phase} 9 | if isinstance(data_config['target'], str): 10 | return instantiate_from_config(data_config) 11 | elif isinstance(data_config['target'], list): 12 | data_config_tmp = data_config.copy() 13 | data_config_tmp['params']['dataModules'] = data_config['target'] 14 | data_config_tmp['target'] = 'mGPT.data.Concat.ConcatDataModule' 15 | return instantiate_from_config(data_config_tmp) 16 | -------------------------------------------------------------------------------- /scripts/fit_motion_parallel.sh: -------------------------------------------------------------------------------- 1 | # parallel fit 2 | for i in `seq 0 7` 3 | do 4 | for j in `seq 0 1` 5 | do 6 | CUDA_VISIBLE_DEVICES=$i python -m fit --dir $1 --save_folder $2 --cuda True & 7 | echo $j & 8 | done 9 | done 10 | 11 | wait 12 | echo "all done" 13 | 14 | # # parallel fit 15 | # for i in `seq 0 25` 16 | # do 17 | # CUDA_VISIBLE_DEVICES=$3 python -m fit --dir $1 --save_folder $2 --cuda True & 18 | # echo $i 19 | # done 20 | # wait 21 | # echo "all done" 22 | 23 | 24 | # # gpu parallel fit 25 | # for i in `seq 0 7` 26 | # do 27 | # CUDA_VISIBLE_DEVICES=$i python -m fit --dir $1 --save_folder $2 --cuda True & 28 | # echo $i 29 | # done 30 | # wait 31 | # echo "all done" 32 | -------------------------------------------------------------------------------- /mGPT/utils/misc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def to_numpy(tensor): 5 | if torch.is_tensor(tensor): 6 | return tensor.cpu().numpy() 7 | elif type(tensor).__module__ != 'numpy': 8 | raise ValueError("Cannot convert {} to numpy array".format( 9 | type(tensor))) 10 | return tensor 11 | 12 | 13 | def to_torch(ndarray): 14 | if type(ndarray).__module__ == 'numpy': 15 | return torch.from_numpy(ndarray) 16 | elif not torch.is_tensor(ndarray): 17 | raise ValueError("Cannot convert {} to torch tensor".format( 18 | type(ndarray))) 19 | return ndarray 20 | 21 | 22 | def cleanexit(): 23 | import sys 24 | import os 25 | try: 26 | sys.exit(0) 27 | except SystemExit: 28 | os._exit(0) 29 | 30 | -------------------------------------------------------------------------------- /mGPT/utils/sample_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | logger = logging.getLogger(__name__) 4 | 5 | def cfg_mean_nsamples_resolution(cfg): 6 | if cfg.mean and cfg.number_of_samples > 1: 7 | logger.error("All the samples will be the mean. 
cfg.number_of_samples=1 will be forced.") 8 | cfg.number_of_samples = 1 9 | 10 | return cfg.number_of_samples == 1 11 | 12 | 13 | def get_path(sample_path: Path, is_amass: bool, gender: str, split: str, onesample: bool, mean: bool, fact: float): 14 | extra_str = ("_mean" if mean else "") if onesample else "_multi" 15 | fact_str = "" if fact == 1 else f"{fact}_" 16 | gender_str = gender + "_" if is_amass else "" 17 | path = sample_path / f"{fact_str}{gender_str}{split}{extra_str}" 18 | return path 19 | -------------------------------------------------------------------------------- /demos/pred.txt: -------------------------------------------------------------------------------- 1 | Predict motion: #datasets/humanml3d/new_joint_vecs/M009643.npy 2 | Predict motion: #datasets/humanml3d/new_joint_vecs/M008862.npy 3 | Predict motion: #datasets/humanml3d/new_joint_vecs/004163.npy 4 | Predict motion: #datasets/humanml3d/new_joint_vecs/005166.npy 5 | Predict motion: #datasets/humanml3d/new_joint_vecs/009181.npy 6 | Predict motion: #datasets/humanml3d/new_joint_vecs/000179.npy 7 | Predict motion: #datasets/humanml3d/new_joint_vecs/013811.npy 8 | Predict motion: #datasets/humanml3d/new_joint_vecs/000556.npy 9 | Predict motion: #datasets/humanml3d/new_joint_vecs/010967.npy 10 | Predict motion: #datasets/humanml3d/new_joint_vecs/007567.npy 11 | -------------------------------------------------------------------------------- /prepare/instructions/template_pretrain.json: -------------------------------------------------------------------------------- 1 | { 2 | "Text-to-Motion": { 3 | "t2m": { 4 | "class": "t2m", 5 | "input": [ 6 | "" 7 | ], 8 | "output": [ 9 | "" 10 | ] 11 | } 12 | }, 13 | "Motion-to-Text": { 14 | "m2t": { 15 | "class": "m2t", 16 | "input": [ 17 | "" 18 | ], 19 | "output": [ 20 | "" 21 | ] 22 | } 23 | }, 24 | "Motion Prediction": { 25 | "pred": { 26 | "class": "predict", 27 | "input": [ 28 | "Predict motion: " 29 | ], 30 | "output": [ 31 | "" 32 | ] 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /configs/assets.yaml: -------------------------------------------------------------------------------- 1 | CONFIG_FOLDER: configs # Config files path 2 | FOLDER: experiments # Experiment files saving path 3 | 4 | TEST: 5 | FOLDER: results # Testing files saving path 6 | 7 | DATASET: 8 | TASK_ROOT: deps/mGPT_instructions 9 | SMPL_PATH: deps/smpl 10 | TRANSFORM_PATH: deps/transforms/ 11 | WORD_VERTILIZER_PATH: deps/glove/ 12 | KIT: 13 | ROOT: datasets/kit-ml # KIT directory 14 | SPLIT_ROOT: datasets/kit-ml # KIT splits directory 15 | MEAN_STD_PATH: deps/t2m/ 16 | HUMANML3D: 17 | ROOT: datasets/humanml3d # HumanML3D directory 18 | SPLIT_ROOT: datasets/humanml3d # HumanML3D splits directory 19 | MEAN_STD_PATH: deps/t2m/ 20 | 21 | METRIC: 22 | TM2T: 23 | t2m_path: deps/t2m/ # path for tm2t evaluator 24 | 25 | model: 26 | whisper_path: deps/whisper-large-v2 # path for whisper model, webui only 27 | 28 | RENDER: 29 | BLENDER_PATH: libs/blender-2.93.2-linux-x64/blender 30 | SMPL_MODEL_PATH: deps/smpl/smpl_models/smpl 31 | MODEL_PATH: deps/smpl/smpl_models/ 32 | FACES_PATH: deps/smplh/smplh.faces 33 | -------------------------------------------------------------------------------- /mGPT/models/utils/position_encoding_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class PositionalEncoding(nn.Module): 7 | 8 | def __init__(self, 
d_model, dropout=0.1, max_len=5000, batch_first=False): 9 | super().__init__() 10 | self.batch_first = batch_first 11 | 12 | self.dropout = nn.Dropout(p=dropout) 13 | 14 | pe = torch.zeros(max_len, d_model) 15 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 16 | div_term = torch.exp(torch.arange( 17 | 0, d_model, 2).float() * (-np.log(10000.0) / d_model)) 18 | pe[:, 0::2] = torch.sin(position * div_term) 19 | pe[:, 1::2] = torch.cos(position * div_term) 20 | pe = pe.unsqueeze(0).transpose(0, 1) 21 | 22 | self.register_buffer("pe", pe) 23 | 24 | def forward(self, x): 25 | # not used in the final model 26 | if self.batch_first: 27 | x = x + self.pe.permute(1, 0, 2)[:, : x.shape[1], :] 28 | else: 29 | x = x + self.pe[: x.shape[0], :] 30 | return self.dropout(x) 31 | -------------------------------------------------------------------------------- /prepare/smplh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Extraction of the archives" 3 | echo 4 | 5 | cd deps/smplh 6 | mkdir tmp 7 | cd tmp 8 | 9 | tar xfv ../smplh.tar.xz 10 | unzip ../mano_v1_2.zip 11 | 12 | cd ../../../ 13 | echo 14 | echo "Done!" 15 | echo 16 | echo "Clean and merge models" 17 | echo 18 | 19 | python prepare/merge_smplh_mano.py --smplh-fn deps/smplh/tmp/male/model.npz --mano-left-fn deps/smplh/tmp/mano_v1_2/models/MANO_LEFT.pkl --mano-right-fn deps/smplh/tmp/mano_v1_2/models/MANO_RIGHT.pkl --output-folder deps/smplh/ 20 | 21 | python prepare/merge_smplh_mano.py --smplh-fn deps/smplh/tmp/female/model.npz --mano-left-fn deps/smplh/tmp/mano_v1_2/models/MANO_LEFT.pkl --mano-right-fn deps/smplh/tmp/mano_v1_2/models/MANO_RIGHT.pkl --output-folder deps/smplh/ 22 | 23 | python prepare/merge_smplh_mano.py --smplh-fn deps/smplh/tmp/neutral/model.npz --mano-left-fn deps/smplh/tmp/mano_v1_2/models/MANO_LEFT.pkl --mano-right-fn deps/smplh/tmp/mano_v1_2/models/MANO_RIGHT.pkl --output-folder deps/smplh/ 24 | 25 | echo 26 | echo "Done!" 27 | echo 28 | echo "Deleting tmp files" 29 | rm -rf deps/smplh/tmp/ 30 | echo 31 | echo "Done!" 32 | -------------------------------------------------------------------------------- /mGPT/data/tools/tensors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import List, Dict 18 | import torch 19 | from torch import Tensor 20 | 21 | 22 | def lengths_to_mask(lengths: List[int], device: torch.device) -> Tensor: 23 | lengths = torch.tensor(lengths, device=device) 24 | max_len = max(lengths) 25 | mask = torch.arange(max_len, device=device).expand(len(lengths), max_len) < lengths.unsqueeze(1) 26 | return mask 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 OpenMotionLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /mGPT/models/utils/tools.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | def remove_padding(tensors, lengths): 4 | return [tensor[:tensor_length] for tensor, tensor_length in zip(tensors, lengths)] 5 | 6 | class AutoParams(nn.Module): 7 | def __init__(self, **kargs): 8 | try: 9 | for param in self.needed_params: 10 | if param in kargs: 11 | setattr(self, param, kargs[param]) 12 | else: 13 | raise ValueError(f"{param} is needed.") 14 | except : 15 | pass 16 | 17 | try: 18 | for param, default in self.optional_params.items(): 19 | if param in kargs and kargs[param] is not None: 20 | setattr(self, param, kargs[param]) 21 | else: 22 | setattr(self, param, default) 23 | except : 24 | pass 25 | super().__init__() 26 | 27 | 28 | # taken from joeynmt repo 29 | def freeze_params(module: nn.Module) -> None: 30 | """ 31 | Freeze the parameters of this module, 32 | i.e. 
do not update them during training 33 | 34 | :param module: freeze parameters of this module 35 | """ 36 | for _, p in module.named_parameters(): 37 | p.requires_grad = False 38 | -------------------------------------------------------------------------------- /mGPT/utils/load_checkpoint.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def load_pretrained(cfg, model, logger=None, phase="train"): 4 | if logger is not None: 5 | logger.info(f"Loading pretrain model from {cfg.TRAIN.PRETRAINED}") 6 | 7 | if phase == "train": 8 | ckpt_path = cfg.TRAIN.PRETRAINED 9 | elif phase == "test": 10 | ckpt_path = cfg.TEST.CHECKPOINTS 11 | 12 | state_dict = torch.load(ckpt_path, map_location="cpu")["state_dict"] 13 | model.load_state_dict(state_dict, strict=True) 14 | return model 15 | 16 | 17 | def load_pretrained_vae(cfg, model, logger=None): 18 | state_dict = torch.load(cfg.TRAIN.PRETRAINED_VAE, 19 | map_location="cpu")['state_dict'] 20 | if logger is not None: 21 | logger.info(f"Loading pretrain vae from {cfg.TRAIN.PRETRAINED_VAE}") 22 | 23 | # Extract encoder/decoder 24 | from collections import OrderedDict 25 | vae_dict = OrderedDict() 26 | for k, v in state_dict.items(): 27 | if "motion_vae" in k: 28 | name = k.replace("motion_vae.", "") 29 | vae_dict[name] = v 30 | elif "vae" in k: 31 | name = k.replace("vae.", "") 32 | vae_dict[name] = v 33 | if hasattr(model, 'vae'): 34 | model.vae.load_state_dict(vae_dict, strict=True) 35 | else: 36 | model.motion_vae.load_state_dict(vae_dict, strict=True) 37 | 38 | return model 39 | -------------------------------------------------------------------------------- /mGPT/data/transforms/identity.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | from torch import Tensor 19 | 20 | from .base import Datastruct, dataclass, Transform 21 | 22 | 23 | class IdentityTransform(Transform): 24 | def __init__(self, **kwargs): 25 | return 26 | 27 | def Datastruct(self, **kwargs): 28 | return IdentityDatastruct(**kwargs) 29 | 30 | def __repr__(self): 31 | return "IdentityTransform()" 32 | 33 | 34 | @dataclass 35 | class IdentityDatastruct(Datastruct): 36 | transforms: IdentityTransform 37 | 38 | features: Optional[Tensor] = None 39 | 40 | def __post_init__(self): 41 | self.datakeys = ["features"] 42 | 43 | def __len__(self): 44 | return len(self.rfeats) 45 | -------------------------------------------------------------------------------- /mGPT/render/blender/camera.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | 3 | 4 | class Camera: 5 | def __init__(self, *, first_root, mode, is_mesh): 6 | camera = bpy.data.objects['Camera'] 7 | 8 | ## initial position 9 | camera.location.x = 7.36 10 | camera.location.y = -6.93 11 | if is_mesh: 12 | # camera.location.z = 5.45 13 | camera.location.z = 5.6 14 | else: 15 | camera.location.z = 5.2 16 | 17 | # wider point of view 18 | if mode == "sequence": 19 | if is_mesh: 20 | camera.data.lens = 65 21 | else: 22 | camera.data.lens = 85 23 | elif mode == "frame": 24 | if is_mesh: 25 | camera.data.lens = 130 26 | else: 27 | camera.data.lens = 85 28 | elif mode == "video": 29 | if is_mesh: 30 | camera.data.lens = 110 31 | else: 32 | # avoid cutting person 33 | camera.data.lens = 85 34 | # camera.data.lens = 140 35 | 36 | # camera.location.x += 0.75 37 | 38 | self.mode = mode 39 | self.camera = camera 40 | 41 | self.camera.location.x += first_root[0] 42 | self.camera.location.y += first_root[1] 43 | 44 | self._root = first_root 45 | 46 | def update(self, newroot): 47 | delta_root = newroot - self._root 48 | 49 | self.camera.location.x += delta_root[0] 50 | self.camera.location.y += delta_root[1] 51 | 52 | self._root = newroot 53 | -------------------------------------------------------------------------------- /scripts/plys2npy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from argparse import ArgumentParser 4 | from pathlib import Path 5 | 6 | import natsort 7 | import numpy as np 8 | import torch 9 | import trimesh 10 | from tqdm import tqdm 11 | 12 | 13 | def main(): 14 | parser = ArgumentParser() 15 | 16 | group = parser.add_argument_group("Params") 17 | group.add_argument( 18 | "--ply_dir", 19 | type=str, 20 | required=True, 21 | help="ply set", 22 | ) 23 | group.add_argument( 24 | "--out_dir", 25 | type=str, 26 | required=True, 27 | help="output folder", 28 | ) 29 | params = parser.parse_args() 30 | plys2npy(params.ply_dir, params.out_dir) 31 | 32 | def plys2npy(ply_dir, out_dir): 33 | ply_dir = Path(ply_dir) 34 | paths = [] 35 | file_list = natsort.natsorted(os.listdir(ply_dir)) 36 | for item in file_list: 37 | if item.endswith(".ply") and not item.endswith("_gt.ply"): 38 | paths.append(os.path.join(ply_dir, item)) 39 | 40 | 41 | meshs = np.zeros((len(paths), 6890, 3)) 42 | for i, path in enumerate(paths): 43 | mesh = trimesh.load_mesh(path, process=False) 44 | vs = mesh.vertices 45 | assert vs.shape == (6890, 3) 46 | meshs[i] = vs 47 | 48 | basename = os.path.basename(ply_dir) 49 | if basename.startswith("SMPLFit_"): 50 | basename = basename[len("SMPLFit_"):] 51 | file_name = 
os.path.join(out_dir, basename + "_mesh.npy") 52 | np.save(file_name, meshs) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /configs/config_h3d_stage1.yaml: -------------------------------------------------------------------------------- 1 | NAME: VQVAE_HumanML3D # Experiment name 2 | ACCELERATOR: 'gpu' # Devices optional: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps”, “auto” 3 | NUM_NODES: 1 # Number of GPU nodes for distributed training 4 | DEVICE: [0] # Index of gpus eg. [0] or [0,1,2,3] 5 | 6 | TRAIN: 7 | #--------------------------------- 8 | STAGE: vae # stage "vae" , "lm_pretrain", "lm_instruct" 9 | #--------------------------------- 10 | NUM_WORKERS: 16 # Number of workers 11 | BATCH_SIZE: 256 # Size of batches 12 | END_EPOCH: 999999 # End epoch 13 | RESUME: '' # Resume training from this path 14 | PRETRAINED: '' # Pretrained model path 15 | 16 | OPTIM: 17 | target: AdamW 18 | params: 19 | lr: 2e-4 20 | betas: [0.9, 0.99] 21 | weight_decay: 0.0 22 | 23 | # Evaluating Configuration 24 | EVAL: 25 | BATCH_SIZE: 32 # Evaluating Batch size 26 | SPLIT: test 27 | 28 | TEST: 29 | CHECKPOINTS: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar 30 | SPLIT: test 31 | BATCH_SIZE: 32 # Testing Batch size 32 | 33 | DATASET: 34 | target: mGPT.data.HumanML3D.HumanML3DDataModule 35 | 36 | METRIC: 37 | TYPE: ['TM2TMetrics', 'MRMetrics'] 38 | 39 | LOSS: 40 | LAMBDA_FEATURE: 1.0 41 | LAMBDA_VELOCITY: 0.5 42 | LAMBDA_COMMIT: 0.02 43 | LAMBDA_CLS: 1.0 44 | ABLATION: 45 | RECONS_LOSS: 'l1_smooth' 46 | 47 | model: 48 | target: mGPT.models.mgpt.MotionGPT 49 | params: 50 | condition: 'text' 51 | task: 't2m' 52 | lm: ${lm.default} 53 | motion_vae: ${vq.default} 54 | 55 | LOGGER: 56 | TYPE: ['tensorboard', 'wandb'] 57 | VAL_EVERY_STEPS: 10 58 | WANDB: 59 | params: 60 | project: motiongpt 61 | -------------------------------------------------------------------------------- /configs/config_h3d_stage2.yaml: -------------------------------------------------------------------------------- 1 | NAME: Pretrain_HumanML3D # Experiment name 2 | ACCELERATOR: 'gpu' # Devices optional: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps”, “auto” 3 | NUM_NODES: 1 # Number of GPU nodes for distributed training 4 | DEVICE: [0] # Index of gpus eg. 
[0] or [0,1,2,3] 5 | 6 | TRAIN: 7 | #--------------------------------- 8 | STAGE: lm_pretrain # stage "vae" , "lm_pretrain", "lm_instruct" 9 | #--------------------------------- 10 | NUM_WORKERS: 16 # Number of workers 11 | BATCH_SIZE: 16 # Size of batches 12 | END_EPOCH: 999999 # End epoch 13 | RESUME: '' # Resume training from this path 14 | PRETRAINED: '' # Preatrained model path 15 | PRETRAINED_VAE: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar # Vae model path 16 | 17 | OPTIM: 18 | target: AdamW 19 | params: 20 | lr: 2e-4 21 | betas: [0.9, 0.99] 22 | weight_decay: 0.0 23 | 24 | # Evaluating Configuration 25 | EVAL: 26 | BATCH_SIZE: 32 # Evaluating Batch size 27 | SPLIT: test 28 | 29 | TEST: 30 | CHECKPOINTS: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar 31 | SPLIT: test 32 | BATCH_SIZE: 32 # training Batch size 33 | 34 | DATASET: 35 | target: mGPT.data.HumanML3D.HumanML3DDataModule 36 | CODE_PATH: TOKENS 37 | 38 | METRIC: 39 | TYPE: ['TM2TMetrics', 'PredMetrics'] 40 | 41 | LOSS: 42 | LAMBDA_FEATURE: 1.0 43 | LAMBDA_VELOCITY: 0.5 44 | LAMBDA_COMMIT: 0.02 45 | LAMBDA_CLS: 1.0 46 | ABLATION: 47 | RECONS_LOSS: 'l1_smooth' 48 | 49 | model: 50 | target: mGPT.models.mgpt.MotionGPT 51 | params: 52 | condition: 'text' 53 | task: 't2m' 54 | lm: ${lm.default} 55 | motion_vae: ${vq.default} 56 | 57 | LOGGER: 58 | TYPE: ['tensorboard', 'wandb'] 59 | VAL_EVERY_STEPS: 10 60 | WANDB: 61 | params: 62 | project: motiongpt 63 | -------------------------------------------------------------------------------- /mGPT/metrics/base.py: -------------------------------------------------------------------------------- 1 | from torch import Tensor, nn 2 | from os.path import join as pjoin 3 | from .mr import MRMetrics 4 | from .t2m import TM2TMetrics 5 | from .mm import MMMetrics 6 | from .m2t import M2TMetrics 7 | from .m2m import PredMetrics 8 | 9 | 10 | class BaseMetrics(nn.Module): 11 | def __init__(self, cfg, datamodule, debug, **kwargs) -> None: 12 | super().__init__() 13 | 14 | njoints = datamodule.njoints 15 | 16 | data_name = datamodule.name 17 | if data_name in ["humanml3d", "kit"]: 18 | self.TM2TMetrics = TM2TMetrics( 19 | cfg=cfg, 20 | dataname=data_name, 21 | diversity_times=30 if debug else cfg.METRIC.DIVERSITY_TIMES, 22 | dist_sync_on_step=cfg.METRIC.DIST_SYNC_ON_STEP, 23 | ) 24 | self.M2TMetrics = M2TMetrics( 25 | cfg=cfg, 26 | w_vectorizer=datamodule.hparams.w_vectorizer, 27 | diversity_times=30 if debug else cfg.METRIC.DIVERSITY_TIMES, 28 | dist_sync_on_step=cfg.METRIC.DIST_SYNC_ON_STEP) 29 | self.MMMetrics = MMMetrics( 30 | cfg=cfg, 31 | mm_num_times=cfg.METRIC.MM_NUM_TIMES, 32 | dist_sync_on_step=cfg.METRIC.DIST_SYNC_ON_STEP, 33 | ) 34 | 35 | self.MRMetrics = MRMetrics( 36 | njoints=njoints, 37 | jointstype=cfg.DATASET.JOINT_TYPE, 38 | dist_sync_on_step=cfg.METRIC.DIST_SYNC_ON_STEP, 39 | ) 40 | self.PredMetrics = PredMetrics( 41 | cfg=cfg, 42 | njoints=njoints, 43 | jointstype=cfg.DATASET.JOINT_TYPE, 44 | dist_sync_on_step=cfg.METRIC.DIST_SYNC_ON_STEP, 45 | task=cfg.model.params.task, 46 | ) 47 | -------------------------------------------------------------------------------- /mGPT/render/blender/tools.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | import numpy as np 3 | 4 | 5 | def style_detect(data): 6 | is_mesh = False 7 | is_smplx = False 8 | jointstyle = 'mmm' 9 | # heuristic 10 | if data.shape[1] > 1000: 11 | is_mesh = True 12 | if data.shape[1] == 10475: 13 | is_smplx = True 14 | if data.shape[1] == 22: 15 | jointstyle = 
'humanml3d' 16 | 17 | return is_mesh, is_smplx, jointstyle 18 | 19 | 20 | 21 | # see this for more explanation 22 | # https://gist.github.com/iyadahmed/7c7c0fae03c40bd87e75dc7059e35377 23 | # This should be solved with newer versions of Blender 24 | class ndarray_pydata(np.ndarray): 25 | def __bool__(self) -> bool: 26 | return len(self) > 0 27 | 28 | 29 | def load_numpy_vertices_into_blender(vertices, faces, name, mat): 30 | mesh = bpy.data.meshes.new(name) 31 | mesh.from_pydata(vertices, [], faces.view(ndarray_pydata)) 32 | mesh.validate() 33 | 34 | obj = bpy.data.objects.new(name, mesh) 35 | bpy.context.scene.collection.objects.link(obj) 36 | 37 | bpy.ops.object.select_all(action='DESELECT') 38 | obj.select_set(True) 39 | obj.active_material = mat 40 | bpy.context.view_layer.objects.active = obj 41 | bpy.ops.object.shade_smooth() 42 | bpy.ops.object.select_all(action='DESELECT') 43 | return True 44 | 45 | 46 | def delete_objs(names): 47 | if not isinstance(names, list): 48 | names = [names] 49 | # bpy.ops.object.mode_set(mode='OBJECT') 50 | bpy.ops.object.select_all(action='DESELECT') 51 | for obj in bpy.context.scene.objects: 52 | for name in names: 53 | if obj.name.startswith(name) or obj.name.endswith(name): 54 | obj.select_set(True) 55 | bpy.ops.object.delete() 56 | bpy.ops.object.select_all(action='DESELECT') 57 | -------------------------------------------------------------------------------- /configs/webui.yaml: -------------------------------------------------------------------------------- 1 | NAME: Webui # Experiment name 2 | DEBUG: False # Debug mode 3 | ACCELERATOR: 'cpu' # Devices optional: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps”, “auto” 4 | DEVICE: [0] # Index of gpus eg. [0] or [0,1,2,3] 5 | 6 | # Training configuration 7 | TRAIN: 8 | #--------------------------------- 9 | STAGE: lm_instruct 10 | NUM_WORKERS: 32 # Number of workers 11 | BATCH_SIZE: 16 # Size of batches 12 | START_EPOCH: 0 # Start epoch 13 | END_EPOCH: 99999 # End epoch 14 | ABLATION: 15 | pkeep: 0.5 16 | OPTIM: 17 | TYPE: AdamW # Optimizer type 18 | LR: 2e-4 # Learning rate 19 | WEIGHT_DECAY: 0.0 20 | LR_SCHEDULER: [100, 200, 300, 400] 21 | GAMMA: 0.8 22 | 23 | # Evaluating Configuration 24 | EVAL: 25 | BATCH_SIZE: 32 # Evaluating Batch size 26 | SPLIT: test 27 | 28 | # Test Configuration 29 | TEST: 30 | CHECKPOINTS: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar 31 | SPLIT: test 32 | BATCH_SIZE: 32 # Testing Batch size 33 | MEAN: False 34 | NUM_SAMPLES: 1 35 | FACT: 1 36 | 37 | # Datasets Configuration 38 | DATASET: 39 | target: mGPT.data.webui.HumanML3DDataModule 40 | 41 | METRIC: 42 | TYPE: ['TM2TMetrics'] 43 | # Losses Configuration 44 | LOSS: 45 | TYPE: t2mgpt # Losses type 46 | LAMBDA_FEATURE: 1.0 47 | LAMBDA_VELOCITY: 0.5 48 | LAMBDA_COMMIT: 0.02 49 | LAMBDA_CLS: 1.0 50 | LAMBDA_M2T2M: 1.0 51 | LAMBDA_T2M2T: 10.0 52 | ABLATION: 53 | RECONS_LOSS: 'l1_smooth' 54 | 55 | # Model Configuration 56 | model: 57 | target: mGPT.models.mgpt_webui.MotionGPT 58 | params: 59 | condition: 'text' 60 | task: 't2m' 61 | lm: ${lm.default} 62 | motion_vae: ${vq.default} 63 | 64 | # Logger configuration 65 | LOGGER: 66 | LOG_EVERY_STEPS: 5 67 | VAL_EVERY_STEPS: 10 68 | TENSORBOARD: True 69 | wandb: 70 | params: 71 | project: null 72 | -------------------------------------------------------------------------------- /mGPT/data/humanml/dataset_m_vq.py: -------------------------------------------------------------------------------- 1 | import random 2 | import codecs as cs 3 | import numpy as np 4 | from 
torch.utils import data 5 | from rich.progress import track 6 | from os.path import join as pjoin 7 | from .dataset_m import MotionDataset 8 | from .dataset_t2m import Text2MotionDataset 9 | 10 | 11 | class MotionDatasetVQ(Text2MotionDataset): 12 | def __init__( 13 | self, 14 | data_root, 15 | split, 16 | mean, 17 | std, 18 | max_motion_length, 19 | min_motion_length, 20 | win_size, 21 | unit_length=4, 22 | fps=20, 23 | tmpFile=True, 24 | tiny=False, 25 | debug=False, 26 | **kwargs, 27 | ): 28 | super().__init__(data_root, split, mean, std, max_motion_length, 29 | min_motion_length, unit_length, fps, tmpFile, tiny, 30 | debug, **kwargs) 31 | 32 | # Filter out the motions that are too short 33 | self.window_size = win_size 34 | name_list = list(self.name_list) 35 | for name in self.name_list: 36 | motion = self.data_dict[name]["motion"] 37 | if motion.shape[0] < self.window_size: 38 | name_list.remove(name) 39 | self.data_dict.pop(name) 40 | self.name_list = name_list 41 | 42 | def __len__(self): 43 | return len(self.name_list) 44 | 45 | def __getitem__(self, item): 46 | idx = self.pointer + item 47 | data = self.data_dict[self.name_list[idx]] 48 | motion, length = data["motion"], data["length"] 49 | 50 | idx = random.randint(0, motion.shape[0] - self.window_size) 51 | motion = motion[idx:idx + self.window_size] 52 | motion = (motion - self.mean) / self.std 53 | 54 | return None, motion, length, None, None, None, None, 55 | -------------------------------------------------------------------------------- /configs/config_h3d_stage3.yaml: -------------------------------------------------------------------------------- 1 | NAME: Instruct_HumanML3D # Experiment names 2 | ACCELERATOR: 'gpu' # Devices optioncal: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps, “auto” 3 | NUM_NODES: 1 # Number of GPU nodes for distributed training 4 | DEVICE: [0] # Index of gpus eg. 
[0] or [0,1,2,3] 5 | 6 | TRAIN: 7 | #--------------------------------- 8 | STAGE: lm_instruct # stage "vae" , "lm_pretrain", "lm_instruct" 9 | #--------------------------------- 10 | NUM_WORKERS: 16 # Number of workers 11 | BATCH_SIZE: 16 # Size of batches 12 | END_EPOCH: 999999 # End epoch 13 | RESUME: '' # Resume training from this path 14 | PRETRAINED: '' # Preatrained model path 15 | PRETRAINED_VAE: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar # Vae model path 16 | 17 | OPTIM: 18 | target: AdamW 19 | params: 20 | lr: 1e-4 21 | betas: [0.9, 0.99] 22 | weight_decay: 0.0 23 | 24 | # Evaluating Configuration 25 | EVAL: 26 | BATCH_SIZE: 32 # Evaluating Batch size 27 | SPLIT: test 28 | 29 | TEST: 30 | CHECKPOINTS: checkpoints/MotionGPT-base/motiongpt_s3_h3d.tar 31 | # CHECKPOINTS: /apdcephfs/share_1227775/billljiang/memData/experiments/gpt0/0514_Instruct_T5BASE_HumanML3D/checkpoints/epoch=119.tar 32 | SPLIT: test 33 | BATCH_SIZE: 32 # training Batch size 34 | 35 | DATASET: 36 | target: mGPT.data.HumanML3D.HumanML3DDataModule 37 | CODE_PATH: TOKENS 38 | 39 | METRIC: 40 | TYPE: ['TM2TMetrics', 'PredMetrics'] 41 | 42 | LOSS: 43 | LAMBDA_FEATURE: 1.0 44 | LAMBDA_VELOCITY: 0.5 45 | LAMBDA_COMMIT: 0.02 46 | LAMBDA_CLS: 1.0 47 | ABLATION: 48 | RECONS_LOSS: 'l1_smooth' 49 | 50 | model: 51 | target: mGPT.models.mgpt.MotionGPT 52 | params: 53 | condition: 'text' 54 | task: 't2m' 55 | lm: ${lm.default} 56 | motion_vae: ${vq.default} 57 | 58 | LOGGER: 59 | TYPE: ['tensorboard', 'wandb'] 60 | VAL_EVERY_STEPS: 10 61 | WANDB: 62 | params: 63 | project: mem 64 | -------------------------------------------------------------------------------- /mGPT/render/pyrender/j3ds_render_smpl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | from scripts.hybrik_loc2rot import HybrIKJointsToRotmat 5 | from scripts.pyrender import SMPLRender 6 | import cv2 7 | from scipy.spatial.transform import Rotation as RRR 8 | 9 | parser = argparse.ArgumentParser( 10 | description='Render a SMPL video by a j3ds npy file.') 11 | parser.add_argument('--input', type=str, default='', help='the npy file path') 12 | parser.add_argument('--render', 13 | type=int, 14 | default=1, 15 | help='render the video if 1') 16 | args = parser.parse_args() 17 | 18 | input_path = args.input 19 | output_npy_path = args.input.replace('.npy', '_pose.npy') 20 | data = np.load(input_path) 21 | data = data - data[0, 0] 22 | pose_generator = HybrIKJointsToRotmat() 23 | pose = pose_generator(data) 24 | pose = np.concatenate( 25 | [pose, np.stack([np.stack([np.eye(3)] * pose.shape[0], 0)] * 2, 1)], 1) 26 | np.save(output_npy_path, pose) 27 | shape = [768, 768] 28 | if args.render: 29 | render = SMPLRender() 30 | output_mp4_path = args.input.replace('.npy', '_smpl.mp4') 31 | os.environ['PYOPENGL_PLATFORM'] = 'egl' 32 | size = (shape[1], shape[0]) 33 | fps = 30.0 34 | fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V') 35 | videoWriter = cv2.VideoWriter(output_mp4_path, fourcc, fps, size) 36 | r = RRR.from_rotvec(np.array([np.pi, 0.0, 0.0])) 37 | pose[:, 0] = np.matmul(r.as_matrix().reshape(1, 3, 3), pose[:, 0]) 38 | for i in range(data.shape[0]): 39 | img = np.zeros([shape[0], shape[1], 3]) 40 | aroot = data[[i], 0] + np.array([[0.0, 0.0, 30.0]]) 41 | aroot[:, 1] = -aroot[:, 1] 42 | params = dict(pred_shape=np.zeros([1, 10]), 43 | pred_root=aroot, 44 | pred_pose=pose[[i]]) 45 | renderImg = render.render(img.copy(), params) 46 | renderImg = (renderImg * 
255).astype(np.uint8) 47 | videoWriter.write(renderImg) 48 | videoWriter.release() 49 | -------------------------------------------------------------------------------- /mGPT/data/humanml/utils/paramUtil.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Define a kinematic tree for the skeletal struture 4 | kit_kinematic_chain = [[0, 11, 12, 13, 14, 15], [0, 16, 17, 18, 19, 20], [0, 1, 2, 3, 4], [3, 5, 6, 7], [3, 8, 9, 10]] 5 | 6 | kit_raw_offsets = np.array( 7 | [ 8 | [0, 0, 0], 9 | [0, 1, 0], 10 | [0, 1, 0], 11 | [0, 1, 0], 12 | [0, 1, 0], 13 | [1, 0, 0], 14 | [0, -1, 0], 15 | [0, -1, 0], 16 | [-1, 0, 0], 17 | [0, -1, 0], 18 | [0, -1, 0], 19 | [1, 0, 0], 20 | [0, -1, 0], 21 | [0, -1, 0], 22 | [0, 0, 1], 23 | [0, 0, 1], 24 | [-1, 0, 0], 25 | [0, -1, 0], 26 | [0, -1, 0], 27 | [0, 0, 1], 28 | [0, 0, 1] 29 | ] 30 | ) 31 | 32 | t2m_raw_offsets = np.array([[0,0,0], 33 | [1,0,0], 34 | [-1,0,0], 35 | [0,1,0], 36 | [0,-1,0], 37 | [0,-1,0], 38 | [0,1,0], 39 | [0,-1,0], 40 | [0,-1,0], 41 | [0,1,0], 42 | [0,0,1], 43 | [0,0,1], 44 | [0,1,0], 45 | [1,0,0], 46 | [-1,0,0], 47 | [0,0,1], 48 | [0,-1,0], 49 | [0,-1,0], 50 | [0,-1,0], 51 | [0,-1,0], 52 | [0,-1,0], 53 | [0,-1,0]]) 54 | 55 | t2m_kinematic_chain = [[0, 2, 5, 8, 11], [0, 1, 4, 7, 10], [0, 3, 6, 9, 12, 15], [9, 14, 17, 19, 21], [9, 13, 16, 18, 20]] 56 | t2m_left_hand_chain = [[20, 22, 23, 24], [20, 34, 35, 36], [20, 25, 26, 27], [20, 31, 32, 33], [20, 28, 29, 30]] 57 | t2m_right_hand_chain = [[21, 43, 44, 45], [21, 46, 47, 48], [21, 40, 41, 42], [21, 37, 38, 39], [21, 49, 50, 51]] 58 | 59 | 60 | kit_tgt_skel_id = '03950' 61 | 62 | t2m_tgt_skel_id = '000021' 63 | 64 | -------------------------------------------------------------------------------- /scripts/get_motion_code.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pytorch_lightning as pl 4 | import torch 5 | from pathlib import Path 6 | from tqdm import tqdm 7 | from mGPT.config import parse_args 8 | from mGPT.data.build_data import build_data 9 | from mGPT.models.build_model import build_model 10 | from mGPT.utils.load_checkpoint import load_pretrained_vae 11 | 12 | def main(): 13 | # parse options 14 | cfg = parse_args(phase="test") # parse config file 15 | cfg.TRAIN.STAGE = "token" 16 | cfg.TRAIN.BATCH_SIZE = 1 17 | 18 | # set seed 19 | pl.seed_everything(cfg.SEED_VALUE) 20 | 21 | # gpu setting 22 | if cfg.ACCELERATOR == "gpu": 23 | os.environ["PYTHONWARNINGS"] = "ignore" 24 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 25 | 26 | # create dataset 27 | datasets = build_data(cfg, phase='token') 28 | print("datasets module initialized") 29 | output_dir = os.path.join(datasets.hparams.data_root, cfg.DATASET.CODE_PATH) 30 | 31 | os.makedirs(output_dir, exist_ok=True) 32 | 33 | # create model 34 | model = build_model(cfg, datasets) 35 | if hasattr(model, "motion_vae"): 36 | model.vae = model.motion_vae 37 | print("model loaded") 38 | 39 | # Strict load vae model 40 | assert cfg.TRAIN.PRETRAINED_VAE is not None 41 | load_pretrained_vae(cfg, model) 42 | 43 | if cfg.ACCELERATOR == "gpu": 44 | model = model.to('cuda') 45 | 46 | for batch in tqdm(datasets.train_dataloader(), 47 | desc=f'motion tokenize'): 48 | name = batch['text'] 49 | 50 | pose = batch['motion'] 51 | pose = pose.cuda().float() 52 | 53 | if pose.shape[1] == 0: 54 | continue 55 | target, _ = model.vae.encode(pose) 56 | target = target.to('cpu').numpy() 57 | 58 | target_path = 
os.path.join(output_dir, name[0] + '.npy') 59 | Path(target_path).parent.mkdir(parents=True, exist_ok=True) 60 | np.save(target_path, target) 61 | 62 | print( 63 | f'Motion tokenization done, the motion tokens are saved to {output_dir}' 64 | ) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /mGPT/data/transforms/rots2joints/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | 19 | import torch 20 | from torch import Tensor, nn 21 | from pathlib import Path 22 | import os 23 | # import hydra 24 | 25 | class Rots2Joints(nn.Module): 26 | def __init__(self, path: Optional[str] = None, 27 | normalization: bool = False, 28 | eps: float = 1e-12, 29 | **kwargs) -> None: 30 | if normalization and path is None: 31 | raise TypeError("You should provide a path if normalization is on.") 32 | 33 | super().__init__() 34 | self.normalization = normalization 35 | self.eps = eps 36 | # workaround for cluster local/sync 37 | if path is not None: 38 | rel_p = path.split('/') 39 | rel_p = rel_p[rel_p.index('deps'):] 40 | rel_p = '/'.join(rel_p) 41 | # path = hydra.utils.get_original_cwd() + '/' + rel_p 42 | if normalization: 43 | mean_path = Path(path) / "mean.pt" 44 | std_path = Path(path) / "std.pt" 45 | self.register_buffer('mean', torch.load(mean_path)) 46 | self.register_buffer('std', torch.load(std_path)) 47 | 48 | def normalize(self, features: Tensor) -> Tensor: 49 | if self.normalization: 50 | features = (features - self.mean)/(self.std + self.eps) 51 | return features 52 | 53 | def unnormalize(self, features: Tensor) -> Tensor: 54 | if self.normalization: 55 | features = features * self.std + self.mean 56 | return features 57 | -------------------------------------------------------------------------------- /mGPT/data/transforms/joints2jfeats/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | 19 | import torch 20 | from torch import Tensor, nn 21 | from pathlib import Path 22 | import os 23 | 24 | 25 | class Joints2Jfeats(nn.Module): 26 | 27 | def __init__(self, 28 | path: Optional[str] = None, 29 | normalization: bool = False, 30 | eps: float = 1e-12, 31 | **kwargs) -> None: 32 | if normalization and path is None: 33 | raise TypeError( 34 | "You should provide a path if normalization is on.") 35 | 36 | super().__init__() 37 | self.normalization = normalization 38 | self.eps = eps 39 | # workaround for cluster local/sync 40 | if path is not None: 41 | # rel_p = path.split('/') 42 | # rel_p = rel_p[rel_p.index('deps'):] 43 | # rel_p = '/'.join(rel_p) 44 | pass 45 | if normalization: 46 | mean_path = Path(path) / "jfeats_mean.pt" 47 | std_path = Path(path) / "jfeats_std.pt" 48 | self.register_buffer('mean', torch.load(mean_path)) 49 | self.register_buffer('std', torch.load(std_path)) 50 | 51 | def normalize(self, features: Tensor) -> Tensor: 52 | if self.normalization: 53 | features = (features - self.mean) / (self.std + self.eps) 54 | return features 55 | 56 | def unnormalize(self, features: Tensor) -> Tensor: 57 | if self.normalization: 58 | features = features * self.std + self.mean 59 | return features 60 | -------------------------------------------------------------------------------- /mGPT/render/video.py: -------------------------------------------------------------------------------- 1 | import moviepy.editor as mp 2 | import moviepy.video.fx.all as vfx 3 | import os 4 | import imageio 5 | 6 | 7 | def mask_png(frames): 8 | for frame in frames: 9 | im = imageio.imread(frame) 10 | im[im[:, :, 3] < 1, :] = 255 11 | imageio.imwrite(frame, im[:, :, 0:3]) 12 | return 13 | 14 | 15 | class Video: 16 | def __init__(self, frame_path: str, fps: float = 12.5, res="high"): 17 | frame_path = str(frame_path) 18 | self.fps = fps 19 | 20 | self._conf = {"codec": "libx264", 21 | "fps": self.fps, 22 | "audio_codec": "aac", 23 | "temp_audiofile": "temp-audio.m4a", 24 | "remove_temp": True} 25 | 26 | if res == "low": 27 | bitrate = "500k" 28 | else: 29 | bitrate = "5000k" 30 | 31 | self._conf = {"bitrate": bitrate, 32 | "fps": self.fps} 33 | 34 | # Load video 35 | # video = mp.VideoFileClip(video1_path, audio=False) 36 | # Load with frames 37 | frames = [os.path.join(frame_path, x) 38 | for x in sorted(os.listdir(frame_path))] 39 | 40 | # mask background white for videos 41 | mask_png(frames) 42 | 43 | video = mp.ImageSequenceClip(frames, fps=fps) 44 | self.video = video 45 | self.duration = video.duration 46 | 47 | def add_text(self, text): 48 | # needs ImageMagick 49 | video_text = mp.TextClip(text, 50 | font='Amiri', 51 | color='white', 52 | method='caption', 53 | align="center", 54 | size=(self.video.w, None), 55 | fontsize=30) 56 | video_text = video_text.on_color(size=(self.video.w, video_text.h + 5), 57 | color=(0, 0, 0), 58 | col_opacity=0.6) 59 | # video_text = video_text.set_pos('bottom') 60 | video_text = video_text.set_pos('top') 61 | 62 | self.video = mp.CompositeVideoClip([self.video, video_text]) 63 | 64 | def save(self, out_path): 65 | out_path = str(out_path) 66 | self.video.subclip(0, self.duration).write_videofile( 67 | out_path, **self._conf) 68 | -------------------------------------------------------------------------------- /mGPT/losses/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 
class BaseLosses(nn.Module): 5 | def __init__(self, cfg, losses, params, losses_func, num_joints, **kwargs): 6 | super().__init__() 7 | 8 | # Save parameters 9 | self.num_joints = num_joints 10 | self._params = params 11 | 12 | # Add total indicator 13 | losses.append("total") if "total" not in losses else None 14 | 15 | # Register losses 16 | for loss in losses: 17 | self.register_buffer(loss, torch.tensor(0.0)) 18 | self.register_buffer("count", torch.tensor(0.0)) 19 | self.losses = losses 20 | 21 | # Instantiate loss functions 22 | self._losses_func = {} 23 | for loss in losses[:-1]: 24 | self._losses_func[loss] = losses_func[loss](reduction='mean') 25 | 26 | def _update_loss(self, loss: str, outputs, inputs): 27 | '''Update the loss and return the weighted loss.''' 28 | # Update the loss 29 | val = self._losses_func[loss](outputs, inputs) 30 | # self.losses_values[loss] += val.detach() 31 | getattr(self, loss).add_(val.detach()) 32 | # Return a weighted sum 33 | weighted_loss = self._params[loss] * val 34 | return weighted_loss 35 | 36 | def reset(self): 37 | '''Reset the losses to 0.''' 38 | for loss in self.losses: 39 | setattr(self, loss, torch.tensor(0.0, device=getattr(self, loss).device)) 40 | setattr(self, "count", torch.tensor(0.0, device=getattr(self, "count").device)) 41 | 42 | def compute(self, split): 43 | '''Compute the losses and return a dictionary with the losses.''' 44 | count = self.count 45 | # Loss dictionary 46 | loss_dict = {loss: getattr(self, loss)/count for loss in self.losses} 47 | # Format the losses for logging 48 | log_dict = { self.loss2logname(loss, split): value.item() 49 | for loss, value in loss_dict.items() if not torch.isnan(value)} 50 | # Reset the losses 51 | self.reset() 52 | return log_dict 53 | 54 | def loss2logname(self, loss: str, split: str): 55 | '''Convert the loss name to a log name.''' 56 | if loss == "total": 57 | log_name = f"{loss}/{split}" 58 | else: 59 | loss_type, name = loss.split("_") 60 | log_name = f"{loss_type}/{name}/{split}" 61 | return log_name 62 | -------------------------------------------------------------------------------- /mGPT/data/transforms/rots2rfeats/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | 19 | import torch 20 | from torch import Tensor, nn 21 | from pathlib import Path 22 | import os 23 | 24 | class Rots2Rfeats(nn.Module): 25 | def __init__(self, path: Optional[str] = None, 26 | normalization: bool = True, 27 | eps: float = 1e-12, 28 | **kwargs) -> None: 29 | if normalization and path is None: 30 | raise TypeError("You should provide a path if normalization is on.") 31 | 32 | super().__init__() 33 | self.normalization = normalization 34 | self.eps = eps 35 | if normalization: 36 | # workaround for cluster local/sync 37 | rel_p = path.split('/') 38 | # superhacky it is for the datatype ugly stuff change it, copy the main stuff to seperate_pairs dict 39 | if rel_p[-1] == 'separate_pairs': 40 | rel_p.remove('separate_pairs') 41 | ######################################################## 42 | # rel_p = rel_p[rel_p.index('deps'):] 43 | rel_p = '/'.join(rel_p) 44 | # path = hydra.utils.get_original_cwd() + '/' + rel_p 45 | path = rel_p 46 | mean_path = Path(path) / "rfeats_mean.pt" 47 | std_path = Path(path) / "rfeats_std.pt" 48 | 49 | self.register_buffer('mean', torch.load(mean_path)) 50 | self.register_buffer('std', torch.load(std_path)) 51 | 52 | def normalize(self, features: Tensor) -> Tensor: 53 | if self.normalization: 54 | features = (features - self.mean)/(self.std + self.eps) 55 | return features 56 | 57 | def unnormalize(self, features: Tensor) -> Tensor: 58 | if self.normalization: 59 | features = features * self.std + self.mean 60 | return features 61 | -------------------------------------------------------------------------------- /mGPT/data/tools/easyconvert.py: -------------------------------------------------------------------------------- 1 | from .geometry import * 2 | 3 | def nfeats_of(rottype): 4 | if rottype in ["rotvec", "axisangle"]: 5 | return 3 6 | elif rottype in ["rotquat", "quaternion"]: 7 | return 4 8 | elif rottype in ["rot6d", "6drot", "rotation6d"]: 9 | return 6 10 | elif rottype in ["rotmat"]: 11 | return 9 12 | else: 13 | return TypeError("This rotation type doesn't have features.") 14 | 15 | 16 | def axis_angle_to(newtype, rotations): 17 | if newtype in ["matrix"]: 18 | rotations = axis_angle_to_matrix(rotations) 19 | return rotations 20 | elif newtype in ["rotmat"]: 21 | rotations = axis_angle_to_matrix(rotations) 22 | rotations = matrix_to("rotmat", rotations) 23 | return rotations 24 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 25 | rotations = axis_angle_to_matrix(rotations) 26 | rotations = matrix_to("rot6d", rotations) 27 | return rotations 28 | elif newtype in ["rotquat", "quaternion"]: 29 | rotations = axis_angle_to_quaternion(rotations) 30 | return rotations 31 | elif newtype in ["rotvec", "axisangle"]: 32 | return rotations 33 | else: 34 | raise NotImplementedError 35 | 36 | 37 | def matrix_to(newtype, rotations): 38 | if newtype in ["matrix"]: 39 | return rotations 40 | if newtype in ["rotmat"]: 41 | rotations = rotations.reshape((*rotations.shape[:-2], 9)) 42 | return rotations 43 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 44 | rotations = matrix_to_rotation_6d(rotations) 45 | return rotations 46 | elif newtype in ["rotquat", "quaternion"]: 47 | rotations = matrix_to_quaternion(rotations) 48 | return rotations 49 | elif newtype in ["rotvec", "axisangle"]: 50 | rotations = matrix_to_axis_angle(rotations) 51 | return rotations 52 | else: 53 | raise NotImplementedError 54 | 55 | 56 | def to_matrix(oldtype, rotations): 57 | if 
oldtype in ["matrix"]: 58 | return rotations 59 | if oldtype in ["rotmat"]: 60 | rotations = rotations.reshape((*rotations.shape[:-2], 3, 3)) 61 | return rotations 62 | elif oldtype in ["rot6d", "6drot", "rotation6d"]: 63 | rotations = rotation_6d_to_matrix(rotations) 64 | return rotations 65 | elif oldtype in ["rotquat", "quaternion"]: 66 | rotations = quaternion_to_matrix(rotations) 67 | return rotations 68 | elif oldtype in ["rotvec", "axisangle"]: 69 | rotations = axis_angle_to_matrix(rotations) 70 | return rotations 71 | else: 72 | raise NotImplementedError 73 | -------------------------------------------------------------------------------- /mGPT/utils/logger.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import time 4 | import logging 5 | from omegaconf import OmegaConf 6 | from pytorch_lightning.utilities.rank_zero import rank_zero_only 7 | 8 | def create_logger(cfg, phase='train'): 9 | # root dir set by cfg 10 | root_output_dir = Path(cfg.FOLDER) 11 | # set up logger 12 | if not root_output_dir.exists(): 13 | print('=> creating {}'.format(root_output_dir)) 14 | root_output_dir.mkdir() 15 | 16 | cfg_name = cfg.NAME 17 | model = cfg.model.target.split('.')[-2] 18 | cfg_name = os.path.basename(cfg_name).split('.')[0] 19 | 20 | final_output_dir = root_output_dir / model / cfg_name 21 | cfg.FOLDER_EXP = str(final_output_dir) 22 | 23 | time_str = time.strftime('%Y-%m-%d-%H-%M-%S') 24 | 25 | new_dir(cfg, phase, time_str, final_output_dir) 26 | 27 | head = '%(asctime)-15s %(message)s' 28 | logger = config_logger(final_output_dir, time_str, phase, head) 29 | if logger is None: 30 | logger = logging.getLogger() 31 | logger.setLevel(logging.CRITICAL) 32 | logging.basicConfig(format=head) 33 | return logger 34 | 35 | 36 | @rank_zero_only 37 | def config_logger(final_output_dir, time_str, phase, head): 38 | log_file = '{}_{}_{}.log'.format('log', time_str, phase) 39 | final_log_file = final_output_dir / log_file 40 | logging.basicConfig(filename=str(final_log_file)) 41 | logger = logging.getLogger() 42 | logger.setLevel(logging.INFO) 43 | console = logging.StreamHandler() 44 | formatter = logging.Formatter(head) 45 | console.setFormatter(formatter) 46 | logging.getLogger('').addHandler(console) 47 | file_handler = logging.FileHandler(final_log_file, 'w') 48 | file_handler.setFormatter(logging.Formatter(head)) 49 | file_handler.setLevel(logging.INFO) 50 | logging.getLogger('').addHandler(file_handler) 51 | return logger 52 | 53 | 54 | @rank_zero_only 55 | def new_dir(cfg, phase, time_str, final_output_dir): 56 | # new experiment folder 57 | cfg.TIME = str(time_str) 58 | if os.path.exists(final_output_dir) and not os.path.exists(cfg.TRAIN.RESUME) and not cfg.DEBUG and phase not in ['test', 'demo']: 59 | file_list = sorted(os.listdir(final_output_dir), reverse=True) 60 | for item in file_list: 61 | if item.endswith('.log'): 62 | os.rename(str(final_output_dir), str(final_output_dir) + '_' + cfg.TIME) 63 | break 64 | final_output_dir.mkdir(parents=True, exist_ok=True) 65 | # write config yaml 66 | config_file = '{}_{}_{}.yaml'.format('config', time_str, phase) 67 | final_config_file = final_output_dir / config_file 68 | OmegaConf.save(config=cfg, f=final_config_file) 69 | -------------------------------------------------------------------------------- /mGPT/utils/tensors.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def lengths_to_mask(lengths): 5 
| max_len = max(lengths) 6 | mask = torch.arange(max_len, device=lengths.device).expand( 7 | len(lengths), max_len) < lengths.unsqueeze(1) 8 | return mask 9 | 10 | 11 | def collate_tensors(batch): 12 | dims = batch[0].dim() 13 | max_size = [max([b.size(i) for b in batch]) for i in range(dims)] 14 | size = (len(batch),) + tuple(max_size) 15 | canvas = batch[0].new_zeros(size=size) 16 | for i, b in enumerate(batch): 17 | sub_tensor = canvas[i] 18 | for d in range(dims): 19 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 20 | sub_tensor.add_(b) 21 | return canvas 22 | 23 | 24 | def collate(batch): 25 | databatch = [b[0] for b in batch] 26 | labelbatch = [b[1] for b in batch] 27 | lenbatch = [len(b[0][0][0]) for b in batch] 28 | 29 | databatchTensor = collate_tensors(databatch) 30 | labelbatchTensor = torch.as_tensor(labelbatch) 31 | lenbatchTensor = torch.as_tensor(lenbatch) 32 | 33 | maskbatchTensor = lengths_to_mask(lenbatchTensor) 34 | # x - [bs, njoints, nfeats, lengths] 35 | # - nfeats, the representation of a joint 36 | # y - [bs] 37 | # mask - [bs, lengths] 38 | # lengths - [bs] 39 | batch = {"x": databatchTensor, "y": labelbatchTensor, 40 | "mask": maskbatchTensor, 'lengths': lenbatchTensor} 41 | return batch 42 | 43 | 44 | # slow version with padding 45 | def collate_data3d_slow(batch): 46 | batchTensor = {} 47 | for key in batch[0].keys(): 48 | databatch = [b[key] for b in batch] 49 | batchTensor[key] = collate_tensors(databatch) 50 | batch = batchTensor 51 | # theta - [bs, lengths, 85], theta shape (85,) 52 | # - (np.array([1., 0., 0.]), pose(72), shape(10)), axis=0) 53 | # kp_2d - [bs, lengths, njoints, nfeats], nfeats (x,y,weight) 54 | # kp_3d - [bs, lengths, njoints, nfeats], nfeats (x,y,z) 55 | # w_smpl - [bs, lengths] zeros 56 | # w_3d - [bs, lengths] zeros 57 | return batch 58 | 59 | def collate_data3d(batch): 60 | batchTensor = {} 61 | for key in batch[0].keys(): 62 | databatch = [b[key] for b in batch] 63 | if key == "paths": 64 | batchTensor[key] = databatch 65 | else: 66 | batchTensor[key] = torch.stack(databatch,axis=0) 67 | batch = batchTensor 68 | # theta - [bs, lengths, 85], theta shape (85,) 69 | # - (np.array([1., 0., 0.]), pose(72), shape(10)), axis=0) 70 | # kp_2d - [bs, lengths, njoints, nfeats], nfeats (x,y,weight) 71 | # kp_3d - [bs, lengths, njoints, nfeats], nfeats (x,y,z) 72 | # w_smpl - [bs, lengths] zeros 73 | # w_3d - [bs, lengths] zeros 74 | return batch 75 | -------------------------------------------------------------------------------- /mGPT/data/transforms/xyz.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | from torch import Tensor 19 | 20 | from .base import Datastruct, dataclass, Transform 21 | from ..tools import collate_tensor_with_padding 22 | 23 | from .joints2jfeats import Joints2Jfeats 24 | 25 | 26 | class XYZTransform(Transform): 27 | def __init__(self, joints2jfeats: Joints2Jfeats, **kwargs): 28 | self.joints2jfeats = joints2jfeats 29 | 30 | def Datastruct(self, **kwargs): 31 | return XYZDatastruct(_joints2jfeats=self.joints2jfeats, 32 | transforms=self, 33 | **kwargs) 34 | 35 | def __repr__(self): 36 | return "XYZTransform()" 37 | 38 | 39 | @dataclass 40 | class XYZDatastruct(Datastruct): 41 | transforms: XYZTransform 42 | _joints2jfeats: Joints2Jfeats 43 | 44 | features: Optional[Tensor] = None 45 | joints_: Optional[Tensor] = None 46 | jfeats_: Optional[Tensor] = None 47 | 48 | def __post_init__(self): 49 | self.datakeys = ["features", "joints_", "jfeats_"] 50 | # starting point 51 | if self.features is not None and self.jfeats_ is None: 52 | self.jfeats_ = self.features 53 | 54 | @property 55 | def joints(self): 56 | # Cached value 57 | if self.joints_ is not None: 58 | return self.joints_ 59 | 60 | # self.jfeats_ should be defined 61 | assert self.jfeats_ is not None 62 | 63 | self._joints2jfeats.to(self.jfeats.device) 64 | self.joints_ = self._joints2jfeats.inverse(self.jfeats) 65 | return self.joints_ 66 | 67 | @property 68 | def jfeats(self): 69 | # Cached value 70 | if self.jfeats_ is not None: 71 | return self.jfeats_ 72 | 73 | # self.joints_ should be defined 74 | assert self.joints_ is not None 75 | 76 | self._joints2jfeats.to(self.joints.device) 77 | self.jfeats_ = self._joints2jfeats(self.joints) 78 | return self.jfeats_ 79 | 80 | def __len__(self): 81 | return len(self.jfeats) 82 | -------------------------------------------------------------------------------- /mGPT/data/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import rich 3 | import pickle 4 | import numpy as np 5 | 6 | 7 | def lengths_to_mask(lengths): 8 | max_len = max(lengths) 9 | mask = torch.arange(max_len, device=lengths.device).expand( 10 | len(lengths), max_len) < lengths.unsqueeze(1) 11 | return mask 12 | 13 | 14 | # padding to max length in one batch 15 | def collate_tensors(batch): 16 | if isinstance(batch[0], np.ndarray): 17 | batch = [torch.tensor(b).float() for b in batch] 18 | 19 | dims = batch[0].dim() 20 | max_size = [max([b.size(i) for b in batch]) for i in range(dims)] 21 | size = (len(batch), ) + tuple(max_size) 22 | canvas = batch[0].new_zeros(size=size) 23 | for i, b in enumerate(batch): 24 | sub_tensor = canvas[i] 25 | for d in range(dims): 26 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 27 | sub_tensor.add_(b) 28 | return canvas 29 | 30 | def humanml3d_collate(batch): 31 | notnone_batches = [b for b in batch if b is not None] 32 | EvalFlag = False if notnone_batches[0][5] is None else True 33 | 34 | # Sort by text length 35 | if EvalFlag: 36 | notnone_batches.sort(key=lambda x: x[5], reverse=True) 37 | 38 | # Motion only 39 | adapted_batch = { 40 | "motion": 41 | collate_tensors([torch.tensor(b[1]).float() for b in notnone_batches]), 42 | "length": [b[2] for b in notnone_batches], 43 | } 44 | 45 | # Text and motion 46 | if notnone_batches[0][0] is not None: 47 | adapted_batch.update({ 48 | "text": [b[0] for b in notnone_batches], 49 | "all_captions": [b[7] for b in notnone_batches], 50 | }) 51 | 52 | # Evaluation related 53 | 
if EvalFlag: 54 | adapted_batch.update({ 55 | "text": [b[0] for b in notnone_batches], 56 | "word_embs": 57 | collate_tensors( 58 | [torch.tensor(b[3]).float() for b in notnone_batches]), 59 | "pos_ohot": 60 | collate_tensors( 61 | [torch.tensor(b[4]).float() for b in notnone_batches]), 62 | "text_len": 63 | collate_tensors([torch.tensor(b[5]) for b in notnone_batches]), 64 | "tokens": [b[6] for b in notnone_batches], 65 | }) 66 | 67 | # Tasks 68 | if len(notnone_batches[0]) == 9: 69 | adapted_batch.update({"tasks": [b[8] for b in notnone_batches]}) 70 | 71 | return adapted_batch 72 | 73 | 74 | def load_pkl(path, description=None, progressBar=False): 75 | if progressBar: 76 | with rich.progress.open(path, 'rb', description=description) as file: 77 | data = pickle.load(file) 78 | else: 79 | with open(path, 'rb') as file: 80 | data = pickle.load(file) 81 | return data 82 | -------------------------------------------------------------------------------- /mGPT/data/transforms/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from dataclasses import dataclass, fields 18 | 19 | 20 | class Transform: 21 | 22 | def collate(self, lst_datastruct): 23 | from ..tools import collate_tensor_with_padding 24 | example = lst_datastruct[0] 25 | 26 | def collate_or_none(key): 27 | if example[key] is None: 28 | return None 29 | key_lst = [x[key] for x in lst_datastruct] 30 | return collate_tensor_with_padding(key_lst) 31 | 32 | kwargs = {key: collate_or_none(key) for key in example.datakeys} 33 | 34 | return self.Datastruct(**kwargs) 35 | 36 | 37 | # Inspired from SMPLX library 38 | # need to define "datakeys" and transforms 39 | @dataclass 40 | class Datastruct: 41 | 42 | def __getitem__(self, key): 43 | return getattr(self, key) 44 | 45 | def __setitem__(self, key, value): 46 | self.__dict__[key] = value 47 | 48 | def get(self, key, default=None): 49 | return getattr(self, key, default) 50 | 51 | def __iter__(self): 52 | return self.keys() 53 | 54 | def keys(self): 55 | keys = [t.name for t in fields(self)] 56 | return iter(keys) 57 | 58 | def values(self): 59 | values = [getattr(self, t.name) for t in fields(self)] 60 | return iter(values) 61 | 62 | def items(self): 63 | data = [(t.name, getattr(self, t.name)) for t in fields(self)] 64 | return iter(data) 65 | 66 | def to(self, *args, **kwargs): 67 | for key in self.datakeys: 68 | if self[key] is not None: 69 | self[key] = self[key].to(*args, **kwargs) 70 | return self 71 | 72 | @property 73 | def device(self): 74 | return self[self.datakeys[0]].device 75 | 76 | def detach(self): 77 | 78 | def detach_or_none(tensor): 79 | if tensor is not None: 80 | return tensor.detach() 81 | return None 82 | 83 | kwargs = {key: detach_or_none(self[key]) for key in self.datakeys} 84 | 
return self.transforms.Datastruct(**kwargs) 85 | -------------------------------------------------------------------------------- /mGPT/data/humanml/dataset_t2m_token.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from torch.utils import data 4 | from .dataset_t2m import Text2MotionDataset 5 | import codecs as cs 6 | from os.path import join as pjoin 7 | 8 | 9 | class Text2MotionDatasetToken(data.Dataset): 10 | 11 | def __init__( 12 | self, 13 | data_root, 14 | split, 15 | mean, 16 | std, 17 | max_motion_length=196, 18 | min_motion_length=40, 19 | unit_length=4, 20 | fps=20, 21 | tmpFile=True, 22 | tiny=False, 23 | debug=False, 24 | **kwargs, 25 | ): 26 | 27 | self.max_motion_length = max_motion_length 28 | self.min_motion_length = min_motion_length 29 | self.unit_length = unit_length 30 | 31 | # Data mean and std 32 | self.mean = mean 33 | self.std = std 34 | 35 | # Data path 36 | split_file = pjoin(data_root, split + '.txt') 37 | motion_dir = pjoin(data_root, 'new_joint_vecs') 38 | text_dir = pjoin(data_root, 'texts') 39 | 40 | # Data id list 41 | self.id_list = [] 42 | with cs.open(split_file, "r") as f: 43 | for line in f.readlines(): 44 | self.id_list.append(line.strip()) 45 | 46 | new_name_list = [] 47 | length_list = [] 48 | data_dict = {} 49 | for name in self.id_list: 50 | try: 51 | motion = np.load(pjoin(motion_dir, name + '.npy')) 52 | if (len(motion)) < self.min_motion_length or (len(motion) >= 200): 53 | continue 54 | 55 | data_dict[name] = {'motion': motion, 56 | 'length': len(motion), 57 | 'name': name} 58 | new_name_list.append(name) 59 | length_list.append(len(motion)) 60 | except: 61 | # Some motion may not exist in KIT dataset 62 | pass 63 | 64 | self.length_arr = np.array(length_list) 65 | self.data_dict = data_dict 66 | self.name_list = new_name_list 67 | self.nfeats = motion.shape[-1] 68 | 69 | 70 | def __len__(self): 71 | return len(self.data_dict) 72 | 73 | def __getitem__(self, item): 74 | name = self.name_list[item] 75 | data = self.data_dict[name] 76 | motion, m_length = data['motion'], data['length'] 77 | 78 | m_length = (m_length // self.unit_length) * self.unit_length 79 | 80 | idx = random.randint(0, len(motion) - m_length) 81 | motion = motion[idx:idx+m_length] 82 | 83 | "Z Normalization" 84 | motion = (motion - self.mean) / self.std 85 | 86 | return name, motion, m_length, True, True, True, True, True, True 87 | -------------------------------------------------------------------------------- /mGPT/models/utils/adain.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class AdaptiveInstanceNorm1d(nn.Module): 6 | def __init__(self, num_features, eps=1e-5, momentum=0.1): 7 | super(AdaptiveInstanceNorm1d, self).__init__() 8 | self.num_features = num_features 9 | self.eps = eps 10 | self.momentum = momentum 11 | self.weight = None 12 | self.bias = None 13 | self.register_buffer('running_mean', torch.zeros(num_features)) 14 | self.register_buffer('running_var', torch.ones(num_features)) 15 | 16 | def forward(self, x, direct_weighting=False, no_std=False): 17 | assert self.weight is not None and \ 18 | self.bias is not None, "Please assign AdaIN weight first" 19 | # (bs, nfeats, nframe) <= (nframe, bs, nfeats) 20 | x = x.permute(1,2,0) 21 | 22 | b, c = x.size(0), x.size(1) # batch size & channels 23 | running_mean = self.running_mean.repeat(b) 24 | running_var = 
self.running_var.repeat(b) 25 | # self.weight = torch.ones_like(self.weight) 26 | 27 | if direct_weighting: 28 | x_reshaped = x.contiguous().view(b * c) 29 | if no_std: 30 | out = x_reshaped + self.bias 31 | else: 32 | out = x_reshaped.mul(self.weight) + self.bias 33 | out = out.view(b, c, *x.size()[2:]) 34 | else: 35 | x_reshaped = x.contiguous().view(1, b * c, *x.size()[2:]) 36 | out = F.batch_norm( 37 | x_reshaped, running_mean, running_var, self.weight, self.bias, 38 | True, self.momentum, self.eps) 39 | out = out.view(b, c, *x.size()[2:]) 40 | 41 | # (nframe, bs, nfeats) <= (bs, nfeats, nframe) 42 | out = out.permute(2,0,1) 43 | return out 44 | 45 | def __repr__(self): 46 | return self.__class__.__name__ + '(' + str(self.num_features) + ')' 47 | 48 | def assign_adain_params(adain_params, model): 49 | # assign the adain_params to the AdaIN layers in model 50 | for m in model.modules(): 51 | if m.__class__.__name__ == "AdaptiveInstanceNorm1d": 52 | mean = adain_params[: , : m.num_features] 53 | std = adain_params[: , m.num_features: 2 * m.num_features] 54 | m.bias = mean.contiguous().view(-1) 55 | m.weight = std.contiguous().view(-1) 56 | if adain_params.size(1) > 2 * m.num_features: 57 | adain_params = adain_params[: , 2 * m.num_features:] 58 | 59 | 60 | def get_num_adain_params(model): 61 | # return the number of AdaIN parameters needed by the model 62 | num_adain_params = 0 63 | for m in model.modules(): 64 | if m.__class__.__name__ == "AdaptiveInstanceNorm1d": 65 | num_adain_params += 2 * m.num_features 66 | return num_adain_params 67 | -------------------------------------------------------------------------------- /mGPT/render/blender/meshes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .materials import body_material 4 | 5 | # green 6 | # GT_SMPL = body_material(0.009, 0.214, 0.029) 7 | GT_SMPL = body_material(0.035, 0.415, 0.122) 8 | 9 | # blue 10 | # GEN_SMPL = body_material(0.022, 0.129, 0.439) 11 | # Blues => cmap(0.87) 12 | # GEN_SMPL = body_material(0.035, 0.322, 0.615) 13 | # Oranges => cmap(0.87) 14 | GEN_SMPL = body_material(0.658, 0.214, 0.0114) 15 | 16 | 17 | class Meshes: 18 | def __init__(self, data, *, gt, mode, faces_path, canonicalize, always_on_floor, oldrender=True, is_smplx=False, **kwargs): 19 | data = prepare_meshes(data, canonicalize=canonicalize, 20 | always_on_floor=always_on_floor, 21 | is_smplx=is_smplx) 22 | 23 | if isinstance(faces_path, str): 24 | self.faces = np.load(faces_path) 25 | else: 26 | self.faces = faces_path 27 | 28 | self.data = data 29 | self.mode = mode 30 | self.oldrender = oldrender 31 | 32 | self.N = len(data) 33 | self.trajectory = data[:, :, [0, 1]].mean(1) 34 | 35 | if gt: 36 | self.mat = GT_SMPL 37 | else: 38 | self.mat = GEN_SMPL 39 | 40 | def get_sequence_mat(self, frac): 41 | import matplotlib 42 | # cmap = matplotlib.cm.get_cmap('Blues') 43 | cmap = matplotlib.cm.get_cmap('Oranges') 44 | # begin = 0.60 45 | # end = 0.90 46 | begin = 0.50 47 | end = 0.90 48 | rgbcolor = cmap(begin + (end-begin)*frac) 49 | mat = body_material(*rgbcolor, oldrender=self.oldrender) 50 | return mat 51 | 52 | def get_root(self, index): 53 | return self.data[index].mean(0) 54 | 55 | def get_mean_root(self): 56 | return self.data.mean((0, 1)) 57 | 58 | def load_in_blender(self, index, mat): 59 | vertices = self.data[index] 60 | faces = self.faces 61 | name = f"{str(index).zfill(4)}" 62 | 63 | from .tools import load_numpy_vertices_into_blender 64 | 
load_numpy_vertices_into_blender(vertices, faces, name, mat) 65 | 66 | return name 67 | 68 | def __len__(self): 69 | return self.N 70 | 71 | 72 | def prepare_meshes(data, canonicalize=True, always_on_floor=False, is_smplx=False): 73 | if canonicalize: 74 | print("No canonicalization for now") 75 | 76 | # fitted mesh do not need fixing axis 77 | # fix axis 78 | if is_smplx: 79 | data[..., 1] = - data[..., 1] 80 | # data[..., 0] = - data[..., 0] 81 | 82 | 83 | # Swap axis (gravity=Z instead of Y) 84 | data = data[..., [2, 0, 1]] 85 | 86 | # Remove the floor 87 | data[..., 2] -= data[..., 2].min() 88 | 89 | # Put all the body on the floor 90 | if always_on_floor: 91 | data[..., 2] -= data[..., 2].min(1)[:, None] 92 | 93 | return data 94 | -------------------------------------------------------------------------------- /mGPT/data/humanml/utils/word_vectorizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | from os.path import join as pjoin 4 | 5 | POS_enumerator = { 6 | 'VERB': 0, 7 | 'NOUN': 1, 8 | 'DET': 2, 9 | 'ADP': 3, 10 | 'NUM': 4, 11 | 'AUX': 5, 12 | 'PRON': 6, 13 | 'ADJ': 7, 14 | 'ADV': 8, 15 | 'Loc_VIP': 9, 16 | 'Body_VIP': 10, 17 | 'Obj_VIP': 11, 18 | 'Act_VIP': 12, 19 | 'Desc_VIP': 13, 20 | 'OTHER': 14, 21 | } 22 | 23 | Loc_list = ('left', 'right', 'clockwise', 'counterclockwise', 'anticlockwise', 'forward', 'back', 'backward', 24 | 'up', 'down', 'straight', 'curve') 25 | 26 | Body_list = ('arm', 'chin', 'foot', 'feet', 'face', 'hand', 'mouth', 'leg', 'waist', 'eye', 'knee', 'shoulder', 'thigh') 27 | 28 | Obj_List = ('stair', 'dumbbell', 'chair', 'window', 'floor', 'car', 'ball', 'handrail', 'baseball', 'basketball') 29 | 30 | Act_list = ('walk', 'run', 'swing', 'pick', 'bring', 'kick', 'put', 'squat', 'throw', 'hop', 'dance', 'jump', 'turn', 31 | 'stumble', 'dance', 'stop', 'sit', 'lift', 'lower', 'raise', 'wash', 'stand', 'kneel', 'stroll', 32 | 'rub', 'bend', 'balance', 'flap', 'jog', 'shuffle', 'lean', 'rotate', 'spin', 'spread', 'climb') 33 | 34 | Desc_list = ('slowly', 'carefully', 'fast', 'careful', 'slow', 'quickly', 'happy', 'angry', 'sad', 'happily', 'angrily', 'sadly') 35 | 36 | VIP_dict = { 37 | 'Loc_VIP': Loc_list, 38 | 'Body_VIP': Body_list, 39 | 'Obj_VIP': Obj_List, 40 | 'Act_VIP': Act_list, 41 | 'Desc_VIP': Desc_list, 42 | } 43 | 44 | 45 | class WordVectorizer(object): 46 | def __init__(self, meta_root, prefix): 47 | vectors = np.load(pjoin(meta_root, '%s_data.npy'%prefix)) 48 | words = pickle.load(open(pjoin(meta_root, '%s_words.pkl'%prefix), 'rb')) 49 | word2idx = pickle.load(open(pjoin(meta_root, '%s_idx.pkl'%prefix), 'rb')) 50 | self.word2vec = {w: vectors[word2idx[w]] for w in words} 51 | 52 | def _get_pos_ohot(self, pos): 53 | pos_vec = np.zeros(len(POS_enumerator)) 54 | if pos in POS_enumerator: 55 | pos_vec[POS_enumerator[pos]] = 1 56 | else: 57 | pos_vec[POS_enumerator['OTHER']] = 1 58 | return pos_vec 59 | 60 | def __len__(self): 61 | return len(self.word2vec) 62 | 63 | def __getitem__(self, item): 64 | word, pos = item.split('/') 65 | if word in self.word2vec: 66 | word_vec = self.word2vec[word] 67 | vip_pos = None 68 | for key, values in VIP_dict.items(): 69 | if word in values: 70 | vip_pos = key 71 | break 72 | if vip_pos is not None: 73 | pos_vec = self._get_pos_ohot(vip_pos) 74 | else: 75 | pos_vec = self._get_pos_ohot(pos) 76 | else: 77 | word_vec = self.word2vec['unk'] 78 | pos_vec = self._get_pos_ohot('OTHER') 79 | return word_vec, pos_vec 80 | 
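# Minimal usage sketch (illustrative only): items are queried as "word/POS" strings and
# return a GloVe word embedding plus a one-hot part-of-speech vector of size
# len(POS_enumerator). The glove folder and the 'our_vab' prefix below are assumed
# placeholder values; the actual ones come from the dataset/evaluator configuration.
if __name__ == '__main__':
    w_vectorizer = WordVectorizer('./deps/glove', 'our_vab')  # hypothetical paths
    word_emb, pos_onehot = w_vectorizer['person/NOUN']
    print(word_emb.shape, pos_onehot.shape)  # e.g. (300,) and (15,)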
-------------------------------------------------------------------------------- /mGPT/archs/tools/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | class nonlinearity(nn.Module): 5 | def __init__(self): 6 | super().__init__() 7 | 8 | def forward(self, x): 9 | # swish 10 | return x * torch.sigmoid(x) 11 | 12 | class ResConv1DBlock(nn.Module): 13 | def __init__(self, n_in, n_state, dilation=1, activation='silu', norm=None, dropout=None): 14 | super().__init__() 15 | padding = dilation 16 | self.norm = norm 17 | if norm == "LN": 18 | self.norm1 = nn.LayerNorm(n_in) 19 | self.norm2 = nn.LayerNorm(n_in) 20 | elif norm == "GN": 21 | self.norm1 = nn.GroupNorm(num_groups=32, num_channels=n_in, eps=1e-6, affine=True) 22 | self.norm2 = nn.GroupNorm(num_groups=32, num_channels=n_in, eps=1e-6, affine=True) 23 | elif norm == "BN": 24 | self.norm1 = nn.BatchNorm1d(num_features=n_in, eps=1e-6, affine=True) 25 | self.norm2 = nn.BatchNorm1d(num_features=n_in, eps=1e-6, affine=True) 26 | 27 | else: 28 | self.norm1 = nn.Identity() 29 | self.norm2 = nn.Identity() 30 | 31 | if activation == "relu": 32 | self.activation1 = nn.ReLU() 33 | self.activation2 = nn.ReLU() 34 | 35 | elif activation == "silu": 36 | self.activation1 = nonlinearity() 37 | self.activation2 = nonlinearity() 38 | 39 | elif activation == "gelu": 40 | self.activation1 = nn.GELU() 41 | self.activation2 = nn.GELU() 42 | 43 | 44 | 45 | self.conv1 = nn.Conv1d(n_in, n_state, 3, 1, padding, dilation) 46 | self.conv2 = nn.Conv1d(n_state, n_in, 1, 1, 0,) 47 | 48 | 49 | def forward(self, x): 50 | x_orig = x 51 | if self.norm == "LN": 52 | x = self.norm1(x.transpose(-2, -1)) 53 | x = self.activation1(x.transpose(-2, -1)) 54 | else: 55 | x = self.norm1(x) 56 | x = self.activation1(x) 57 | 58 | x = self.conv1(x) 59 | 60 | if self.norm == "LN": 61 | x = self.norm2(x.transpose(-2, -1)) 62 | x = self.activation2(x.transpose(-2, -1)) 63 | else: 64 | x = self.norm2(x) 65 | x = self.activation2(x) 66 | 67 | x = self.conv2(x) 68 | x = x + x_orig 69 | return x 70 | 71 | class Resnet1D(nn.Module): 72 | def __init__(self, n_in, n_depth, dilation_growth_rate=1, reverse_dilation=True, activation='relu', norm=None): 73 | super().__init__() 74 | 75 | blocks = [ResConv1DBlock(n_in, n_in, dilation=dilation_growth_rate ** depth, activation=activation, norm=norm) for depth in range(n_depth)] 76 | if reverse_dilation: 77 | blocks = blocks[::-1] 78 | 79 | self.model = nn.Sequential(*blocks) 80 | 81 | def forward(self, x): 82 | return self.model(x) -------------------------------------------------------------------------------- /mGPT/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | 5 | # load example data 6 | def load_example_input(txt_path): 7 | file = open(txt_path, "r") 8 | Lines = file.readlines() 9 | count = 0 10 | texts, lens = [], [] 11 | # Strips the newline character 12 | for line in Lines: 13 | count += 1 14 | s = line.strip() 15 | s_l = s.split(" ")[0] 16 | s_t = s[(len(s_l) + 1):] 17 | lens.append(int(s_l)) 18 | texts.append(s_t) 19 | print("Length-{}: {}".format(s_l, s_t)) 20 | return texts, lens 21 | 22 | 23 | # render batch 24 | def render_batch(npy_dir, execute_python="./scripts/visualize_motion.sh", mode="sequence"): 25 | os.system(f"{execute_python} {npy_dir} {mode}") 26 | 27 | 28 | # render 29 | def render(execute_python, npy_path, jointtype, cfg_path): 30 | # 
execute_python = "/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender" 31 | # execute_python = "/apdcephfs/share_1227775/mingzhenzhu/jiangbiao/libs/blender-2.93.2-linux-x64/blender" 32 | export_scripts = "render.py" 33 | 34 | os.system( 35 | f"{execute_python} --background --python {export_scripts} -- --cfg={cfg_path} --npy={npy_path}" 36 | ) 37 | 38 | fig_path = Path(str(npy_path).replace(".npy", ".png")) 39 | return fig_path 40 | 41 | 42 | # origin render 43 | # def render(npy_path, jointtype): 44 | # execute_python = '/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender' 45 | # export_scripts = 'render.py' 46 | 47 | # os.system(f"{execute_python} --background --python {export_scripts} -- npy={npy_path} jointstype={jointtype}") 48 | 49 | # fig_path = Path(str(npy_path).replace(".npy",".png")) 50 | # return fig_path 51 | 52 | # export fbx with hand params from pkl files 53 | # refer to /apdcephfs/share_1227775/shingxchen/AIMotion/TMOST/scripts/fbx_output_smplx.py 54 | def export_fbx_hand(pkl_path): 55 | input = pkl_path 56 | output = pkl_path.replace(".pkl", ".fbx") 57 | 58 | execute_python = "/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender" 59 | export_scripts = "./scripts/fbx_output_smplx.py" 60 | os.system( 61 | f"{execute_python} -noaudio --background --python {export_scripts}\ 62 | --input {input} \ 63 | --output {output}" 64 | ) 65 | 66 | 67 | # export fbx without hand params from pkl files 68 | # refer to /apdcephfs/share_1227775/shingxchen/AIMotion/TMOST/scripts/fbx_output.py 69 | def export_fbx(pkl_path): 70 | input = pkl_path 71 | output = pkl_path.replace(".pkl", ".fbx") 72 | 73 | execute_python = "/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender" 74 | export_scripts = "./scripts/fbx_output.py" 75 | os.system( 76 | f"{execute_python} -noaudio --background --python {export_scripts}\ 77 | --input {input} \ 78 | --output {output}" 79 | ) 80 | -------------------------------------------------------------------------------- /mGPT/utils/easyconvert.py: -------------------------------------------------------------------------------- 1 | from .geometry_tools import * 2 | 3 | 4 | def rep_to_rep(oldtype, newtype, rotations): 5 | if newtype in ["matrix"]: 6 | return to_matrix(oldtype, rotations) 7 | 8 | if oldtype in ["rotvec", "axisangle"]: 9 | return axis_angle_to(newtype, rotations) 10 | elif oldtype in ["matrix"]: 11 | return matrix_to(newtype, rotations) 12 | else: 13 | raise NotImplementedError("Only rotvec and matrix are supported.") 14 | 15 | def nfeats_of(rottype): 16 | if rottype in ["rotvec", "axisangle"]: 17 | return 3 18 | elif rottype in ["rotquat", "quaternion"]: 19 | return 4 20 | elif rottype in ["rot6d", "6drot", "rotation6d"]: 21 | return 6 22 | elif rottype in ["rotmat"]: 23 | return 9 24 | else: 25 | return TypeError("This rotation type doesn't have features.") 26 | 27 | 28 | def axis_angle_to(newtype, rotations): 29 | if newtype in ["matrix"]: 30 | rotations = axis_angle_to_matrix(rotations) 31 | return rotations 32 | elif newtype in ["rotmat"]: 33 | rotations = axis_angle_to_matrix(rotations) 34 | rotations = matrix_to("rotmat", rotations) 35 | return rotations 36 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 37 | rotations = axis_angle_to_matrix(rotations) 38 | rotations = matrix_to("rot6d", rotations) 39 | return rotations 40 | elif newtype in ["rotquat", "quaternion"]: 41 | rotations = 
axis_angle_to_quaternion(rotations) 42 | return rotations 43 | elif newtype in ["rotvec", "axisangle"]: 44 | return rotations 45 | else: 46 | raise NotImplementedError 47 | 48 | 49 | def matrix_to(newtype, rotations): 50 | if newtype in ["matrix"]: 51 | return rotations 52 | if newtype in ["rotmat"]: 53 | rotations = rotations.reshape((*rotations.shape[:-2], 9)) 54 | return rotations 55 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 56 | rotations = matrix_to_rotation_6d(rotations) 57 | return rotations 58 | elif newtype in ["rotquat", "quaternion"]: 59 | rotations = matrix_to_quaternion(rotations) 60 | return rotations 61 | elif newtype in ["rotvec", "axisangle"]: 62 | rotations = matrix_to_axis_angle(rotations) 63 | return rotations 64 | else: 65 | raise NotImplementedError 66 | 67 | 68 | def to_matrix(oldtype, rotations): 69 | if oldtype in ["matrix"]: 70 | return rotations 71 | if oldtype in ["rotmat"]: 72 | rotations = rotations.reshape((*rotations.shape[:-2], 3, 3)) 73 | return rotations 74 | elif oldtype in ["rot6d", "6drot", "rotation6d"]: 75 | rotations = rotation_6d_to_matrix(rotations) 76 | return rotations 77 | elif oldtype in ["rotquat", "quaternion"]: 78 | rotations = quaternion_to_matrix(rotations) 79 | return rotations 80 | elif oldtype in ["rotvec", "axisangle"]: 81 | rotations = axis_angle_to_matrix(rotations) 82 | return rotations 83 | else: 84 | raise NotImplementedError 85 | -------------------------------------------------------------------------------- /mGPT/render/blender/floor.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | from .materials import floor_mat 3 | 4 | 5 | def get_trajectory(data, is_mesh): 6 | if is_mesh: 7 | # mean of the vertices 8 | trajectory = data[:, :, [0, 1]].mean(1) 9 | else: 10 | # get the root joint 11 | trajectory = data[:, 0, [0, 1]] 12 | return trajectory 13 | 14 | 15 | def plot_floor(data, big_plane=True): 16 | # Create a floor 17 | minx, miny, _ = data.min(axis=(0, 1)) 18 | maxx, maxy, _ = data.max(axis=(0, 1)) 19 | minz = 0 20 | 21 | location = ((maxx + minx)/2, (maxy + miny)/2, 0) 22 | # a little bit bigger 23 | scale = (1.08*(maxx - minx)/2, 1.08*(maxy - miny)/2, 1) 24 | 25 | bpy.ops.mesh.primitive_plane_add(size=2, enter_editmode=False, align='WORLD', location=location, scale=(1, 1, 1)) 26 | 27 | bpy.ops.transform.resize(value=scale, orient_type='GLOBAL', orient_matrix=((1, 0, 0), (0, 1, 0), (0, 0, 1)), orient_matrix_type='GLOBAL', 28 | constraint_axis=(False, True, False), mirror=True, use_proportional_edit=False, 29 | proportional_edit_falloff='SMOOTH', proportional_size=1, use_proportional_connected=False, 30 | use_proportional_projected=False, release_confirm=True) 31 | obj = bpy.data.objects["Plane"] 32 | obj.name = "SmallPlane" 33 | obj.data.name = "SmallPlane" 34 | 35 | if not big_plane: 36 | obj.active_material = floor_mat(color=(0.2, 0.2, 0.2, 1)) 37 | else: 38 | obj.active_material = floor_mat(color=(0.1, 0.1, 0.1, 1)) 39 | 40 | if big_plane: 41 | location = ((maxx + minx)/2, (maxy + miny)/2, -0.01) 42 | bpy.ops.mesh.primitive_plane_add(size=2, enter_editmode=False, align='WORLD', location=location, scale=(1, 1, 1)) 43 | 44 | bpy.ops.transform.resize(value=[2*x for x in scale], orient_type='GLOBAL', orient_matrix=((1, 0, 0), (0, 1, 0), (0, 0, 1)), orient_matrix_type='GLOBAL', 45 | constraint_axis=(False, True, False), mirror=True, use_proportional_edit=False, 46 | proportional_edit_falloff='SMOOTH', proportional_size=1, use_proportional_connected=False, 47 | 
use_proportional_projected=False, release_confirm=True) 48 | 49 | obj = bpy.data.objects["Plane"] 50 | obj.name = "BigPlane" 51 | obj.data.name = "BigPlane" 52 | obj.active_material = floor_mat(color=(0.2, 0.2, 0.2, 1)) 53 | 54 | 55 | def show_traj(coords): 56 | pass 57 | # create the Curve Datablock 58 | # curveData = bpy.data.curves.new('myCurve', type='CURVE') 59 | # curveData.dimensions = '3D' 60 | # curveData.resolution_u = 2 61 | 62 | # # map coords to spline 63 | # polyline = curveData.splines.new('POLY') 64 | # polyline.points.add(len(coords)-1) 65 | # for i, coord in enumerate(coords): 66 | # x, y = coord 67 | # polyline.points[i].co = (x, y, 0.001, 1) 68 | 69 | # # create Object 70 | # curveOB = bpy.data.objects.new('myCurve', curveData) 71 | # curveData.bevel_depth = 0.01 72 | 73 | # bpy.context.collection.objects.link(curveOB) 74 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import torch 4 | import pytorch_lightning as pl 5 | from omegaconf import OmegaConf 6 | from mGPT.callback import build_callbacks 7 | from mGPT.config import parse_args, instantiate_from_config 8 | from mGPT.data.build_data import build_data 9 | from mGPT.models.build_model import build_model 10 | from mGPT.utils.logger import create_logger 11 | from mGPT.utils.load_checkpoint import load_pretrained, load_pretrained_vae 12 | 13 | def main(): 14 | # Configs 15 | cfg = parse_args(phase="train") # parse config file 16 | 17 | # Logger 18 | logger = create_logger(cfg, phase="train") # create logger 19 | logger.info(OmegaConf.to_yaml(cfg)) # print config file 20 | 21 | # Seed 22 | pl.seed_everything(cfg.SEED_VALUE) 23 | 24 | # Environment Variables 25 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 26 | 27 | # Metric Logger 28 | pl_loggers = [] 29 | for loggerName in cfg.LOGGER.TYPE: 30 | if loggerName == 'tensorboard' or cfg.LOGGER.WANDB.params.project: 31 | pl_logger = instantiate_from_config( 32 | eval(f'cfg.LOGGER.{loggerName.upper()}')) 33 | pl_loggers.append(pl_logger) 34 | 35 | # Callbacks 36 | callbacks = build_callbacks(cfg, logger=logger, phase='train') 37 | logger.info("Callbacks initialized") 38 | 39 | # Dataset 40 | datamodule = build_data(cfg) 41 | logger.info("datasets module {} initialized".format("".join( 42 | cfg.DATASET.target.split('.')[-2]))) 43 | 44 | # Model 45 | model = build_model(cfg, datamodule) 46 | logger.info("model {} loaded".format(cfg.model.target)) 47 | 48 | # Lightning Trainer 49 | trainer = pl.Trainer( 50 | default_root_dir=cfg.FOLDER_EXP, 51 | max_epochs=cfg.TRAIN.END_EPOCH, 52 | # precision='16', 53 | logger=pl_loggers, 54 | callbacks=callbacks, 55 | check_val_every_n_epoch=cfg.LOGGER.VAL_EVERY_STEPS, 56 | accelerator=cfg.ACCELERATOR, 57 | devices=cfg.DEVICE, 58 | num_nodes=cfg.NUM_NODES, 59 | strategy="ddp_find_unused_parameters_true" 60 | if len(cfg.DEVICE) > 1 else 'auto', 61 | benchmark=False, 62 | deterministic=False, 63 | ) 64 | logger.info("Trainer initialized") 65 | 66 | # Strict load pretrained model 67 | if cfg.TRAIN.PRETRAINED: 68 | load_pretrained(cfg, model, logger) 69 | 70 | # Strict load vae model 71 | if cfg.TRAIN.PRETRAINED_VAE: 72 | load_pretrained_vae(cfg, model, logger) 73 | 74 | # Pytorch 2.0 Compile 75 | # if torch.__version__ >= "2.0.0": 76 | # model = torch.compile(model, mode="reduce-overhead") 77 | # model = torch.compile(model) 78 | 79 | # Lightning Fitting 80 | if cfg.TRAIN.RESUME: 81 |
trainer.fit(model, 82 | datamodule=datamodule, 83 | ckpt_path=cfg.TRAIN.PRETRAINED) 84 | else: 85 | trainer.fit(model, datamodule=datamodule) 86 | 87 | # Training ends 88 | logger.info( 89 | f"The outputs of this experiment are stored in {cfg.FOLDER_EXP}") 90 | logger.info("Training ends!") 91 | 92 | 93 | if __name__ == "__main__": 94 | main() 95 | -------------------------------------------------------------------------------- /scripts/get_code_visual.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pytorch_lightning as pl 4 | import torch 5 | from pathlib import Path 6 | from tqdm import tqdm 7 | from mGPT.config import parse_args 8 | from mGPT.data.build_data import build_data 9 | from mGPT.models.build_model import build_model 10 | from mGPT.utils.load_checkpoint import load_pretrained, load_pretrained_vae 11 | 12 | 13 | def main(): 14 | 15 | # parse options 16 | cfg = parse_args(phase="test") # parse config file 17 | cfg.TRAIN.STAGE = "token" 18 | cfg.TRAIN.BATCH_SIZE = 1 19 | 20 | model_name = cfg.model.target.split('.')[-2].lower() 21 | output_dir = Path( 22 | os.path.join(cfg.FOLDER, model_name, cfg.NAME, 23 | "tokens_visual_" + cfg.TIME)) 24 | 25 | # set seed 26 | pl.seed_everything(cfg.SEED_VALUE) 27 | 28 | # gpu setting 29 | if cfg.ACCELERATOR == "gpu": 30 | os.environ["PYTHONWARNINGS"] = "ignore" 31 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 32 | 33 | # create dataset 34 | datamodule = build_data(cfg, phase="test") 35 | print("datasets module {} initialized".format("".join(cfg.TRAIN.DATASETS))) 36 | 37 | os.makedirs(output_dir, exist_ok=True) 38 | 39 | # create model 40 | model = build_model(cfg, datamodule) 41 | print("model {} loaded".format(cfg.model.target)) 42 | 43 | # Strict load vae model 44 | if cfg.TRAIN.PRETRAINED_VAE: 45 | load_pretrained_vae(cfg, model) 46 | 47 | # loading state dict 48 | if cfg.TEST.CHECKPOINTS: 49 | load_pretrained(cfg, model, phase="test") 50 | 51 | if cfg.ACCELERATOR == "gpu": 52 | model = model.cuda() 53 | 54 | model.eval() 55 | codes = cfg.model.params.codebook_size 56 | with torch.no_grad(): 57 | for i in tqdm(range(codes)): 58 | 59 | # Generate motion from token 60 | m_token = torch.LongTensor(1, 1).fill_(i).to(model.device) 61 | # vq_latent = model.vae.quantizer.dequantize(m_token) 62 | gen_motion = model.vae.decode(m_token) 63 | gen_motion = model.feats2joints(gen_motion).to('cpu').numpy() 64 | 65 | # Generate translation from token 66 | texts = [ 67 | f'Generate text: ' 68 | ] 69 | # texts = [f'Use only one word to describe: '] 70 | batch = {"text": texts, "length": [0]} 71 | 72 | # out_text = model(batch)['texts'] 73 | # print(out_text) 74 | # out_text_path = os.path.join(output_dir, f'{i}.txt') 75 | # Path(out_text_path).parent.mkdir(parents=True, exist_ok=True) 76 | # with open(out_text_path, 'w') as f: 77 | # f.write(out_text[0]) 78 | 79 | target_path = os.path.join(output_dir, f'{i}.npy') 80 | Path(target_path).parent.mkdir(parents=True, exist_ok=True) 81 | 82 | np.save(target_path, gen_motion) 83 | 84 | print( 85 | f'Motion tokenization done, the motion tokens are saved to {output_dir}' 86 | ) 87 | 88 | 89 | if __name__ == "__main__": 90 | main() 91 | -------------------------------------------------------------------------------- /mGPT/data/transforms/joints2rots/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mGPT.utils.joints import mmm_joints, smplh2mmm_indexes 3 | 
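# Overview: this module collects the joint-index bookkeeping used by the joints2rots
# fitting -- name-to-index maps for the SMPL and AMASS skeletons, the MMM <-> SMPL
# joint-name correspondence, and the derived smpl2mmm_indexes lookup
# (MMM joint name -> SMPL joint name -> SMPL joint index). The SMPL_*/GMM_*/Part_Seg
# paths near the bottom are hard-coded dependency locations and should be adapted to
# the local setup.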
4 | # Map joints Name to SMPL joints idx 5 | JOINT_MAP = { 6 | 'MidHip': 0, 7 | 'LHip': 1, 8 | 'LKnee': 4, 9 | 'LAnkle': 7, 10 | 'LFoot': 10, 11 | 'RHip': 2, 12 | 'RKnee': 5, 13 | 'RAnkle': 8, 14 | 'RFoot': 11, 15 | 'LShoulder': 16, 16 | 'LElbow': 18, 17 | 'LWrist': 20, 18 | 'LHand': 22, 19 | 'RShoulder': 17, 20 | 'RElbow': 19, 21 | 'RWrist': 21, 22 | 'RHand': 23, 23 | 'spine1': 3, 24 | 'spine2': 6, 25 | 'spine3': 9, 26 | 'Neck': 12, 27 | 'Head': 15, 28 | 'LCollar': 13, 29 | 'Rcollar': 14, 30 | 'Nose': 24, 31 | 'REye': 26, 32 | 'LEye': 26, 33 | 'REar': 27, 34 | 'LEar': 28, 35 | 'LHeel': 31, 36 | 'RHeel': 34, 37 | 'OP RShoulder': 17, 38 | 'OP LShoulder': 16, 39 | 'OP RHip': 2, 40 | 'OP LHip': 1, 41 | 'OP Neck': 12, 42 | } 43 | 44 | mmm2smpl_correspondence = { 45 | "root": "MidHip", 46 | "BP": "spine1", 47 | "BT": "spine3", 48 | "BLN": "Neck", 49 | "BUN": "Head", 50 | "LS": "LShoulder", 51 | "LE": "LElbow", 52 | "LW": "LWrist", 53 | "RS": "RShoulder", 54 | "RE": "RElbow", 55 | "RW": "RWrist", 56 | "LH": "LHip", 57 | "LK": "LKnee", 58 | "LA": "LAnkle", 59 | "LMrot": "LHeel", 60 | "LF": "LFoot", 61 | "RH": "RHip", 62 | "RK": "RKnee", 63 | "RA": "RAnkle", 64 | "RMrot": "RHeel", 65 | "RF": "RFoot" 66 | } 67 | 68 | full_smpl_idx = range(24) 69 | key_smpl_idx = [0, 1, 4, 7, 2, 5, 8, 17, 19, 21, 16, 18, 20] 70 | 71 | AMASS_JOINT_MAP = { 72 | 'MidHip': 0, 73 | 'LHip': 1, 74 | 'LKnee': 4, 75 | 'LAnkle': 7, 76 | 'LFoot': 10, 77 | 'RHip': 2, 78 | 'RKnee': 5, 79 | 'RAnkle': 8, 80 | 'RFoot': 11, 81 | 'LShoulder': 16, 82 | 'LElbow': 18, 83 | 'LWrist': 20, 84 | 'RShoulder': 17, 85 | 'RElbow': 19, 86 | 'RWrist': 21, 87 | 'spine1': 3, 88 | 'spine2': 6, 89 | 'spine3': 9, 90 | 'Neck': 12, 91 | 'Head': 15, 92 | 'LCollar': 13, 93 | 'Rcollar': 14, 94 | } 95 | amass_idx = range(22) 96 | amass_smpl_idx = range(22) 97 | 98 | # cal mmm in smpl index 99 | smpl2mmm_correspondence = { 100 | val: key 101 | for key, val in mmm2smpl_correspondence.items() 102 | } 103 | smpl2mmm_indexes = [JOINT_MAP[mmm2smpl_correspondence[x]] for x in mmm_joints] 104 | 105 | # cal mmm joints map 106 | MMM_JOINT_MAP = { 107 | val: JOINT_MAP[val] 108 | for key, val in mmm2smpl_correspondence.items() 109 | } 110 | 111 | # mmm_idx = range(21) 112 | # mmm_smpl_dix = smpl2mmm_indexes 113 | # mmm_smpl_dix = smplh2mmm_indexes 114 | # todo - configable 115 | SMPL_MODEL_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/" 116 | GMM_MODEL_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/" 117 | SMPL_MEAN_FILE = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/neutral_smpl_mean_params.h5" 118 | # for collsion 119 | Part_Seg_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/smplx_parts_segm.pkl" 120 | -------------------------------------------------------------------------------- /mGPT/data/humanml/dataset_t2m_eval.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from .dataset_t2m import Text2MotionDataset 4 | 5 | 6 | class Text2MotionDatasetEval(Text2MotionDataset): 7 | 8 | def __init__( 9 | self, 10 | data_root, 11 | split, 12 | mean, 13 | std, 14 | w_vectorizer, 15 | max_motion_length=196, 16 | min_motion_length=40, 17 | unit_length=4, 18 | fps=20, 19 | tmpFile=True, 20 | tiny=False, 21 | debug=False, 22 | **kwargs, 23 | ): 24 | super().__init__(data_root, split, mean, std, max_motion_length, 25 | min_motion_length, unit_length, fps, tmpFile, tiny, 26 | debug, 
**kwargs) 27 | 28 | self.w_vectorizer = w_vectorizer 29 | 30 | 31 | def __getitem__(self, item): 32 | # Get text data 33 | idx = self.pointer + item 34 | data = self.data_dict[self.name_list[idx]] 35 | motion, m_length, text_list = data["motion"], data["length"], data["text"] 36 | 37 | all_captions = [ 38 | ' '.join([token.split('/')[0] for token in text_dic['tokens']]) 39 | for text_dic in text_list 40 | ] 41 | 42 | if len(all_captions) > 3: 43 | all_captions = all_captions[:3] 44 | elif len(all_captions) == 2: 45 | all_captions = all_captions + all_captions[0:1] 46 | elif len(all_captions) == 1: 47 | all_captions = all_captions * 3 48 | 49 | # Randomly select a caption 50 | text_data = random.choice(text_list) 51 | caption, tokens = text_data["caption"], text_data["tokens"] 52 | 53 | # Text 54 | max_text_len = 20 55 | if len(tokens) < max_text_len: 56 | # pad with "unk" 57 | tokens = ["sos/OTHER"] + tokens + ["eos/OTHER"] 58 | sent_len = len(tokens) 59 | tokens = tokens + ["unk/OTHER"] * (max_text_len + 2 - sent_len) 60 | else: 61 | # crop 62 | tokens = tokens[:max_text_len] 63 | tokens = ["sos/OTHER"] + tokens + ["eos/OTHER"] 64 | sent_len = len(tokens) 65 | pos_one_hots = [] 66 | word_embeddings = [] 67 | for token in tokens: 68 | word_emb, pos_oh = self.w_vectorizer[token] 69 | pos_one_hots.append(pos_oh[None, :]) 70 | word_embeddings.append(word_emb[None, :]) 71 | pos_one_hots = np.concatenate(pos_one_hots, axis=0) 72 | word_embeddings = np.concatenate(word_embeddings, axis=0) 73 | 74 | # Random crop 75 | if self.unit_length < 10: 76 | coin2 = np.random.choice(["single", "single", "double"]) 77 | else: 78 | coin2 = "single" 79 | 80 | if coin2 == "double": 81 | m_length = (m_length // self.unit_length - 1) * self.unit_length 82 | elif coin2 == "single": 83 | m_length = (m_length // self.unit_length) * self.unit_length 84 | 85 | idx = random.randint(0, len(motion) - m_length) 86 | motion = motion[idx:idx + m_length] 87 | 88 | # Z Normalization 89 | motion = (motion - self.mean) / self.std 90 | 91 | return caption, motion, m_length, word_embeddings, pos_one_hots, sent_len, "_".join( 92 | tokens), all_captions 93 | -------------------------------------------------------------------------------- /mGPT/metrics/m2m.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torchmetrics import Metric 6 | 7 | from .utils import * 8 | 9 | 10 | # motion prediction metric 11 | class PredMetrics(Metric): 12 | 13 | def __init__(self, 14 | cfg, 15 | njoints: int = 22, 16 | jointstype: str = "mmm", 17 | force_in_meter: bool = True, 18 | align_root: bool = True, 19 | dist_sync_on_step=True, 20 | task: str = "pred", 21 | **kwargs): 22 | super().__init__(dist_sync_on_step=dist_sync_on_step) 23 | 24 | self.name = 'Motion Prediction' 25 | self.cfg = cfg 26 | self.jointstype = jointstype 27 | self.align_root = align_root 28 | self.task = task 29 | self.force_in_meter = force_in_meter 30 | 31 | self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum") 32 | self.add_state("count_seq", 33 | default=torch.tensor(0), 34 | dist_reduce_fx="sum") 35 | 36 | self.add_state("APD", 37 | default=torch.tensor([0.0]), 38 | dist_reduce_fx="sum") 39 | self.add_state("ADE", 40 | default=torch.tensor([0.0]), 41 | dist_reduce_fx="sum") 42 | self.add_state("FDE", 43 | default=torch.tensor([0.0]), 44 | dist_reduce_fx="sum") 45 | 46 | self.MR_metrics = ["APD", "ADE", "FDE"] 47 | 48 | # All metric 49 |
self.metrics = self.MR_metrics 50 | 51 | def compute(self, sanity_flag): 52 | 53 | count = self.count 54 | count_seq = self.count_seq 55 | mr_metrics = {} 56 | mr_metrics["APD"] = self.APD / count_seq 57 | mr_metrics["ADE"] = self.ADE / count_seq 58 | mr_metrics["FDE"] = self.FDE / count_seq 59 | 60 | # Reset 61 | self.reset() 62 | 63 | return mr_metrics 64 | 65 | def update(self, joints_rst: Tensor, joints_ref: Tensor, 66 | lengths: List[int]): 67 | 68 | assert joints_rst.shape == joints_ref.shape 69 | assert joints_rst.dim() == 4 70 | # (bs, seq, njoint=22, 3) 71 | 72 | self.count += sum(lengths) 73 | self.count_seq += len(lengths) 74 | 75 | rst = torch.flatten(joints_rst, start_dim=2) 76 | ref = torch.flatten(joints_ref, start_dim=2) 77 | 78 | for i, l in enumerate(lengths): 79 | if self.task == "pred": 80 | pred_start = int(l*self.cfg.ABLATION.predict_ratio) 81 | diff = rst[i,pred_start:] - ref[i,pred_start:] 82 | elif self.task == "inbetween": 83 | inbetween_start = int(l*self.cfg.ABLATION.inbetween_ratio) 84 | inbetween_end = l - int(l*self.cfg.ABLATION.inbetween_ratio) 85 | diff = rst[i,inbetween_start:inbetween_end] - ref[i,inbetween_start:inbetween_end] 86 | else: 87 | print(f"Task {self.task} not implemented.") 88 | diff = rst - ref 89 | 90 | dist = torch.linalg.norm(diff, dim=-1)[None] 91 | 92 | ade = dist.mean(dim=1) 93 | fde = dist[:,-1] 94 | self.ADE = self.ADE + ade 95 | self.FDE = self.FDE + fde 96 | -------------------------------------------------------------------------------- /mGPT/archs/tools/token_emb.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor, nn 3 | 4 | class NewTokenEmb(nn.Module): 5 | """ 6 | For adding new tokens to a pretrained model 7 | """ 8 | 9 | def __init__(self, 10 | old_embeddings: nn.Embedding, 11 | new_num_tokens: int = None) -> None: 12 | 13 | super().__init__() 14 | 15 | self.num_tokens = old_embeddings.num_embeddings + new_num_tokens 16 | self.old_num_tokens = old_embeddings.num_embeddings 17 | self.new_num_tokens = new_num_tokens 18 | self.embedding_dim = old_embeddings.embedding_dim 19 | 20 | # For text embeddings 21 | self.text_embeddings = nn.Embedding( 22 | self.num_tokens, 23 | self.embedding_dim, 24 | device=old_embeddings.weight.device, 25 | dtype=old_embeddings.weight.dtype) 26 | with torch.no_grad(): 27 | self.text_embeddings.weight.data[:old_embeddings. 28 | num_embeddings] = old_embeddings.weight.data 29 | self.text_embeddings.weight.data[ 30 | self.old_num_tokens:] = torch.zeros( 31 | self.new_num_tokens, 32 | self.embedding_dim, 33 | dtype=old_embeddings.weight.dtype, 34 | device=old_embeddings.weight.device) 35 | self.text_embeddings.weight.requires_grad_(False) 36 | 37 | # For motion embeddings 38 | self.motion_embeddings = nn.Embedding( 39 | new_num_tokens, 40 | self.embedding_dim, 41 | device=old_embeddings.weight.device, 42 | dtype=old_embeddings.weight.dtype) 43 | with torch.no_grad(): 44 | self.motion_embeddings.weight.data[:self. 45 | old_num_tokens] = torch.zeros( 46 | new_num_tokens, 47 | self.embedding_dim, 48 | dtype=old_embeddings.weight. 49 | dtype, 50 | device=old_embeddings. 51 | weight.device) 52 | self.word2motionProj = nn.Linear(self.old_num_tokens, new_num_tokens) 53 | 54 | def forward(self, input: Tensor) -> Tensor: 55 | 56 | with torch.no_grad(): 57 | self.motion_embeddings.weight.data[:self.
58 | old_num_tokens] = torch.zeros( 59 | self.new_num_tokens, 60 | self.embedding_dim, 61 | dtype=self.motion_embeddings 62 | .weight.dtype, 63 | device=self. 64 | motion_embeddings.weight. 65 | device) 66 | 67 | self.motion_embeddings.weight.data[ 68 | self.old_num_tokens:] = self.word2motionProj( 69 | self.text_embeddings.weight.data[:self.old_num_tokens].permute( 70 | 1, 0)).permute(1, 0) 71 | 72 | return self.text_embeddings(input) + self.motion_embeddings(input) 73 | 74 | -------------------------------------------------------------------------------- /mGPT/metrics/mr.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torchmetrics import Metric 6 | 7 | from .utils import * 8 | 9 | 10 | # motion reconstruction metric 11 | class MRMetrics(Metric): 12 | 13 | def __init__(self, 14 | njoints, 15 | jointstype: str = "mmm", 16 | force_in_meter: bool = True, 17 | align_root: bool = True, 18 | dist_sync_on_step=True, 19 | **kwargs): 20 | super().__init__(dist_sync_on_step=dist_sync_on_step) 21 | 22 | self.name = 'Motion Reconstructions' 23 | self.jointstype = jointstype 24 | self.align_root = align_root 25 | self.force_in_meter = force_in_meter 26 | 27 | self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum") 28 | self.add_state("count_seq", 29 | default=torch.tensor(0), 30 | dist_reduce_fx="sum") 31 | 32 | self.add_state("MPJPE", 33 | default=torch.tensor([0.0]), 34 | dist_reduce_fx="sum") 35 | self.add_state("PAMPJPE", 36 | default=torch.tensor([0.0]), 37 | dist_reduce_fx="sum") 38 | self.add_state("ACCEL", 39 | default=torch.tensor([0.0]), 40 | dist_reduce_fx="sum") 41 | # todo 42 | # self.add_state("ROOT", default=torch.tensor([0.0]), dist_reduce_fx="sum") 43 | 44 | self.MR_metrics = ["MPJPE", "PAMPJPE", "ACCEL"] 45 | 46 | # All metric 47 | self.metrics = self.MR_metrics 48 | 49 | def compute(self, sanity_flag): 50 | if self.force_in_meter: 51 | # different jointstypes have different scale factors 52 | # if self.jointstype == 'mmm': 53 | # factor = 1000.0 54 | # elif self.jointstype == 'humanml3d': 55 | # factor = 1000.0 * 0.75 / 480 56 | factor = 1000.0 57 | else: 58 | factor = 1.0 59 | 60 | count = self.count 61 | count_seq = self.count_seq 62 | mr_metrics = {} 63 | mr_metrics["MPJPE"] = self.MPJPE / count * factor 64 | mr_metrics["PAMPJPE"] = self.PAMPJPE / count * factor 65 | # accel error: joints_gt[:-2] - 2 * joints_gt[1:-1] + joints_gt[2:] 66 | # n-2 for each sequences 67 | mr_metrics["ACCEL"] = self.ACCEL / (count - 2 * count_seq) * factor 68 | 69 | # Reset 70 | self.reset() 71 | 72 | return mr_metrics 73 | 74 | def update(self, joints_rst: Tensor, joints_ref: Tensor, 75 | lengths: List[int]): 76 | assert joints_rst.shape == joints_ref.shape 77 | assert joints_rst.dim() == 4 78 | # (bs, seq, njoint=22, 3) 79 | 80 | self.count += sum(lengths) 81 | self.count_seq += len(lengths) 82 | 83 | # avoid cuda error of DDP in pampjpe 84 | rst = joints_rst.detach().cpu() 85 | ref = joints_ref.detach().cpu() 86 | 87 | # align root joints index 88 | if self.align_root and self.jointstype in ['mmm', 'humanml3d']: 89 | align_inds = [0] 90 | else: 91 | align_inds = None 92 | 93 | for i in range(len(lengths)): 94 | self.MPJPE += torch.sum( 95 | calc_mpjpe(rst[i], ref[i], align_inds=align_inds)) 96 | self.PAMPJPE += torch.sum(calc_pampjpe(rst[i], ref[i])) 97 | self.ACCEL += torch.sum(calc_accel(rst[i], ref[i])) 98 | 
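A minimal sketch of driving the reconstruction metric defined above outside a Lightning loop; the tensor shapes and the `compute(sanity_flag)` signature follow the class as written, while the random inputs are only illustrative:

```python
import torch
from mGPT.metrics.mr import MRMetrics

# Stand-in (bs, seq, njoints=22, 3) joints for the reconstruction and the reference motion.
joints_ref = torch.randn(2, 60, 22, 3)
joints_rst = joints_ref + 0.01 * torch.randn_like(joints_ref)

metric = MRMetrics(njoints=22, jointstype="humanml3d", force_in_meter=True)
metric.update(joints_rst, joints_ref, lengths=[60, 60])
# Returns a dict with MPJPE / PAMPJPE / ACCEL, scaled by 1000 when force_in_meter=True.
print(metric.compute(sanity_flag=False))
```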
-------------------------------------------------------------------------------- /mGPT/losses/mgpt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .base import BaseLosses 4 | 5 | 6 | class CommitLoss(nn.Module): 7 | """ 8 | Useless Wrapper 9 | """ 10 | def __init__(self, **kwargs): 11 | super().__init__() 12 | 13 | def forward(self, commit, commit2, **kwargs): 14 | return commit 15 | 16 | 17 | class GPTLosses(BaseLosses): 18 | 19 | def __init__(self, cfg, stage, num_joints, **kwargs): 20 | # Save parameters 21 | self.stage = stage 22 | recons_loss = cfg.LOSS.ABLATION.RECONS_LOSS 23 | 24 | # Define losses 25 | losses = [] 26 | params = {} 27 | if stage == "vae": 28 | losses.append("recons_feature") 29 | params['recons_feature'] = cfg.LOSS.LAMBDA_FEATURE 30 | 31 | losses.append("recons_velocity") 32 | params['recons_velocity'] = cfg.LOSS.LAMBDA_VELOCITY 33 | 34 | losses.append("vq_commit") 35 | params['vq_commit'] = cfg.LOSS.LAMBDA_COMMIT 36 | elif stage in ["lm_pretrain", "lm_instruct"]: 37 | losses.append("gpt_loss") 38 | params['gpt_loss'] = cfg.LOSS.LAMBDA_CLS 39 | 40 | # Define loss functions & weights 41 | losses_func = {} 42 | for loss in losses: 43 | if loss.split('_')[0] == 'recons': 44 | if recons_loss == "l1": 45 | losses_func[loss] = nn.L1Loss 46 | elif recons_loss == "l2": 47 | losses_func[loss] = nn.MSELoss 48 | elif recons_loss == "l1_smooth": 49 | losses_func[loss] = nn.SmoothL1Loss 50 | elif loss.split('_')[1] in [ 51 | 'commit', 'loss', 'gpt', 'm2t2m', 't2m2t' 52 | ]: 53 | losses_func[loss] = CommitLoss 54 | elif loss.split('_')[1] in ['cls', 'lm']: 55 | losses_func[loss] = nn.CrossEntropyLoss 56 | else: 57 | raise NotImplementedError(f"Loss {loss} not implemented.") 58 | 59 | super().__init__(cfg, losses, params, losses_func, num_joints, 60 | **kwargs) 61 | 62 | def update(self, rs_set): 63 | '''Update the losses''' 64 | total: float = 0.0 65 | 66 | if self.stage in ["vae"]: 67 | total += self._update_loss("recons_feature", rs_set['m_rst'], 68 | rs_set['m_ref']) 69 | # total += self._update_loss("recons_joints", rs_set['joints_rst'], rs_set['joints_ref']) 70 | nfeats = rs_set['m_rst'].shape[-1] 71 | if nfeats in [263, 135 + 263]: 72 | if nfeats == 135 + 263: 73 | vel_start = 135 + 4 74 | elif nfeats == 263: 75 | vel_start = 4 76 | total += self._update_loss( 77 | "recons_velocity", 78 | rs_set['m_rst'][..., vel_start:(self.num_joints - 1) * 3 + 79 | vel_start], 80 | rs_set['m_ref'][..., vel_start:(self.num_joints - 1) * 3 + 81 | vel_start]) 82 | else: 83 | if self._params['recons_velocity'] != 0.0: 84 | raise NotImplementedError( 85 | "Velocity not implemented for nfeats = {})".format(nfeats)) 86 | total += self._update_loss("vq_commit", rs_set['loss_commit'], 87 | rs_set['loss_commit']) 88 | 89 | if self.stage in ["lm_pretrain", "lm_instruct"]: 90 | total += self._update_loss("gpt_loss", rs_set['outputs'].loss, 91 | rs_set['outputs'].loss) 92 | 93 | # Update the total loss 94 | self.total += total.detach() 95 | self.count += 1 96 | 97 | return total 98 | -------------------------------------------------------------------------------- /mGPT/render/blender/scene.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | from .materials import plane_mat # noqa 3 | 4 | 5 | def setup_renderer(denoising=True, oldrender=True, accelerator="gpu", device=[0]): 6 | bpy.context.scene.render.engine = "CYCLES" 7 | bpy.data.scenes[0].render.engine = 
"CYCLES" 8 | if accelerator.lower() == "gpu": 9 | bpy.context.preferences.addons[ 10 | "cycles" 11 | ].preferences.compute_device_type = "CUDA" 12 | bpy.context.scene.cycles.device = "GPU" 13 | i = 0 14 | bpy.context.preferences.addons["cycles"].preferences.get_devices() 15 | for d in bpy.context.preferences.addons["cycles"].preferences.devices: 16 | if i in device: # gpu id 17 | d["use"] = 1 18 | print(d["name"], "".join(str(i) for i in device)) 19 | else: 20 | d["use"] = 0 21 | i += 1 22 | 23 | if denoising: 24 | bpy.context.scene.cycles.use_denoising = True 25 | 26 | try: 27 | bpy.context.scene.render.tile_x = 256 28 | bpy.context.scene.render.tile_y = 256 29 | except AttributeError as e: 30 | print(e) 31 | bpy.context.scene.cycles.tile_size = 256 32 | bpy.context.scene.cycles.samples = 64 33 | # bpy.context.scene.cycles.denoiser = 'OPTIX' 34 | 35 | if not oldrender: 36 | bpy.context.scene.view_settings.view_transform = "Standard" 37 | bpy.context.scene.render.film_transparent = True 38 | bpy.context.scene.display_settings.display_device = "sRGB" 39 | bpy.context.scene.view_settings.gamma = 1.2 40 | bpy.context.scene.view_settings.exposure = -0.75 41 | 42 | 43 | # Setup scene 44 | def setup_scene( 45 | res="high", denoising=True, oldrender=True, accelerator="gpu", device=[0] 46 | ): 47 | scene = bpy.data.scenes["Scene"] 48 | assert res in ["ultra", "high", "med", "low"] 49 | if res == "high": 50 | scene.render.resolution_x = 1280 51 | scene.render.resolution_y = 1024 52 | elif res == "med": 53 | scene.render.resolution_x = 1280 // 2 54 | scene.render.resolution_y = 1024 // 2 55 | elif res == "low": 56 | scene.render.resolution_x = 1280 // 4 57 | scene.render.resolution_y = 1024 // 4 58 | elif res == "ultra": 59 | scene.render.resolution_x = 1280 * 2 60 | scene.render.resolution_y = 1024 * 2 61 | 62 | scene.render.film_transparent= True 63 | world = bpy.data.worlds["World"] 64 | world.use_nodes = True 65 | bg = world.node_tree.nodes["Background"] 66 | bg.inputs[0].default_value[:3] = (1.0, 1.0, 1.0) 67 | bg.inputs[1].default_value = 1.0 68 | 69 | # Remove default cube 70 | if "Cube" in bpy.data.objects: 71 | bpy.data.objects["Cube"].select_set(True) 72 | bpy.ops.object.delete() 73 | 74 | bpy.ops.object.light_add( 75 | type="SUN", align="WORLD", location=(0, 0, 0), scale=(1, 1, 1) 76 | ) 77 | bpy.data.objects["Sun"].data.energy = 1.5 78 | 79 | # rotate camera 80 | bpy.ops.object.empty_add( 81 | type="PLAIN_AXES", align="WORLD", location=(0, 0, 0), scale=(1, 1, 1) 82 | ) 83 | bpy.ops.transform.resize( 84 | value=(10, 10, 10), 85 | orient_type="GLOBAL", 86 | orient_matrix=((1, 0, 0), (0, 1, 0), (0, 0, 1)), 87 | orient_matrix_type="GLOBAL", 88 | mirror=True, 89 | use_proportional_edit=False, 90 | proportional_edit_falloff="SMOOTH", 91 | proportional_size=1, 92 | use_proportional_connected=False, 93 | use_proportional_projected=False, 94 | ) 95 | bpy.ops.object.select_all(action="DESELECT") 96 | 97 | setup_renderer( 98 | denoising=denoising, oldrender=oldrender, accelerator=accelerator, device=device 99 | ) 100 | return scene 101 | -------------------------------------------------------------------------------- /mGPT/data/Kit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import os 4 | from os.path import join as pjoin 5 | from .humanml.utils.word_vectorizer import WordVectorizer 6 | from .humanml.scripts.motion_process import (process_file, recover_from_ric) 7 | from .HumanML3D import HumanML3DDataModule 8 | 
from .humanml import Text2MotionDatasetEval, Text2MotionDataset, Text2MotionDatasetCB, MotionDataset, MotionDatasetVQ, Text2MotionDatasetToken 9 | 10 | 11 | class KitDataModule(HumanML3DDataModule): 12 | def __init__(self, cfg, **kwargs): 13 | 14 | super().__init__(cfg, **kwargs) 15 | 16 | # Basic info of the dataset 17 | self.name = "kit" 18 | self.njoints = 21 19 | 20 | # Path to the dataset 21 | data_root = cfg.DATASET.KIT.ROOT 22 | self.hparams.data_root = data_root 23 | self.hparams.text_dir = pjoin(data_root, "texts") 24 | self.hparams.motion_dir = pjoin(data_root, 'new_joint_vecs') 25 | 26 | # Mean and std of the dataset 27 | dis_data_root = pjoin(cfg.DATASET.KIT.MEAN_STD_PATH, 'kit', 28 | "VQVAEV3_CB1024_CMT_H1024_NRES3", "meta") 29 | self.hparams.mean = np.load(pjoin(dis_data_root, "mean.npy")) 30 | self.hparams.std = np.load(pjoin(dis_data_root, "std.npy")) 31 | 32 | # Mean and std for fair evaluation 33 | dis_data_root_eval = pjoin(cfg.DATASET.KIT.MEAN_STD_PATH, 't2m', 34 | "Comp_v6_KLD005", "meta") 35 | self.hparams.mean_eval = np.load(pjoin(dis_data_root_eval, "mean.npy")) 36 | self.hparams.std_eval = np.load(pjoin(dis_data_root_eval, "std.npy")) 37 | 38 | # Length of the dataset 39 | self.hparams.max_motion_length = cfg.DATASET.KIT.MAX_MOTION_LEN 40 | self.hparams.min_motion_length = cfg.DATASET.KIT.MIN_MOTION_LEN 41 | self.hparams.max_text_len = cfg.DATASET.KIT.MAX_TEXT_LEN 42 | self.hparams.unit_length = cfg.DATASET.KIT.UNIT_LEN 43 | 44 | # Get additional info of the dataset 45 | self._sample_set = self.get_sample_set(overrides={"split": "test", "tiny": True}) 46 | self.nfeats = self._sample_set.nfeats 47 | cfg.DATASET.NFEATS = self.nfeats 48 | 49 | def joints2feats(self, features): 50 | example_data = np.load(os.path.join(self.hparams.data_root, 'joints', '03950_gt.npy')) 51 | example_data = example_data.reshape(len(example_data), -1, 3) 52 | example_data = torch.from_numpy(example_data) 53 | features = process_file(features, self.njoints, example_data, 'kit')[0] 54 | return features 55 | 56 | def normalize(self, features): 57 | mean = torch.tensor(self.hparams.mean).to(features) 58 | std = torch.tensor(self.hparams.std).to(features) 59 | features = (features - mean) / std 60 | return features 61 | 62 | def renorm4t2m(self, features): 63 | # renorm to t2m norms for using t2m evaluators 64 | ori_mean = torch.tensor(self.hparams.mean).to(features) 65 | ori_std = torch.tensor(self.hparams.std).to(features) 66 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 67 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 68 | features = features * ori_std + ori_mean 69 | features = (features - eval_mean) / eval_std 70 | return features 71 | 72 | def mm_mode(self, mm_on=True): 73 | # random select samples for mm 74 | if mm_on: 75 | self.is_mm = True 76 | self.name_list = self.test_dataset.name_list 77 | self.mm_list = np.random.choice(self.name_list, 78 | self.cfg.METRIC.MM_NUM_SAMPLES, 79 | replace=False) 80 | self.test_dataset.name_list = self.mm_list 81 | else: 82 | self.is_mm = False 83 | self.test_dataset.name_list = self.name_list 84 | -------------------------------------------------------------------------------- /mGPT/data/transforms/joints2jfeats/tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 
5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | import torch 18 | import torch.nn.functional as F 19 | 20 | from mGPT.utils.joints import mmm_joints 21 | 22 | # Get the indexes of particular body part SMPLH case 23 | # Feet 24 | # LM, RM = smplh_joints.index("left_ankle"), smplh_joints.index("right_ankle") 25 | # LF, RF = smplh_joints.index("left_foot"), smplh_joints.index("right_foot") 26 | # # Shoulders 27 | # LS, RS = smplh_joints.index("left_shoulder"), smplh_joints.index("right_shoulder") 28 | # # Hips 29 | # LH, RH = smplh_joints.index("left_hip"), smplh_joints.index("right_hip") 30 | 31 | # Get the indexes of particular body part 32 | # Feet 33 | LM, RM = mmm_joints.index("LMrot"), mmm_joints.index("RMrot") 34 | LF, RF = mmm_joints.index("LF"), mmm_joints.index("RF") 35 | # Shoulders 36 | LS, RS = mmm_joints.index("LS"), mmm_joints.index("RS") 37 | # Hips 38 | LH, RH = mmm_joints.index("LH"), mmm_joints.index("RH") 39 | 40 | 41 | def get_forward_direction(poses, jointstype="mmm"): 42 | # assert jointstype == 'mmm' 43 | across = poses[..., RH, :] - poses[..., LH, :] + poses[..., RS, :] - poses[ 44 | ..., LS, :] 45 | forward = torch.stack((-across[..., 2], across[..., 0]), axis=-1) 46 | forward = torch.nn.functional.normalize(forward, dim=-1) 47 | return forward 48 | 49 | 50 | def get_floor(poses, jointstype="mmm"): 51 | # assert jointstype == 'mmm' 52 | ndim = len(poses.shape) 53 | foot_heights = poses[..., (LM, LF, RM, RF), 1].min(-1).values 54 | floor_height = softmin(foot_heights, softness=0.5, dim=-1) 55 | # changed this thing Mathis version 1.11 pytorch 56 | return floor_height[(ndim - 2) * [None]].transpose(0, -1) 57 | 58 | 59 | def softmax(x, softness=1.0, dim=None): 60 | maxi, mini = x.max(dim=dim).values, x.min(dim=dim).values 61 | return maxi + torch.log(softness + torch.exp(mini - maxi)) 62 | 63 | 64 | def softmin(x, softness=1.0, dim=0): 65 | return -softmax(-x, softness=softness, dim=dim) 66 | 67 | 68 | def gaussian_filter1d(_inputs, sigma, truncate=4.0): 69 | # Code adapted/mixed from scipy library into pytorch 70 | # https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/ndimage/filters.py#L211 71 | # and gaussian kernel 72 | # https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/ndimage/filters.py#L179 73 | # Correspond to mode="nearest" and order = 0 74 | # But works batched 75 | if len(_inputs.shape) == 2: 76 | inputs = _inputs[None] 77 | else: 78 | inputs = _inputs 79 | 80 | sd = float(sigma) 81 | radius = int(truncate * sd + 0.5) 82 | sigma2 = sigma * sigma 83 | x = torch.arange(-radius, 84 | radius + 1, 85 | device=inputs.device, 86 | dtype=inputs.dtype) 87 | phi_x = torch.exp(-0.5 / sigma2 * x**2) 88 | phi_x = phi_x / phi_x.sum() 89 | 90 | # Conv1d weights 91 | groups = inputs.shape[-1] 92 | weights = torch.tile(phi_x, (groups, 1, 1)) 93 | inputs = inputs.transpose(-1, -2) 94 | outputs = F.conv1d(inputs, weights, padding="same", 95 | groups=groups).transpose(-1, -2) 96 | 97 | 
return outputs.reshape(_inputs.shape) 98 | -------------------------------------------------------------------------------- /mGPT/data/__init__.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | from torch.utils.data import DataLoader 3 | 4 | 5 | class BASEDataModule(pl.LightningDataModule): 6 | def __init__(self, collate_fn): 7 | super().__init__() 8 | 9 | self.dataloader_options = {"collate_fn": collate_fn} 10 | self.persistent_workers = True 11 | self.is_mm = False 12 | 13 | self._train_dataset = None 14 | self._val_dataset = None 15 | self._test_dataset = None 16 | 17 | def get_sample_set(self, overrides={}): 18 | sample_params = self.hparams.copy() 19 | sample_params.update(overrides) 20 | return self.DatasetEval(**sample_params) 21 | 22 | @property 23 | def train_dataset(self): 24 | if self._train_dataset is None: 25 | self._train_dataset = self.Dataset(split=self.cfg.TRAIN.SPLIT, 26 | **self.hparams) 27 | return self._train_dataset 28 | 29 | @property 30 | def val_dataset(self): 31 | if self._val_dataset is None: 32 | params = self.hparams.copy() 33 | params['code_path'] = None 34 | params['split'] = self.cfg.EVAL.SPLIT 35 | self._val_dataset = self.DatasetEval(**params) 36 | return self._val_dataset 37 | 38 | @property 39 | def test_dataset(self): 40 | if self._test_dataset is None: 41 | # self._test_dataset = self.DatasetEval(split=self.cfg.TEST.SPLIT, 42 | # **self.hparams) 43 | params = self.hparams.copy() 44 | params['code_path'] = None 45 | params['split'] = self.cfg.TEST.SPLIT 46 | self._test_dataset = self.DatasetEval( **params) 47 | return self._test_dataset 48 | 49 | def setup(self, stage=None): 50 | # Use the getter the first time to load the data 51 | if stage in (None, "fit"): 52 | _ = self.train_dataset 53 | _ = self.val_dataset 54 | if stage in (None, "test"): 55 | _ = self.test_dataset 56 | 57 | def train_dataloader(self): 58 | dataloader_options = self.dataloader_options.copy() 59 | dataloader_options["batch_size"] = self.cfg.TRAIN.BATCH_SIZE 60 | dataloader_options["num_workers"] = self.cfg.TRAIN.NUM_WORKERS 61 | return DataLoader( 62 | self.train_dataset, 63 | shuffle=False, 64 | persistent_workers=True, 65 | **dataloader_options, 66 | ) 67 | 68 | def predict_dataloader(self): 69 | dataloader_options = self.dataloader_options.copy() 70 | dataloader_options[ 71 | "batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE 72 | dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS 73 | dataloader_options["shuffle"] = False 74 | return DataLoader( 75 | self.test_dataset, 76 | persistent_workers=True, 77 | **dataloader_options, 78 | ) 79 | 80 | def val_dataloader(self): 81 | # overrides batch_size and num_workers 82 | dataloader_options = self.dataloader_options.copy() 83 | dataloader_options["batch_size"] = self.cfg.EVAL.BATCH_SIZE 84 | dataloader_options["num_workers"] = self.cfg.EVAL.NUM_WORKERS 85 | dataloader_options["shuffle"] = False 86 | return DataLoader( 87 | self.val_dataset, 88 | persistent_workers=True, 89 | **dataloader_options, 90 | ) 91 | 92 | def test_dataloader(self): 93 | # overrides batch_size and num_workers 94 | dataloader_options = self.dataloader_options.copy() 95 | dataloader_options[ 96 | "batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE 97 | dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS 98 | dataloader_options["shuffle"] = False 99 | return DataLoader( 100 | self.test_dataset, 101 | persistent_workers=True, 102 | 
**dataloader_options, 103 | ) 104 | -------------------------------------------------------------------------------- /mGPT/archs/tm2t_evaluator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.utils.rnn import pack_padded_sequence 4 | 5 | 6 | class MovementConvEncoder(nn.Module): 7 | def __init__(self, input_size, hidden_size, output_size): 8 | super(MovementConvEncoder, self).__init__() 9 | self.main = nn.Sequential( 10 | nn.Conv1d(input_size, hidden_size, 4, 2, 1), 11 | nn.Dropout(0.2, inplace=True), 12 | nn.LeakyReLU(0.2, inplace=True), 13 | nn.Conv1d(hidden_size, output_size, 4, 2, 1), 14 | nn.Dropout(0.2, inplace=True), 15 | nn.LeakyReLU(0.2, inplace=True), 16 | ) 17 | self.out_net = nn.Linear(output_size, output_size) 18 | # self.main.apply(init_weight) 19 | # self.out_net.apply(init_weight) 20 | 21 | def forward(self, inputs): 22 | inputs = inputs.permute(0, 2, 1) 23 | outputs = self.main(inputs).permute(0, 2, 1) 24 | # print(outputs.shape) 25 | return self.out_net(outputs) 26 | 27 | 28 | class MotionEncoderBiGRUCo(nn.Module): 29 | def __init__(self, input_size, hidden_size, output_size): 30 | super(MotionEncoderBiGRUCo, self).__init__() 31 | 32 | self.input_emb = nn.Linear(input_size, hidden_size) 33 | self.gru = nn.GRU( 34 | hidden_size, hidden_size, batch_first=True, bidirectional=True 35 | ) 36 | self.output_net = nn.Sequential( 37 | nn.Linear(hidden_size * 2, hidden_size), 38 | nn.LayerNorm(hidden_size), 39 | nn.LeakyReLU(0.2, inplace=True), 40 | nn.Linear(hidden_size, output_size), 41 | ) 42 | 43 | # self.input_emb.apply(init_weight) 44 | # self.output_net.apply(init_weight) 45 | self.hidden_size = hidden_size 46 | self.hidden = nn.Parameter( 47 | torch.randn((2, 1, self.hidden_size), requires_grad=True) 48 | ) 49 | 50 | # input(batch_size, seq_len, dim) 51 | def forward(self, inputs, m_lens): 52 | num_samples = inputs.shape[0] 53 | 54 | input_embs = self.input_emb(inputs) 55 | hidden = self.hidden.repeat(1, num_samples, 1) 56 | 57 | cap_lens = m_lens.data.tolist() 58 | 59 | # emb = pack_padded_sequence(input=input_embs, lengths=cap_lens, batch_first=True) 60 | emb = input_embs 61 | 62 | gru_seq, gru_last = self.gru(emb, hidden) 63 | 64 | gru_last = torch.cat([gru_last[0], gru_last[1]], dim=-1) 65 | 66 | return self.output_net(gru_last) 67 | 68 | 69 | class TextEncoderBiGRUCo(nn.Module): 70 | def __init__(self, word_size, pos_size, hidden_size, output_size): 71 | super(TextEncoderBiGRUCo, self).__init__() 72 | 73 | self.pos_emb = nn.Linear(pos_size, word_size) 74 | self.input_emb = nn.Linear(word_size, hidden_size) 75 | self.gru = nn.GRU( 76 | hidden_size, hidden_size, batch_first=True, bidirectional=True 77 | ) 78 | self.output_net = nn.Sequential( 79 | nn.Linear(hidden_size * 2, hidden_size), 80 | nn.LayerNorm(hidden_size), 81 | nn.LeakyReLU(0.2, inplace=True), 82 | nn.Linear(hidden_size, output_size), 83 | ) 84 | 85 | # self.input_emb.apply(init_weight) 86 | # self.pos_emb.apply(init_weight) 87 | # self.output_net.apply(init_weight) 88 | # self.linear2.apply(init_weight) 89 | # self.batch_size = batch_size 90 | self.hidden_size = hidden_size 91 | self.hidden = nn.Parameter( 92 | torch.randn((2, 1, self.hidden_size), requires_grad=True) 93 | ) 94 | 95 | # input(batch_size, seq_len, dim) 96 | def forward(self, word_embs, pos_onehot, cap_lens): 97 | num_samples = word_embs.shape[0] 98 | 99 | pos_embs = self.pos_emb(pos_onehot) 100 | inputs = word_embs + pos_embs 101 | input_embs = 
self.input_emb(inputs) 102 | hidden = self.hidden.repeat(1, num_samples, 1) 103 | 104 | cap_lens = cap_lens.data.tolist() 105 | emb = pack_padded_sequence(input=input_embs, lengths=cap_lens, batch_first=True) 106 | 107 | gru_seq, gru_last = self.gru(emb, hidden) 108 | 109 | gru_last = torch.cat([gru_last[0], gru_last[1]], dim=-1) 110 | 111 | return self.output_net(gru_last) 112 | -------------------------------------------------------------------------------- /configs/default.yaml: -------------------------------------------------------------------------------- 1 | SEED_VALUE: 1234 # Seed value 2 | DEBUG: True # Debug mode 3 | FULL_CONFIG: false 4 | 5 | TRAIN: 6 | SPLIT: 'train' # Training split name 7 | NUM_WORKERS: 8 # Number of workers 8 | BATCH_SIZE: 8 # Size of batches 9 | END_EPOCH: 2000 # End epoch 10 | 11 | RESUME: '' # Experiment path to be resumed training 12 | PRETRAINED_VAE: '' # Pretrained vae/vqvae model path 13 | PRETRAINED: '' # Pretrained model path 14 | 15 | OPTIM: 16 | target: AdamW 17 | params: 18 | lr: 2e-4 19 | betas: [0.9, 0.99] 20 | weight_decay: 0.0 21 | 22 | LR_SCHEDULER: 23 | target: CosineAnnealingLR 24 | params: 25 | T_max: ${eval:${LOGGER.VAL_EVERY_STEPS} * 100} 26 | eta_min: 1e-6 27 | 28 | EVAL: 29 | SPLIT: 'val' # Validation split name 30 | BATCH_SIZE: 16 # Validation Batch size 31 | NUM_WORKERS: 8 # Validation Batch size 32 | 33 | TEST: 34 | CHECKPOINTS: '' # Pretrained model path 35 | SPLIT: 'test' # Testing split name 36 | BATCH_SIZE: 16 # Testing Batch size 37 | NUM_WORKERS: 8 # Testing Batch size 38 | 39 | SAVE_PREDICTIONS: False # Weather to save predictions 40 | COUNT_TIME: False # Weather to count time during test 41 | REPLICATION_TIMES: 20 # Number of times to replicate the test 42 | REP_I: 0 # For counting replication times 43 | 44 | model: 45 | target: mGPT.models.mgpt.MotionGPT 46 | params: 47 | condition: 'text' 48 | task: 't2m' 49 | lm: ${lm.default} 50 | motion_vae: ${vq.default} 51 | 52 | # Related parameters 53 | stage: ${TRAIN.STAGE} 54 | debug: ${DEBUG} 55 | codebook_size: ${model.params.motion_vae.params.code_num} 56 | metrics_dict: ${METRIC.TYPE} 57 | 58 | LOSS: 59 | LAMBDA_REC: 1.0 # Lambda for reconstruction losses 60 | LAMBDA_JOINT: 1.0 # Lambda for joint losses 61 | 62 | LAMBDA_LATENT: 1e-5 # Lambda for latent losses 63 | LAMBDA_KL: 1e-5 # Lambda for kl losses 64 | LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses 65 | LAMBDA_CROSS: 1.0 # Lambda for cross-reconstruction losses 66 | LAMBDA_CYCLE: 1.0 # Lambda for cycle losses 67 | LAMBDA_PRIOR: 0.0 # Lambda for diffusion prior losses 68 | 69 | LAMBDA_VELOCITY: 0.5 # Lambda for velocity losses 70 | LAMBDA_COMMIT: 0.02 # Lambda for commitment losses 71 | 72 | ABLATION: 73 | RECONS_LOSS: 'l1_smooth' 74 | 75 | METRIC: 76 | TASK: 't2m' 77 | FORCE_IN_METER: True 78 | DIST_SYNC_ON_STEP: True 79 | MM_NUM_SAMPLES: 100 # Number of samples for multimodal test 80 | MM_NUM_REPEATS: 30 # Number of repeats for multimodal test 81 | MM_NUM_TIMES: 10 # Number of times to repeat the multimodal test 82 | DIVERSITY_TIMES: 300 # Number of times to repeat the diversity test 83 | TM2T: ${evaluator.tm2t} 84 | 85 | DATASET: 86 | target: mGPT.data.HumanML3D.HumanML3DDataModule 87 | CODE_PATH: 'VQVAE' 88 | TASK_ROOT: '' 89 | TASK_PATH: '' 90 | NFEATS: 263 91 | KIT: 92 | MAX_MOTION_LEN: 196 93 | MIN_MOTION_LEN: 24 94 | MAX_TEXT_LEN: 20 95 | PICK_ONE_TEXT: true 96 | FRAME_RATE: 12.5 97 | UNIT_LEN: 4 98 | HUMANML3D: 99 | MAX_MOTION_LEN: 196 100 | MIN_MOTION_LEN: 40 101 | MAX_TEXT_LEN: 20 102 | 
PICK_ONE_TEXT: true 103 | FRAME_RATE: 20.0 104 | UNIT_LEN: 4 105 | STD_TEXT: False 106 | 107 | ABLATION: 108 | # For MotionGPT 109 | use_length: False 110 | predict_ratio: 0.2 111 | inbetween_ratio: 0.25 112 | image_size: 256 113 | 114 | # For Motion-latent-diffusion 115 | VAE_TYPE: 'actor' # vae ablation: actor or mcross 116 | VAE_ARCH: 'encoder_decoder' # mdiffusion vae architecture 117 | PE_TYPE: 'actor' # mdiffusion mld or actor 118 | DIFF_PE_TYPE: 'actor' # mdiffusion mld or actor 119 | SKIP_CONNECT: False # skip connection for denoiser va 120 | MLP_DIST: False # use linear to expand mean and std rather expand token nums 121 | IS_DIST: False # Mcross distribution kl 122 | PREDICT_EPSILON: True # noise or motion 123 | 124 | DEMO: 125 | EXAMPLE: null 126 | TASK: t2m 127 | 128 | LOGGER: 129 | VAL_EVERY_STEPS: 10 130 | LOGGERS: ['tensorboard', 'wandb'] 131 | TENSORBOARD: 132 | target: pytorch_lightning.loggers.TensorBoardLogger 133 | params: 134 | save_dir: ${FOLDER_EXP} 135 | name: 'tensorboard' 136 | version: '' 137 | WANDB: 138 | target: pytorch_lightning.loggers.WandbLogger 139 | params: 140 | project: null 141 | offline: False 142 | id: null 143 | version: '' 144 | name: ${NAME} 145 | save_dir: ${FOLDER_EXP} 146 | -------------------------------------------------------------------------------- /mGPT/data/tools/collate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import List, Dict 18 | from torch import Tensor 19 | 20 | 21 | def collate_tensor_with_padding(batch: List[Tensor]) -> Tensor: 22 | dims = batch[0].dim() 23 | max_size = [max([b.size(i) for b in batch]) for i in range(dims)] 24 | size = (len(batch),) + tuple(max_size) 25 | canvas = batch[0].new_zeros(size=size) 26 | for i, b in enumerate(batch): 27 | sub_tensor = canvas[i] 28 | for d in range(dims): 29 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 30 | sub_tensor.add_(b) 31 | return canvas 32 | 33 | 34 | def collate_datastruct_and_text(lst_elements: List) -> Dict: 35 | collate_datastruct = lst_elements[0]["datastruct"].transforms.collate 36 | 37 | batch = { 38 | # Collate with padding for the datastruct 39 | "datastruct": collate_datastruct([x["datastruct"] for x in lst_elements]), 40 | # Collate normally for the length 41 | "length": [x["length"] for x in lst_elements], 42 | # Collate the text 43 | "text": [x["text"] for x in lst_elements]} 44 | 45 | # add keyid for example 46 | otherkeys = [x for x in lst_elements[0].keys() if x not in batch] 47 | for key in otherkeys: 48 | batch[key] = [x[key] for x in lst_elements] 49 | 50 | return batch 51 | 52 | def collate_length_and_text(lst_elements: List) -> Dict: 53 | 54 | batch = { 55 | "length_0": [x["length_0"] for x in lst_elements], 56 | "length_1": [x["length_1"] for x in lst_elements], 57 | "length_transition": [x["length_transition"] for x in lst_elements], 58 | "length_1_with_transition": [x["length_1_with_transition"] for x in lst_elements], 59 | "text_0": [x["text_0"] for x in lst_elements], 60 | "text_1": [x["text_1"] for x in lst_elements] 61 | } 62 | 63 | return batch 64 | 65 | def collate_pairs_and_text(lst_elements: List, ) -> Dict: 66 | if 'features_0' not in lst_elements[0]: # test set 67 | collate_datastruct = lst_elements[0]["datastruct"].transforms.collate 68 | batch = {"datastruct": collate_datastruct([x["datastruct"] for x in lst_elements]), 69 | "length_0": [x["length_0"] for x in lst_elements], 70 | "length_1": [x["length_1"] for x in lst_elements], 71 | "length_transition": [x["length_transition"] for x in lst_elements], 72 | "length_1_with_transition": [x["length_1_with_transition"] for x in lst_elements], 73 | "text_0": [x["text_0"] for x in lst_elements], 74 | "text_1": [x["text_1"] for x in lst_elements] 75 | } 76 | 77 | else: 78 | batch = {"motion_feats_0": collate_tensor_with_padding([el["features_0"] for el in lst_elements]), 79 | "motion_feats_1": collate_tensor_with_padding([el["features_1"] for el in lst_elements]), 80 | "motion_feats_1_with_transition": collate_tensor_with_padding([el["features_1_with_transition"] for el in lst_elements]), 81 | "length_0": [x["length_0"] for x in lst_elements], 82 | "length_1": [x["length_1"] for x in lst_elements], 83 | "length_transition": [x["length_transition"] for x in lst_elements], 84 | "length_1_with_transition": [x["length_1_with_transition"] for x in lst_elements], 85 | "text_0": [x["text_0"] for x in lst_elements], 86 | "text_1": [x["text_1"] for x in lst_elements] 87 | } 88 | return batch 89 | 90 | 91 | def collate_text_and_length(lst_elements: Dict) -> Dict: 92 | batch = {"length": [x["length"] for x in lst_elements], 93 | "text": [x["text"] for x in lst_elements]} 94 | 95 | # add keyid for example 96 | otherkeys = [x for x in lst_elements[0].keys() if x not in batch and x != "datastruct"] 97 | for key in otherkeys: 98 | batch[key] = [x[key] for x in lst_elements] 99 | return batch 100 | 
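A quick sketch of the padding collator defined above, using two variable-length HumanML3D-style feature clips (the feature width 263 is just an example):

```python
import torch
from mGPT.data.tools.collate import collate_tensor_with_padding

feats = [torch.ones(48, 263), torch.ones(64, 263)]  # two clips of different lengths
batch = collate_tensor_with_padding(feats)
print(batch.shape)          # torch.Size([2, 64, 263])
print(batch[0, 48:].sum())  # tensor(0.) -- the shorter clip is zero-padded at the end
```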
-------------------------------------------------------------------------------- /mGPT/data/humanml/dataset_t2m_m2t.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from torch.utils import data 4 | from .dataset_t2m import Text2MotionDataset 5 | import codecs as cs 6 | from os.path import join as pjoin 7 | 8 | 9 | class Text2MotionDatasetM2T(data.Dataset): 10 | 11 | def __init__( 12 | self, 13 | data_root, 14 | split, 15 | mean, 16 | std, 17 | max_motion_length=196, 18 | min_motion_length=40, 19 | unit_length=4, 20 | fps=20, 21 | tmpFile=True, 22 | tiny=False, 23 | debug=False, 24 | **kwargs, 25 | ): 26 | 27 | self.max_motion_length = max_motion_length 28 | self.min_motion_length = min_motion_length 29 | self.unit_length = unit_length 30 | 31 | # Data mean and std 32 | self.mean = mean 33 | self.std = std 34 | 35 | # Data path 36 | split_file = pjoin(data_root, split + '.txt') 37 | motion_dir = pjoin(data_root, 'new_joint_vecs') 38 | text_dir = pjoin(data_root, 'texts') 39 | 40 | # Data id list 41 | self.id_list = [] 42 | with cs.open(split_file, "r") as f: 43 | for line in f.readlines(): 44 | self.id_list.append(line.strip()) 45 | 46 | new_name_list = [] 47 | length_list = [] 48 | data_dict = {} 49 | for name in self.id_list: 50 | # try: 51 | motion = np.load(pjoin(motion_dir, name + '.npy')) 52 | if (len(motion)) < self.min_motion_length or (len(motion) >= 200): 53 | continue 54 | 55 | 56 | text_data = [] 57 | flag = False 58 | 59 | with cs.open(pjoin(text_dir, name + '.txt')) as f: 60 | for line in f.readlines(): 61 | text_dict = {} 62 | line_split = line.strip().split('#') 63 | caption = line_split[0] 64 | tokens = line_split[1].split(' ') 65 | f_tag = float(line_split[2]) 66 | to_tag = float(line_split[3]) 67 | f_tag = 0.0 if np.isnan(f_tag) else f_tag 68 | to_tag = 0.0 if np.isnan(to_tag) else to_tag 69 | 70 | text_dict['caption'] = caption 71 | text_dict['tokens'] = tokens 72 | if f_tag == 0.0 and to_tag == 0.0: 73 | flag = True 74 | text_data.append(text_dict) 75 | else: 76 | try: 77 | n_motion = motion[int(f_tag*20) : int(to_tag*20)] 78 | 79 | if (len(n_motion)) < min_motion_length or (len(n_motion) >= 200): 80 | continue 81 | 82 | new_name = "%s_%f_%f"%(name, f_tag, to_tag) 83 | data_dict[new_name] = {'motion': n_motion, 84 | 'length': len(n_motion), 85 | 'text':[text_dict]} 86 | new_name_list.append(new_name) 87 | except: 88 | print(line_split) 89 | print(line_split[2], line_split[3], f_tag, to_tag, name) 90 | if flag: 91 | data_dict[name] = {'motion': motion, 92 | 'length': len(motion), 93 | 'name': name, 94 | 'text': text_data} 95 | 96 | new_name_list.append(name) 97 | length_list.append(len(motion)) 98 | # except: 99 | # # Some motion may not exist in KIT dataset 100 | # pass 101 | 102 | self.length_arr = np.array(length_list) 103 | self.data_dict = data_dict 104 | self.name_list = new_name_list 105 | self.nfeats = motion.shape[-1] 106 | 107 | 108 | def __len__(self): 109 | return len(self.data_dict) 110 | 111 | def __getitem__(self, item): 112 | name = self.name_list[item] 113 | data = self.data_dict[name] 114 | motion, m_length = data['motion'], data['length'] 115 | 116 | "Z Normalization" 117 | motion = (motion - self.mean) / self.std 118 | 119 | return name, motion, m_length, True, True, True, True, True, True 120 | -------------------------------------------------------------------------------- /mGPT/utils/temos_utils.py: 
-------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import numpy as np 4 | import torch 5 | from torch import Tensor 6 | 7 | import mGPT.utils.geometry_conver as geometry_conver 8 | 9 | 10 | def lengths_to_mask(lengths: List[int], 11 | device: torch.device, 12 | max_len: int = None) -> Tensor: 13 | lengths = torch.tensor(lengths, device=device) 14 | max_len = max_len if max_len else max(lengths) 15 | mask = torch.arange(max_len, device=device).expand( 16 | len(lengths), max_len) < lengths.unsqueeze(1) 17 | return mask 18 | 19 | 20 | def detach_to_numpy(tensor): 21 | return tensor.detach().cpu().numpy() 22 | 23 | 24 | def remove_padding(tensors, lengths): 25 | return [ 26 | tensor[:tensor_length] 27 | for tensor, tensor_length in zip(tensors, lengths) 28 | ] 29 | 30 | 31 | def nfeats_of(rottype): 32 | if rottype in ["rotvec", "axisangle"]: 33 | return 3 34 | elif rottype in ["rotquat", "quaternion"]: 35 | return 4 36 | elif rottype in ["rot6d", "6drot", "rotation6d"]: 37 | return 6 38 | elif rottype in ["rotmat"]: 39 | return 9 40 | else: 41 | return TypeError("This rotation type doesn't have features.") 42 | 43 | 44 | def axis_angle_to(newtype, rotations): 45 | if newtype in ["matrix"]: 46 | rotations = geometry_conver.axis_angle_to_matrix(rotations) 47 | return rotations 48 | elif newtype in ["rotmat"]: 49 | rotations = geometry_conver.axis_angle_to_matrix(rotations) 50 | rotations = matrix_to("rotmat", rotations) 51 | return rotations 52 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 53 | rotations = geometry_conver.axis_angle_to_matrix(rotations) 54 | rotations = matrix_to("rot6d", rotations) 55 | return rotations 56 | elif newtype in ["rotquat", "quaternion"]: 57 | rotations = geometry_conver.axis_angle_to_quaternion(rotations) 58 | return rotations 59 | elif newtype in ["rotvec", "axisangle"]: 60 | return rotations 61 | else: 62 | raise NotImplementedError 63 | 64 | 65 | def matrix_to(newtype, rotations): 66 | if newtype in ["matrix"]: 67 | return rotations 68 | if newtype in ["rotmat"]: 69 | rotations = rotations.reshape((*rotations.shape[:-2], 9)) 70 | return rotations 71 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 72 | rotations = geometry_conver.matrix_to_rotation_6d(rotations) 73 | return rotations 74 | elif newtype in ["rotquat", "quaternion"]: 75 | rotations = geometry_conver.matrix_to_quaternion(rotations) 76 | return rotations 77 | elif newtype in ["rotvec", "axisangle"]: 78 | rotations = geometry_conver.matrix_to_axis_angle(rotations) 79 | return rotations 80 | else: 81 | raise NotImplementedError 82 | 83 | 84 | def to_matrix(oldtype, rotations): 85 | if oldtype in ["matrix"]: 86 | return rotations 87 | if oldtype in ["rotmat"]: 88 | rotations = rotations.reshape((*rotations.shape[:-2], 3, 3)) 89 | return rotations 90 | elif oldtype in ["rot6d", "6drot", "rotation6d"]: 91 | rotations = geometry_conver.rotation_6d_to_matrix(rotations) 92 | return rotations 93 | elif oldtype in ["rotquat", "quaternion"]: 94 | rotations = geometry_conver.quaternion_to_matrix(rotations) 95 | return rotations 96 | elif oldtype in ["rotvec", "axisangle"]: 97 | rotations = geometry_conver.axis_angle_to_matrix(rotations) 98 | return rotations 99 | else: 100 | raise NotImplementedError 101 | 102 | 103 | # TODO: use a real subsampler.. 
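# In other words: keep every `step`-th frame, so going from 100 fps to 12.5 fps
# keeps frames 0, 8, 16, ... (step = int(100 / 12.5) = 8).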
104 | def subsample(num_frames, last_framerate, new_framerate): 105 | step = int(last_framerate / new_framerate) 106 | assert step >= 1 107 | frames = np.arange(0, num_frames, step) 108 | return frames 109 | 110 | 111 | # TODO: use a real upsampler.. 112 | def upsample(motion, last_framerate, new_framerate): 113 | step = int(new_framerate / last_framerate) 114 | assert step >= 1 115 | 116 | # Alpha blending => interpolation 117 | alpha = np.linspace(0, 1, step + 1) 118 | last = np.einsum("l,...->l...", 1 - alpha, motion[:-1]) 119 | new = np.einsum("l,...->l...", alpha, motion[1:]) 120 | 121 | chuncks = (last + new)[:-1] 122 | output = np.concatenate(chuncks.swapaxes(1, 0)) 123 | # Don't forget the last one 124 | output = np.concatenate((output, motion[[-1]])) 125 | return output 126 | 127 | 128 | if __name__ == "__main__": 129 | motion = np.arange(105) 130 | submotion = motion[subsample(len(motion), 100.0, 12.5)] 131 | newmotion = upsample(submotion, 12.5, 100) 132 | 133 | print(newmotion) 134 | -------------------------------------------------------------------------------- /mGPT/models/utils/blocks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mGPT.models.notused import AdaptiveInstanceNorm1d 5 | 6 | 7 | class MLP(nn.Module): 8 | 9 | def __init__(self, cfg, out_dim, is_init): 10 | super(MLP, self).__init__() 11 | dims = cfg.MODEL.MOTION_DECODER.MLP_DIM 12 | n_blk = len(dims) 13 | norm = 'none' 14 | acti = 'lrelu' 15 | 16 | layers = [] 17 | for i in range(n_blk - 1): 18 | layers += LinearBlock(dims[i], dims[i + 1], norm=norm, acti=acti) 19 | layers += LinearBlock(dims[-1], out_dim, norm='none', acti='none') 20 | self.model = nn.Sequential(*layers) 21 | 22 | if is_init: 23 | for m in self.modules(): 24 | if isinstance(m, nn.Linear): 25 | #nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 26 | nn.init.constant_(m.weight, 1) 27 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 28 | nn.init.constant_(m.weight, 1) 29 | nn.init.constant_(m.bias, 0) 30 | 31 | def forward(self, x): 32 | return self.model(x.view(x.size(0), -1)) 33 | 34 | 35 | def ZeroPad1d(sizes): 36 | return nn.ConstantPad1d(sizes, 0) 37 | 38 | 39 | def get_acti_layer(acti='relu', inplace=True): 40 | 41 | if acti == 'relu': 42 | return [nn.ReLU(inplace=inplace)] 43 | elif acti == 'lrelu': 44 | return [nn.LeakyReLU(0.2, inplace=inplace)] 45 | elif acti == 'tanh': 46 | return [nn.Tanh()] 47 | elif acti == 'none': 48 | return [] 49 | else: 50 | assert 0, "Unsupported activation: {}".format(acti) 51 | 52 | 53 | def get_norm_layer(norm='none', norm_dim=None): 54 | 55 | if norm == 'bn': 56 | return [nn.BatchNorm1d(norm_dim)] 57 | elif norm == 'in': 58 | # return [nn.InstanceNorm1d(norm_dim, affine=False)] # for rt42! 
59 | return [nn.InstanceNorm1d(norm_dim, affine=True)] 60 | elif norm == 'adain': 61 | return [AdaptiveInstanceNorm1d(norm_dim)] 62 | elif norm == 'none': 63 | return [] 64 | else: 65 | assert 0, "Unsupported normalization: {}".format(norm) 66 | 67 | 68 | def get_dropout_layer(dropout=None): 69 | if dropout is not None: 70 | return [nn.Dropout(p=dropout)] 71 | else: 72 | return [] 73 | 74 | 75 | def ConvLayers(kernel_size, 76 | in_channels, 77 | out_channels, 78 | stride=1, 79 | pad_type='reflect', 80 | use_bias=True): 81 | """ 82 | returns a list of [pad, conv] => should be += to some list, then apply sequential 83 | """ 84 | 85 | if pad_type == 'reflect': 86 | pad = nn.ReflectionPad1d 87 | elif pad_type == 'replicate': 88 | pad = nn.ReplicationPad1d 89 | elif pad_type == 'zero': 90 | pad = ZeroPad1d 91 | else: 92 | assert 0, "Unsupported padding type: {}".format(pad_type) 93 | 94 | pad_l = (kernel_size - 1) // 2 95 | pad_r = kernel_size - 1 - pad_l 96 | return [ 97 | pad((pad_l, pad_r)), 98 | nn.Conv1d(in_channels, 99 | out_channels, 100 | kernel_size=kernel_size, 101 | stride=stride, 102 | bias=use_bias) 103 | ] 104 | 105 | 106 | def ConvBlock(kernel_size, 107 | in_channels, 108 | out_channels, 109 | stride=1, 110 | pad_type='reflect', 111 | dropout=None, 112 | norm='none', 113 | acti='lrelu', 114 | acti_first=False, 115 | use_bias=True, 116 | inplace=True): 117 | """ 118 | returns a list of [pad, conv, norm, acti] or [acti, pad, conv, norm] 119 | """ 120 | 121 | layers = ConvLayers(kernel_size, 122 | in_channels, 123 | out_channels, 124 | stride=stride, 125 | pad_type=pad_type, 126 | use_bias=use_bias) 127 | layers += get_dropout_layer(dropout) 128 | layers += get_norm_layer(norm, norm_dim=out_channels) 129 | acti_layers = get_acti_layer(acti, inplace=inplace) 130 | 131 | if acti_first: 132 | return acti_layers + layers 133 | else: 134 | return layers + acti_layers 135 | 136 | 137 | def LinearBlock(in_dim, out_dim, dropout=None, norm='none', acti='relu'): 138 | 139 | use_bias = True 140 | layers = [] 141 | layers.append(nn.Linear(in_dim, out_dim, bias=use_bias)) 142 | layers += get_dropout_layer(dropout) 143 | layers += get_norm_layer(norm, norm_dim=out_dim) 144 | layers += get_acti_layer(acti) 145 | 146 | return layers 147 | -------------------------------------------------------------------------------- /prepare/merge_smplh_mano.py: -------------------------------------------------------------------------------- 1 | # inspired and modified by Mathis Petrovich from 2 | # https://github.com/vchoutas/smplx/tree/master/tools 3 | 4 | # -*- coding: utf-8 -*- 5 | 6 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 7 | # holder of all proprietary rights on this computer program. 8 | # You can only use this computer program if you have closed 9 | # a license agreement with MPG or you get the right to use the computer 10 | # program from someone who is authorized to grant you that right. 11 | # Any use of the computer program without a valid license is prohibited and 12 | # liable to prosecution. 13 | # 14 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 15 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 16 | # for Intelligent Systems and the Max Planck Institute for Biological 17 | # Cybernetics. All rights reserved. 
18 | # 19 | # Contact: ps-license@tuebingen.mpg.de 20 | 21 | import os 22 | import os.path as osp 23 | import pickle 24 | 25 | import argparse 26 | 27 | import numpy as np 28 | 29 | 30 | def remove_chumpy_dep(dico): 31 | output_dict = {} 32 | for key, val in dico.items(): 33 | if 'chumpy' in str(type(val)): 34 | output_dict[key] = np.array(val) 35 | else: 36 | output_dict[key] = val 37 | return output_dict 38 | 39 | 40 | def load_and_remove_chumpy_dep(path): 41 | with open(path, 'rb') as pkl_file: 42 | import warnings 43 | warnings.filterwarnings("ignore", category=DeprecationWarning) 44 | data = pickle.load(pkl_file, encoding="latin1") 45 | 46 | data = remove_chumpy_dep(data) 47 | return data 48 | 49 | 50 | def load_npz_into_dict(path): 51 | data = {key: val for key, val in np.load(path).items()} 52 | data = remove_chumpy_dep(data) 53 | return data 54 | 55 | 56 | def load_and_clean_data(path): 57 | ext = os.path.splitext(path)[-1] 58 | if ext == ".npz": 59 | data = load_npz_into_dict(path) 60 | elif ext == ".pkl": 61 | data = load_and_remove_chumpy_dep(path) 62 | else: 63 | raise TypeError("The format should be pkl or npz") 64 | return data 65 | 66 | 67 | def merge_models(smplh_fn, mano_left_fn, mano_right_fn, 68 | output_folder='output'): 69 | 70 | body_data = load_and_clean_data(smplh_fn) 71 | lhand_data = load_and_clean_data(mano_left_fn) 72 | rhand_data = load_and_clean_data(mano_right_fn) 73 | 74 | modelname = osp.split(smplh_fn)[1] 75 | parent_folder = osp.split(osp.split(smplh_fn)[0])[1] 76 | if "female" in parent_folder + "_" + modelname.lower(): 77 | out_fn = "SMPLH_FEMALE.npz" 78 | elif "male" in parent_folder + "_" + modelname.lower(): 79 | out_fn = "SMPLH_MALE.npz" 80 | elif "neutral" in parent_folder + "_" + modelname.lower(): 81 | out_fn = "SMPLH_NEUTRAL.npz" 82 | else: 83 | out_fn = modelname 84 | 85 | output_data = body_data.copy() 86 | output_data['hands_componentsl'] = lhand_data['hands_components'] 87 | output_data['hands_componentsr'] = rhand_data['hands_components'] 88 | 89 | output_data['hands_coeffsl'] = lhand_data['hands_coeffs'] 90 | output_data['hands_coeffsr'] = rhand_data['hands_coeffs'] 91 | 92 | output_data['hands_meanl'] = lhand_data['hands_mean'] 93 | output_data['hands_meanr'] = rhand_data['hands_mean'] 94 | 95 | # Just in case 96 | output_data = remove_chumpy_dep(output_data) 97 | 98 | out_path = osp.join(output_folder, out_fn) 99 | print('Saving to {}'.format(out_path)) 100 | 101 | # np.savez(out_path, output_data) 102 | np.savez_compressed(out_path, **output_data) 103 | # with open(out_path, 'wb') as output_file: 104 | # pickle.dump(output_data, output_file) 105 | 106 | 107 | if __name__ == '__main__': 108 | parser = argparse.ArgumentParser() 109 | parser.add_argument('--smplh-fn', dest='smplh_fn', required=True, 110 | type=str, help='The path to the SMPLH model') 111 | parser.add_argument('--mano-left-fn', dest='mano_left_fn', required=True, 112 | type=str, help='The path to the left hand MANO model') 113 | parser.add_argument('--mano-right-fn', dest='mano_right_fn', required=True, 114 | type=str, help='The path to the right hand MANO model') 115 | parser.add_argument('--output-folder', dest='output_folder', 116 | required=True, type=str, 117 | help='The path to the output folder') 118 | 119 | args = parser.parse_args() 120 | 121 | smplh_fn = args.smplh_fn 122 | mano_left_fn = args.mano_left_fn 123 | mano_right_fn = args.mano_right_fn 124 | output_folder = args.output_folder 125 | 126 | if not osp.exists(output_folder): 127 | print('Creating 
directory: {}'.format(output_folder)) 128 | os.makedirs(output_folder) 129 | 130 | merge_models(smplh_fn, mano_left_fn, mano_right_fn, output_folder) 131 | -------------------------------------------------------------------------------- /mGPT/metrics/mm.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torchmetrics import Metric 6 | from torchmetrics.functional import pairwise_euclidean_distance 7 | from .utils import * 8 | import os 9 | from mGPT.config import instantiate_from_config 10 | 11 | class MMMetrics(Metric): 12 | full_state_update = True 13 | 14 | def __init__(self, cfg, dataname='humanml3d', mm_num_times=10, dist_sync_on_step=True, **kwargs): 15 | super().__init__(dist_sync_on_step=dist_sync_on_step) 16 | 17 | self.name = "MultiModality scores" 18 | self.cfg = cfg 19 | self.dataname = dataname 20 | self.mm_num_times = mm_num_times 21 | 22 | self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum") 23 | self.add_state("count_seq", 24 | default=torch.tensor(0), 25 | dist_reduce_fx="sum") 26 | 27 | self.metrics = ["MultiModality"] 28 | self.add_state("MultiModality", 29 | default=torch.tensor(0.), 30 | dist_reduce_fx="sum") 31 | 32 | # chached batches 33 | self.add_state("mm_motion_embeddings", default=[], dist_reduce_fx=None) 34 | 35 | # T2M Evaluator 36 | self._get_t2m_evaluator(cfg) 37 | 38 | def _get_t2m_evaluator(self, cfg): 39 | """ 40 | load T2M text encoder and motion encoder for evaluating 41 | """ 42 | # init module 43 | self.t2m_textencoder = instantiate_from_config(cfg.METRIC.TM2T.t2m_textencoder) 44 | self.t2m_moveencoder = instantiate_from_config(cfg.METRIC.TM2T.t2m_moveencoder) 45 | self.t2m_motionencoder = instantiate_from_config(cfg.METRIC.TM2T.t2m_motionencoder) 46 | 47 | # load pretrianed 48 | if self.dataname == "kit": 49 | dataname = "kit" 50 | else: 51 | dataname = "t2m" 52 | t2m_checkpoint = torch.load(os.path.join( 53 | cfg.METRIC.TM2T.t2m_path, dataname, 54 | "text_mot_match/model/finest.tar"), 55 | map_location="cpu") 56 | 57 | self.t2m_textencoder.load_state_dict(t2m_checkpoint["text_encoder"]) 58 | self.t2m_moveencoder.load_state_dict( 59 | t2m_checkpoint["movement_encoder"]) 60 | self.t2m_motionencoder.load_state_dict( 61 | t2m_checkpoint["motion_encoder"]) 62 | 63 | # freeze params 64 | self.t2m_textencoder.eval() 65 | self.t2m_moveencoder.eval() 66 | self.t2m_motionencoder.eval() 67 | for p in self.t2m_textencoder.parameters(): 68 | p.requires_grad = False 69 | for p in self.t2m_moveencoder.parameters(): 70 | p.requires_grad = False 71 | for p in self.t2m_motionencoder.parameters(): 72 | p.requires_grad = False 73 | 74 | def compute(self, sanity_flag): 75 | count = self.count.item() 76 | count_seq = self.count_seq.item() 77 | 78 | # init metrics 79 | metrics = {metric: getattr(self, metric) for metric in self.metrics} 80 | 81 | # if in sanity check stage then jump 82 | if sanity_flag: 83 | return metrics 84 | 85 | # cat all embeddings 86 | all_mm_motions = torch.cat(self.mm_motion_embeddings, 87 | axis=0).cpu().numpy() 88 | metrics['MultiModality'] = calculate_multimodality_np( 89 | all_mm_motions, self.mm_num_times) 90 | 91 | # Reset 92 | self.reset() 93 | 94 | return {**metrics} 95 | 96 | def update( 97 | self, 98 | feats_rst: Tensor, 99 | lengths_rst: List[int], 100 | ): 101 | self.count += sum(lengths_rst) 102 | self.count_seq += len(lengths_rst) 103 | 104 | align_idx = np.argsort(lengths_rst)[::-1].copy() 105 | 
feats_rst = feats_rst[align_idx] 106 | lengths_rst = np.array(lengths_rst)[align_idx] 107 | recmotion_embeddings = self.get_motion_embeddings( 108 | feats_rst, lengths_rst) 109 | cache = [0] * len(lengths_rst) 110 | for i in range(len(lengths_rst)): 111 | cache[align_idx[i]] = recmotion_embeddings[i:i + 1] 112 | 113 | mm_motion_embeddings = torch.cat(cache, axis=0).unsqueeze(0) 114 | # self.mm_motion_embeddings.extend(cache) 115 | # print(mm_motion_embeddings.shape) 116 | # # store all mm motion embeddings 117 | self.mm_motion_embeddings.append(mm_motion_embeddings) 118 | 119 | def get_motion_embeddings(self, feats: Tensor, lengths: List[int]): 120 | m_lens = torch.tensor(lengths) 121 | m_lens = torch.div(m_lens, 122 | self.cfg.DATASET.HUMANML3D.UNIT_LEN, 123 | rounding_mode="floor") 124 | 125 | mov = self.t2m_moveencoder(feats[..., :-4]).detach() 126 | emb = self.t2m_motionencoder(mov, m_lens) 127 | 128 | # [bs, nlatent*ndim] <= [bs, nlatent, ndim] 129 | return torch.flatten(emb, start_dim=1).detach() 130 | -------------------------------------------------------------------------------- /demos/inbetween.txt: -------------------------------------------------------------------------------- 1 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/012657.npy 2 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M003137.npy 3 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M011458.npy 4 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/009410.npy 5 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M001298.npy 6 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M010526.npy 7 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M001632.npy 8 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M009521.npy 9 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/000086.npy 10 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M005152.npy 11 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M008910.npy 12 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M010563.npy 13 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/000307.npy 14 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/008588.npy 15 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M011731.npy 16 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/000921.npy 17 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M004975.npy 18 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/010698.npy 19 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/P_M008159.npy 20 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/005413.npy 21 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/004867.npy 22 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M002246.npy 23 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M010043.npy 24 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M014536.npy 25 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/J_M002982.npy 26 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M011785.npy 27 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/002093.npy 28 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M013476.npy 29 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M000343.npy 30 | Complete the masked motion: 
#datasets/humanml3d/new_joint_vecs/M012561.npy 31 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M000472.npy 32 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M006819.npy 33 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/000363.npy 34 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M014253.npy 35 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/013112.npy 36 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M000379.npy 37 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/008567.npy 38 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/013514.npy 39 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M003365.npy 40 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/002550.npy 41 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/011095.npy 42 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/R_000889.npy 43 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/006236.npy 44 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/009031.npy 45 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/001676.npy 46 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M013314.npy 47 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M012611.npy 48 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/M005468.npy 49 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/008357.npy 50 | Complete the masked motion: #datasets/humanml3d/new_joint_vecs/V_M007878.npy 51 | -------------------------------------------------------------------------------- /mGPT/data/transforms/rots2rfeats/globvelandy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2020 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems. All rights reserved. 
14 | # 15 | # Contact: ps-license@tuebingen.mpg.de 16 | 17 | from typing import Optional 18 | 19 | import torch 20 | from torch import Tensor 21 | from einops import rearrange 22 | 23 | from mGPT.utils.easyconvert import rep_to_rep, nfeats_of, to_matrix 24 | import mGPT.utils.geometry_tools as geometry_tools 25 | 26 | from .base import Rots2Rfeats 27 | 28 | 29 | class Globalvelandy(Rots2Rfeats): 30 | def __init__(self, 31 | path: Optional[str] = None, 32 | normalization: bool = False, 33 | pose_rep: str = "rot6d", 34 | canonicalize: bool = False, 35 | offset: bool = True, 36 | **kwargs) -> None: 37 | super().__init__(path=path, normalization=normalization) 38 | 39 | self.canonicalize = canonicalize 40 | self.pose_rep = pose_rep 41 | self.nfeats = nfeats_of(pose_rep) 42 | self.offset = offset 43 | 44 | def forward(self, data, data_rep='matrix', first_frame=None) -> Tensor: 45 | 46 | poses, trans = data.rots, data.trans 47 | 48 | # extract the root gravity axis 49 | # for smpl it is the last coordinate 50 | root_y = trans[..., 2] 51 | trajectory = trans[..., [0, 1]] 52 | 53 | # Compute the difference of trajectory 54 | vel_trajectory = torch.diff(trajectory, dim=-2) 55 | 56 | # 0 for the first one => keep the dimentionality 57 | if first_frame is None: 58 | first_frame = 0 * vel_trajectory[..., [0], :] 59 | 60 | vel_trajectory = torch.cat((first_frame, vel_trajectory), dim=-2) 61 | 62 | # first normalize the data 63 | if self.canonicalize: 64 | 65 | matrix_poses = rep_to_rep(data_rep, 'matrix', poses) 66 | global_orient = matrix_poses[..., 0, :, :] 67 | 68 | # remove the rotation 69 | rot2d = rep_to_rep(data_rep, 'rotvec', poses[0, 0, ...]) 70 | 71 | # Remove the fist rotation along the vertical axis 72 | rot2d[..., :2] = 0 73 | 74 | if self.offset: 75 | # add a bit more rotation 76 | rot2d[..., 2] += torch.pi / 2 77 | 78 | rot2d = rep_to_rep('rotvec', 'matrix', rot2d) 79 | 80 | # turn with the same amount all the rotations 81 | global_orient = torch.einsum("...kj,...kl->...jl", rot2d, 82 | global_orient) 83 | 84 | matrix_poses = torch.cat( 85 | (global_orient[..., None, :, :], matrix_poses[..., 1:, :, :]), 86 | dim=-3) 87 | 88 | poses = rep_to_rep('matrix', data_rep, matrix_poses) 89 | 90 | # Turn the trajectory as well 91 | vel_trajectory = torch.einsum("...kj,...lk->...lj", 92 | rot2d[..., :2, :2], vel_trajectory) 93 | 94 | poses = rep_to_rep(data_rep, self.pose_rep, poses) 95 | features = torch.cat( 96 | (root_y[..., None], vel_trajectory, 97 | rearrange(poses, "... joints rot -> ... (joints rot)")), 98 | dim=-1) 99 | features = self.normalize(features) 100 | 101 | return features 102 | 103 | def extract(self, features): 104 | root_y = features[..., 0] 105 | vel_trajectory = features[..., 1:3] 106 | poses_features = features[..., 3:] 107 | poses = rearrange(poses_features, 108 | "... (joints rot) -> ... 
joints rot", 109 | rot=self.nfeats) 110 | return root_y, vel_trajectory, poses 111 | 112 | def inverse(self, features, last_frame=None): 113 | features = self.unnormalize(features) 114 | root_y, vel_trajectory, poses = self.extract(features) 115 | 116 | # integrate the trajectory 117 | trajectory = torch.cumsum(vel_trajectory, dim=-2) 118 | if last_frame is None: 119 | pass 120 | # First frame should be 0, but if infered it is better to ensure it 121 | trajectory = trajectory - trajectory[..., [0], :] 122 | 123 | # Get back the translation 124 | trans = torch.cat([trajectory, root_y[..., None]], dim=-1) 125 | matrix_poses = rep_to_rep(self.pose_rep, 'matrix', poses) 126 | 127 | from ..smpl import RotTransDatastruct 128 | return RotTransDatastruct(rots=matrix_poses, trans=trans) 129 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | import pytorch_lightning as pl 5 | import torch 6 | from pathlib import Path 7 | from rich import get_console 8 | from rich.table import Table 9 | from omegaconf import OmegaConf 10 | from mGPT.callback import build_callbacks 11 | from mGPT.config import parse_args 12 | from mGPT.data.build_data import build_data 13 | from mGPT.models.build_model import build_model 14 | from mGPT.utils.logger import create_logger 15 | from mGPT.utils.load_checkpoint import load_pretrained, load_pretrained_vae 16 | 17 | 18 | def print_table(title, metrics, logger=None): 19 | table = Table(title=title) 20 | 21 | table.add_column("Metrics", style="cyan", no_wrap=True) 22 | table.add_column("Value", style="magenta") 23 | 24 | for key, value in metrics.items(): 25 | table.add_row(key, str(value)) 26 | 27 | console = get_console() 28 | console.print(table, justify="center") 29 | 30 | logger.info(metrics) if logger else None 31 | 32 | 33 | def get_metric_statistics(values, replication_times): 34 | mean = np.mean(values, axis=0) 35 | std = np.std(values, axis=0) 36 | conf_interval = 1.96 * std / np.sqrt(replication_times) 37 | return mean, conf_interval 38 | 39 | 40 | def main(): 41 | # parse options 42 | cfg = parse_args(phase="test") # parse config file 43 | cfg.FOLDER = cfg.TEST.FOLDER 44 | 45 | # Logger 46 | logger = create_logger(cfg, phase="test") 47 | logger.info(OmegaConf.to_yaml(cfg)) 48 | 49 | # Output dir 50 | model_name = cfg.model.target.split('.')[-2].lower() 51 | output_dir = Path( 52 | os.path.join(cfg.FOLDER, model_name, cfg.NAME, "samples_" + cfg.TIME)) 53 | if cfg.TEST.SAVE_PREDICTIONS: 54 | output_dir.mkdir(parents=True, exist_ok=True) 55 | logger.info(f"Saving predictions to {str(output_dir)}") 56 | 57 | # Seed 58 | pl.seed_everything(cfg.SEED_VALUE) 59 | 60 | # Environment Variables 61 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 62 | 63 | # Callbacks 64 | callbacks = build_callbacks(cfg, logger=logger, phase="test") 65 | logger.info("Callbacks initialized") 66 | 67 | # Dataset 68 | datamodule = build_data(cfg) 69 | logger.info("datasets module {} initialized".format("".join( 70 | cfg.DATASET.target.split('.')[-2]))) 71 | 72 | # Model 73 | model = build_model(cfg, datamodule) 74 | logger.info("model {} loaded".format(cfg.model.target)) 75 | 76 | # Lightning Trainer 77 | trainer = pl.Trainer( 78 | benchmark=False, 79 | max_epochs=cfg.TRAIN.END_EPOCH, 80 | accelerator=cfg.ACCELERATOR, 81 | devices=list(range(len(cfg.DEVICE))), 82 | default_root_dir=cfg.FOLDER_EXP, 83 | 
reload_dataloaders_every_n_epochs=1, 84 | deterministic=False, 85 | detect_anomaly=False, 86 | enable_progress_bar=True, 87 | logger=None, 88 | callbacks=callbacks, 89 | ) 90 | 91 | # Strict load vae model 92 | if cfg.TRAIN.PRETRAINED_VAE: 93 | load_pretrained_vae(cfg, model, logger) 94 | 95 | # loading state dict 96 | if cfg.TEST.CHECKPOINTS: 97 | load_pretrained(cfg, model, logger, phase="test") 98 | else: 99 | logger.warning("No checkpoints provided!!!") 100 | 101 | # Calculate metrics 102 | all_metrics = {} 103 | replication_times = cfg.TEST.REPLICATION_TIMES 104 | 105 | for i in range(replication_times): 106 | metrics_type = ", ".join(cfg.METRIC.TYPE) 107 | logger.info(f"Evaluating {metrics_type} - Replication {i}") 108 | metrics = trainer.test(model, datamodule=datamodule)[0] 109 | if "TM2TMetrics" in metrics_type and cfg.model.params.task == "t2m" and cfg.model.params.stage != 'vae': 110 | # mm meteics 111 | logger.info(f"Evaluating MultiModality - Replication {i}") 112 | datamodule.mm_mode(True) 113 | mm_metrics = trainer.test(model, datamodule=datamodule)[0] 114 | # metrics.update(mm_metrics) 115 | metrics.update(mm_metrics) 116 | datamodule.mm_mode(False) 117 | for key, item in metrics.items(): 118 | if key not in all_metrics: 119 | all_metrics[key] = [item] 120 | else: 121 | all_metrics[key] += [item] 122 | 123 | all_metrics_new = {} 124 | 125 | for key, item in all_metrics.items(): 126 | mean, conf_interval = get_metric_statistics(np.array(item), 127 | replication_times) 128 | all_metrics_new[key + "/mean"] = mean 129 | all_metrics_new[key + "/conf_interval"] = conf_interval 130 | 131 | print_table(f"Mean Metrics", all_metrics_new, logger=logger) 132 | all_metrics_new.update(all_metrics) 133 | 134 | # Save metrics to file 135 | metric_file = output_dir.parent / f"metrics_{cfg.TIME}.json" 136 | with open(metric_file, "w", encoding="utf-8") as f: 137 | json.dump(all_metrics_new, f, indent=4) 138 | logger.info(f"Testing done, the metrics are saved to {str(metric_file)}") 139 | 140 | 141 | if __name__ == "__main__": 142 | main() 143 | -------------------------------------------------------------------------------- /mGPT/data/webui.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from os.path import join as pjoin 4 | from .humanml.scripts.motion_process import (process_file, recover_from_ric) 5 | from . 
import BASEDataModule 6 | from .humanml import Text2MotionDatasetEval, Text2MotionDataset, Text2MotionDatasetCB, MotionDataset, MotionDatasetVQ, Text2MotionDatasetToken, Text2MotionDatasetM2T 7 | from .utils import humanml3d_collate 8 | 9 | 10 | class HumanML3DDataModule(BASEDataModule): 11 | def __init__(self, cfg, **kwargs): 12 | 13 | super().__init__(collate_fn=humanml3d_collate) 14 | self.cfg = cfg 15 | self.save_hyperparameters(logger=False) 16 | 17 | # Basic info of the dataset 18 | cfg.DATASET.JOINT_TYPE = 'humanml3d' 19 | self.name = "humanml3d" 20 | self.njoints = 22 21 | 22 | # Path to the dataset 23 | data_root = cfg.DATASET.HUMANML3D.ROOT 24 | self.hparams.data_root = data_root 25 | self.hparams.text_dir = pjoin(data_root, "texts") 26 | self.hparams.motion_dir = pjoin(data_root, 'new_joint_vecs') 27 | 28 | # Mean and std of the dataset 29 | self.hparams.mean = np.load(pjoin('assets/meta', "mean.npy")) 30 | self.hparams.std = np.load(pjoin('assets/meta', "std.npy")) 31 | 32 | # Mean and std for fair evaluation 33 | self.hparams.mean_eval = np.load(pjoin('assets/meta', "mean_eval.npy")) 34 | self.hparams.std_eval = np.load(pjoin('assets/meta', "std_eval.npy")) 35 | 36 | # Length of the dataset 37 | self.hparams.max_motion_length = cfg.DATASET.HUMANML3D.MAX_MOTION_LEN 38 | self.hparams.min_motion_length = cfg.DATASET.HUMANML3D.MIN_MOTION_LEN 39 | self.hparams.max_text_len = cfg.DATASET.HUMANML3D.MAX_TEXT_LEN 40 | self.hparams.unit_length = cfg.DATASET.HUMANML3D.UNIT_LEN 41 | 42 | # Additional parameters 43 | self.hparams.debug = cfg.DEBUG 44 | self.hparams.stage = cfg.TRAIN.STAGE 45 | 46 | # Dataset switch 47 | self.DatasetEval = Text2MotionDatasetEval 48 | 49 | if cfg.TRAIN.STAGE == "vae": 50 | if cfg.model.params.motion_vae.target.split('.')[-1].lower() == "vqvae": 51 | self.hparams.win_size = 64 52 | self.Dataset = MotionDatasetVQ 53 | else: 54 | self.Dataset = MotionDataset 55 | elif 'lm' in cfg.TRAIN.STAGE: 56 | self.hparams.code_path = cfg.DATASET.CODE_PATH 57 | self.hparams.task_path = cfg.DATASET.TASK_PATH 58 | self.hparams.std_text = cfg.DATASET.HUMANML3D.STD_TEXT 59 | self.Dataset = Text2MotionDatasetCB 60 | elif cfg.TRAIN.STAGE == "token": 61 | self.Dataset = Text2MotionDatasetToken 62 | self.DatasetEval = Text2MotionDatasetToken 63 | elif cfg.TRAIN.STAGE == "m2t": 64 | self.Dataset = Text2MotionDatasetM2T 65 | self.DatasetEval = Text2MotionDatasetM2T 66 | else: 67 | self.Dataset = Text2MotionDataset 68 | 69 | # Get additional info of the dataset 70 | self.nfeats = 263 71 | cfg.DATASET.NFEATS = self.nfeats 72 | 73 | 74 | def feats2joints(self, features): 75 | mean = torch.tensor(self.hparams.mean).to(features) 76 | std = torch.tensor(self.hparams.std).to(features) 77 | features = features * std + mean 78 | return recover_from_ric(features, self.njoints) 79 | 80 | def joints2feats(self, features): 81 | features = process_file(features, self.njoints)[0] 82 | return features 83 | 84 | def normalize(self, features): 85 | mean = torch.tensor(self.hparams.mean).to(features) 86 | std = torch.tensor(self.hparams.std).to(features) 87 | features = (features - mean) / std 88 | return features 89 | 90 | def denormalize(self, features): 91 | mean = torch.tensor(self.hparams.mean).to(features) 92 | std = torch.tensor(self.hparams.std).to(features) 93 | features = features * std + mean 94 | return features 95 | 96 | def renorm4t2m(self, features): 97 | # renorm to t2m norms for using t2m evaluators 98 | ori_mean = torch.tensor(self.hparams.mean).to(features) 99 | ori_std = 
torch.tensor(self.hparams.std).to(features) 100 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 101 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 102 | features = features * ori_std + ori_mean 103 | features = (features - eval_mean) / eval_std 104 | return features 105 | 106 | def mm_mode(self, mm_on=True): 107 | if mm_on: 108 | self.is_mm = True 109 | self.name_list = self.test_dataset.name_list 110 | self.mm_list = np.random.choice(self.name_list, 111 | self.cfg.METRIC.MM_NUM_SAMPLES, 112 | replace=False) 113 | self.test_dataset.name_list = self.mm_list 114 | else: 115 | self.is_mm = False 116 | self.test_dataset.name_list = self.name_list 117 | -------------------------------------------------------------------------------- /mGPT/render/rendermotion.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import imageio 3 | import os 4 | import argparse 5 | from tqdm import tqdm 6 | from .renderer import get_renderer 7 | 8 | 9 | def get_rotation(theta=np.pi / 3): 10 | import mGPT.utils.rotation_conversions as geometry 11 | import torch 12 | axis = torch.tensor([0, 1, 0], dtype=torch.float) 13 | axisangle = theta * axis 14 | matrix = geometry.axis_angle_to_matrix(axisangle) 15 | return matrix.numpy() 16 | 17 | 18 | def render_video(meshes, 19 | key, 20 | action, 21 | renderer, 22 | savepath, 23 | backgrounds, 24 | cam_pose, 25 | cams=(0.75, 0.75, 0, 0.10), 26 | color=[0.11, 0.53, 0.8]): 27 | # cams=(0.75, 0.75, 0, 0.10), color=[165.0/255,112/255,140/255]): 28 | # center the first frame 29 | if key not in ["real", "ntf", "side"]: 30 | w = int(key) / 6.0 31 | # purpole to green 32 | # color = w*np.array([0.9,102/255,120/255]) + (1-w)*np.array([0.11, 0.9, 0.11]) 33 | # color = (1-w)*np.array([165.0/255,112/255,140/255]) + w*np.array([0.11, 0.8, 0.11]) 34 | color = (1 - w) * np.array([0.75, 0.13, 0.7]) + w * np.array( 35 | [0.12, 0.7, 0.14]) 36 | 37 | meshes = meshes - meshes[0].mean(axis=0) 38 | imgs = [] 39 | idx = 0 40 | # for mesh in meshes: 41 | for mesh in tqdm(meshes, desc=f"Visualize {key}, action {action}"): 42 | # file_name = '3dpw_rot-90_glob_trimesh.ply' mesh_filename=file_name, 43 | # prepare background 44 | if len(backgrounds.shape) == 3: 45 | background = backgrounds 46 | cam = cams 47 | elif len(backgrounds.shape) == 4: 48 | background = backgrounds[idx] 49 | cam = cams[idx] 50 | idx += 1 51 | # prepare cams 52 | img = renderer.render(background, 53 | mesh, 54 | cam, 55 | color=color, 56 | cam_pose=cam_pose) 57 | imgs.append(img) 58 | # show(img) 59 | 60 | imgs = np.array(imgs) 61 | # masks = ~(imgs/255. 
> 0.96).all(-1) 62 | # coords = np.argwhere(masks.sum(axis=0)) 63 | # y1, x1 = coords.min(axis=0) 64 | # y2, x2 = coords.max(axis=0) 65 | # writer = imageio.get_writer(savepath, fps=30) 66 | # for cimg in imgs[:, y1:y2, x1:x2]: 67 | # writer.append_data(cimg) 68 | # writer.close() 69 | 70 | # from mld.utils.uicap_utils import write_rgba_seqs 71 | # write_rgba_seqs(imgs, savepath) 72 | 73 | writer = imageio.get_writer(savepath, fps=30) 74 | for cimg in imgs: 75 | writer.append_data(cimg) 76 | writer.close() 77 | 78 | 79 | def main(): 80 | parser = argparse.ArgumentParser() 81 | parser.add_argument("filename") 82 | opt = parser.parse_args() 83 | filename = opt.filename 84 | savefolder = os.path.splitext(filename)[0] 85 | os.makedirs(savefolder, exist_ok=True) 86 | 87 | output = np.load(filename) 88 | 89 | if output.shape[0] == 3: 90 | visualization, generation, reconstruction = output 91 | output = { 92 | "visualization": visualization, 93 | "generation": generation, 94 | "reconstruction": reconstruction 95 | } 96 | else: 97 | # output = {f"generation_{key}": output[key] for key in range(2)} # len(output))} 98 | # output = {f"generation_{key}": output[key] for key in range(len(output))} 99 | output = { 100 | f"generation_{key}": output[key] 101 | for key in range(len(output)) 102 | } 103 | 104 | width = 1024 105 | height = 1024 106 | 107 | background = np.zeros((height, width, 3)) 108 | renderer = get_renderer(width, height) 109 | 110 | # if duration mode, put back durations 111 | if output["generation_3"].shape[-1] == 100: 112 | output["generation_0"] = output["generation_0"][:, :, :, :40] 113 | output["generation_1"] = output["generation_1"][:, :, :, :60] 114 | output["generation_2"] = output["generation_2"][:, :, :, :80] 115 | output["generation_3"] = output["generation_3"][:, :, :, :100] 116 | elif output["generation_3"].shape[-1] == 160: 117 | print("160 mode") 118 | output["generation_0"] = output["generation_0"][:, :, :, :100] 119 | output["generation_1"] = output["generation_1"][:, :, :, :120] 120 | output["generation_2"] = output["generation_2"][:, :, :, :140] 121 | output["generation_3"] = output["generation_3"][:, :, :, :160] 122 | 123 | # if str(action) == str(1) and str(key) == "generation_4": 124 | for key in output: 125 | vidmeshes = output[key] 126 | for action in range(len(vidmeshes)): 127 | meshes = vidmeshes[action].transpose(2, 0, 1) 128 | path = os.path.join(savefolder, 129 | "action{}_{}.mp4".format(action, key)) 130 | render_video(meshes, key, action, renderer, path, background, cam_pose=None)  # cam_pose has no default in render_video and was missing here; None is an assumed placeholder 131 | 132 | 133 | if __name__ == "__main__": 134 | main() 135 | -------------------------------------------------------------------------------- /render.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import shutil 4 | import sys 5 | import natsort 6 | from pathlib import Path 7 | from argparse import ArgumentParser 8 | 9 | try: 10 | import bpy 11 | 12 | sys.path.append(os.path.dirname(bpy.data.filepath)) 13 | 14 | # local packages 15 | sys.path.append(os.path.expanduser("~/.local/lib/python3.9/site-packages")) 16 | except ImportError: 17 | raise ImportError( 18 | "Blender is not properly installed or not launched properly. See README.md for instructions on how to install and use Blender." 
19 | ) 20 | 21 | 22 | # Monkey patch argparse such that 23 | # blender / python / hydra parsing works 24 | def parse_args(self, args=None, namespace=None): 25 | if args is not None: 26 | return self.parse_args_bak(args=args, namespace=namespace) 27 | try: 28 | idx = sys.argv.index("--") 29 | args = sys.argv[idx + 1:] # the list after '--' 30 | except ValueError as e: # '--' not in the list: 31 | args = [] 32 | return self.parse_args_bak(args=args, namespace=namespace) 33 | 34 | 35 | setattr(ArgumentParser, 'parse_args_bak', ArgumentParser.parse_args) 36 | setattr(ArgumentParser, 'parse_args', parse_args) 37 | 38 | from mGPT.config import parse_args 39 | 40 | 41 | def render_cli() -> None: 42 | # parse options 43 | cfg = parse_args(phase="render") # parse config file 44 | cfg.FOLDER = cfg.RENDER.FOLDER 45 | 46 | if cfg.RENDER.INPUT_MODE.lower() == "npy": 47 | output_dir = Path(os.path.dirname(cfg.RENDER.NPY)) 48 | paths = [cfg.RENDER.NPY] 49 | elif cfg.RENDER.INPUT_MODE.lower() == "dir": 50 | output_dir = Path(cfg.RENDER.DIR) 51 | paths = [] 52 | file_list = natsort.natsorted(os.listdir(cfg.RENDER.DIR)) 53 | begin_id = random.randrange(0, len(file_list)) 54 | file_list = file_list[begin_id:] + file_list[:begin_id] 55 | 56 | # render mesh npy first 57 | for item in file_list: 58 | if item.endswith("_mesh.npy"): 59 | paths.append(os.path.join(cfg.RENDER.DIR, item)) 60 | 61 | # then render joint npy 62 | for item in file_list: 63 | if item.endswith(".npy") and not item.endswith("_mesh.npy"): 64 | paths.append(os.path.join(cfg.RENDER.DIR, item)) 65 | 66 | print(f"begin to render for {paths[0]}") 67 | 68 | import numpy as np 69 | 70 | from mGPT.render.blender import render 71 | from mGPT.render.video import Video 72 | 73 | init = True 74 | for path in paths: 75 | # check existed mp4 or under rendering 76 | if cfg.RENDER.MODE == "video": 77 | if os.path.exists(path.replace(".npy", ".mp4")) or os.path.exists( 78 | path.replace(".npy", "_frames")): 79 | print(f"npy is rendered or under rendering {path}") 80 | continue 81 | else: 82 | # check existed png 83 | if os.path.exists(path.replace(".npy", ".png")): 84 | print(f"npy is rendered or under rendering {path}") 85 | continue 86 | 87 | if cfg.RENDER.MODE == "video": 88 | frames_folder = os.path.join( 89 | output_dir, 90 | path.replace(".npy", "_frames").split('/')[-1]) 91 | os.makedirs(frames_folder, exist_ok=True) 92 | else: 93 | frames_folder = os.path.join( 94 | output_dir, 95 | path.replace(".npy", ".png").split('/')[-1]) 96 | 97 | try: 98 | data = np.load(path) 99 | if data.shape[0] == 1: 100 | data = data[0] 101 | except FileNotFoundError: 102 | print(f"{path} not found") 103 | continue 104 | 105 | if cfg.RENDER.MODE == "video": 106 | frames_folder = os.path.join( 107 | output_dir, 108 | path.replace(".npy", "_frames").split("/")[-1]) 109 | else: 110 | frames_folder = os.path.join( 111 | output_dir, 112 | path.replace(".npy", ".png").split("/")[-1]) 113 | 114 | out = render( 115 | data, 116 | frames_folder, 117 | canonicalize=cfg.RENDER.CANONICALIZE, 118 | exact_frame=cfg.RENDER.EXACT_FRAME, 119 | num=cfg.RENDER.NUM, 120 | mode=cfg.RENDER.MODE, 121 | model_path=cfg.RENDER.MODEL_PATH, 122 | faces_path=cfg.RENDER.FACES_PATH, 123 | downsample=cfg.RENDER.DOWNSAMPLE, 124 | always_on_floor=cfg.RENDER.ALWAYS_ON_FLOOR, 125 | oldrender=cfg.RENDER.OLDRENDER, 126 | res=cfg.RENDER.RES, 127 | init=init, 128 | gt=cfg.RENDER.GT, 129 | accelerator=cfg.ACCELERATOR, 130 | device=cfg.DEVICE, 131 | ) 132 | 133 | init = False 134 | 135 | if cfg.RENDER.MODE == 
"video": 136 | shutil.copytree(frames_folder, frames_folder+'_img') 137 | if cfg.RENDER.DOWNSAMPLE: 138 | video = Video(frames_folder, fps=cfg.RENDER.FPS) 139 | else: 140 | video = Video(frames_folder, fps=cfg.RENDER.FPS) 141 | 142 | vid_path = frames_folder.replace("_frames", ".mp4") 143 | video.save(out_path=vid_path) 144 | shutil.rmtree(frames_folder) 145 | print(f"remove tmp fig folder and save video in {vid_path}") 146 | 147 | else: 148 | print(f"Frame generated at: {out}") 149 | 150 | 151 | if __name__ == "__main__": 152 | render_cli() 153 | --------------------------------------------------------------------------------