├── .gitignore
├── LICENSE
├── OpenTMA
│   ├── .gitignore
│   ├── README.md
│   ├── assets
│   │   └── logo.png
│   ├── configs
│   │   ├── assets.yaml
│   │   ├── base.yaml
│   │   ├── configs_temos
│   │   │   ├── H3D-TMR.yaml
│   │   │   ├── MotionX-TMR.yaml
│   │   │   ├── UniMocap-TMR.yaml
│   │   │   └── infonce.yaml
│   │   └── modules_temos
│   │       ├── motiondecoder.yaml
│   │       ├── motionencoder.yaml
│   │       └── text_encoder.yaml
│   ├── requirements.txt
│   ├── retrieval.py
│   ├── retrieval.sh
│   ├── test.py
│   ├── test_temos.py
│   ├── tma
│   │   ├── __init__.py
│   │   ├── callback
│   │   │   ├── __init__.py
│   │   │   └── progress.py
│   │   ├── config.py
│   │   ├── data
│   │   │   ├── HumanML3D.py
│   │   │   ├── Humanact12.py
│   │   │   ├── Kit.py
│   │   │   ├── MotionX.py
│   │   │   ├── Uestc.py
│   │   │   ├── UniMocap.py
│   │   │   ├── __init__.py
│   │   │   ├── a2m
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dataset.py
│   │   │   │   ├── humanact12poses.py
│   │   │   │   ├── tools.py
│   │   │   │   ├── uestc.py
│   │   │   │   └── utils
│   │   │   │       ├── __init__.py
│   │   │   │       ├── misc.py
│   │   │   │       ├── rotation_conversions.py
│   │   │   │       └── tensors.py
│   │   │   ├── base.py
│   │   │   ├── get_data.py
│   │   │   ├── humanml
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── common
│   │   │   │   │   ├── quaternion.py
│   │   │   │   │   └── skeleton.py
│   │   │   │   ├── data
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── dataset.py
│   │   │   │   ├── dataset.py
│   │   │   │   ├── scripts
│   │   │   │   │   └── motion_process.py
│   │   │   │   └── utils
│   │   │   │       ├── __init__.py
│   │   │   │       ├── get_opt.py
│   │   │   │       ├── metrics.py
│   │   │   │       ├── paramUtil.py
│   │   │   │       ├── plot_script.py
│   │   │   │       ├── utils.py
│   │   │   │       └── word_vectorizer.py
│   │   │   ├── sampling
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── framerate.py
│   │   │   │   └── frames.py
│   │   │   └── utils.py
│   │   ├── launch
│   │   │   ├── __init__.py
│   │   │   ├── prepare.py
│   │   │   └── tools.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   ├── architectures
│   │   │   │   ├── __init__.py
│   │   │   │   ├── actor_vae.py
│   │   │   │   ├── fc.py
│   │   │   │   ├── humanact12_gru.py
│   │   │   │   ├── t2m_motionenc.py
│   │   │   │   ├── t2m_textenc.py
│   │   │   │   ├── temos
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── motiondecoder
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── actor.py
│   │   │   │   │   │   └── gru.py
│   │   │   │   │   ├── motionencoder
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── actor.py
│   │   │   │   │   │   └── gru.py
│   │   │   │   │   └── textencoder
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── distillbert.py
│   │   │   │   │       └── distillbert_actor.py
│   │   │   │   ├── tools
│   │   │   │   │   ├── embeddings.py
│   │   │   │   │   └── transformer_layers.py
│   │   │   │   ├── vision_transformer.py
│   │   │   │   └── vposert_vae.py
│   │   │   ├── body_skeleton
│   │   │   │   ├── __init__.py
│   │   │   │   ├── paramUtil.py
│   │   │   │   ├── quaternion.py
│   │   │   │   └── skeleton.py
│   │   │   ├── get_model.py
│   │   │   ├── losses
│   │   │   │   ├── __init__.py
│   │   │   │   ├── actor.py
│   │   │   │   ├── infonce.py
│   │   │   │   ├── kl.py
│   │   │   │   ├── mld.py
│   │   │   │   ├── temos.py
│   │   │   │   ├── tmost.py
│   │   │   │   ├── utils.py
│   │   │   │   └── vqvae.py
│   │   │   ├── metrics
│   │   │   │   ├── __init__.py
│   │   │   │   ├── compute.py
│   │   │   │   ├── mm.py
│   │   │   │   ├── retrieval_recall.py
│   │   │   │   ├── tm2t.py
│   │   │   │   ├── uncond.py
│   │   │   │   └── utils.py
│   │   │   ├── modeltype
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── smplx_layer.py
│   │   │   │   └── temos.py
│   │   │   ├── operator
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adain.py
│   │   │   │   ├── blocks.py
│   │   │   │   ├── conv2d_gradfix.py
│   │   │   │   ├── cross_attention.py
│   │   │   │   ├── position_encoding.py
│   │   │   │   ├── position_encoding_layer.py
│   │   │   │   └── self_attention.py
│   │   │   └── tools
│   │   │       ├── __init__.py
│   │   │       ├── hessian_penalty.py
│   │   │       └── tools.py
│   │   ├── tools
│   │   │   ├── __init__.py
│   │   │   ├── geometry.py
│   │   │   ├── logging.py
│   │   │   └── runid.py
│   │   ├── transforms
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── feats2smpl.py
│   │   │   ├── identity.py
│   │   │   ├── joints2jfeats
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── rifke.py
│   │   │   │   └── tools.py
│   │   │   ├── joints2rots
│   │   │   │   ├── config.py
│   │   │   │   ├── customloss.py
│   │   │   │   ├── prior.py
│   │   │   │   └── smplify.py
│   │   │   ├── rotation2xyz.py
│   │   │   ├── rots2joints
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   └── smplh.py
│   │   │   ├── rots2rfeats
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   └── smplvelp.py
│   │   │   ├── smpl.py
│   │   │   └── xyz.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── demo_utils.py
│   │       ├── easyconvert.py
│   │       ├── fixseed.py
│   │       ├── geometry.py
│   │       ├── joints.py
│   │       ├── logger.py
│   │       ├── misc.py
│   │       ├── rotation_conversions.py
│   │       ├── sample_utils.py
│   │       ├── temos_utils.py
│   │       └── tensors.py
│   ├── tmp.py
│   ├── train.py
│   └── train.sh
├── README.md
├── assets
│   ├── highlight.png
│   ├── system.png
│   └── tomato-logo.png
└── src
    └── tomato_represenation
        ├── README.md
        ├── common
        │   ├── quaternion.py
        │   └── skeleton.py
        ├── dataset.py
        ├── motion_representation.py
        ├── paramUtil.py
        ├── plot_3d_global.py
        ├── plot_feature.py
        ├── raw_pose_processing.py
        └── smplx2joints.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 
109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | License for Non-commercial Scientific Research Purposes 2 | 3 | IDEA grants you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty free and limited license under IDEA’s copyright interests to reproduce, distribute, and create derivative works of the text, videos, codes solely for your non-commercial research purposes. 4 | 5 | Any other use, in particular any use for commercial, pornographic, military, or surveillance, purposes is prohibited. 6 | 7 | Text and visualization results are owned by International Digital Economy Academy (IDEA). 8 | 9 | We have used the AIST Dance Video Database (https://aistdancedb.ongaaccel.jp) for demonstration. 
-------------------------------------------------------------------------------- /OpenTMA/.gitignore: -------------------------------------------------------------------------------- 1 | experiments 2 | deps 3 | datasets 4 | .vector_cache 5 | *.pyc 6 | *.out -------------------------------------------------------------------------------- /OpenTMA/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/assets/logo.png -------------------------------------------------------------------------------- /OpenTMA/configs/assets.yaml: -------------------------------------------------------------------------------- 1 | FOLDER: './experiments' # Experiment files saving path 2 | 3 | TEST: 4 | FOLDER: './results' # Testing files saving path 5 | 6 | DATASET: 7 | SMPL_PATH: './deps/smpl' 8 | TRANSFORM_PATH: './deps/transforms/' 9 | WORD_VERTILIZER_PATH: './deps/glove/' 10 | UNIMOCAP: 11 | ROOT: './datasets/UniMocap' # KIT directory 12 | SPLIT_ROOT: './datasets/UniMocap' # KIT splits directory 13 | KIT: 14 | ROOT: './datasets/kit-ml' # KIT directory 15 | SPLIT_ROOT: './datasets/kit-ml' # KIT splits directory 16 | HUMANML3D: 17 | ROOT: './datasets/humanml3d' # HumanML3D directory 18 | SPLIT_ROOT: './datasets/humanml3d' # HumanML3D splits directory 19 | HUMANACT12: 20 | ROOT: ./datasets/HumanAct12Poses 21 | SPLIT_ROOT: ./datasets/HumanAct12Poses 22 | UESTC: 23 | ROOT: ./datasets/uestc 24 | SPLIT_ROOT: ./datasets/uestc 25 | AMASS: 26 | DB_ROOT: /apdcephfs/share_1227775/shingxchen/uicap/data/vibe_db 27 | MOTIONX: 28 | ROOT: ./datasets/Motion-X 29 | SPLIT_ROOT: ./datasets/Motion-X/split 30 | SEMANTIC_TEXT_ROOT: ./datasets/Motion-X/texts/semantic_texts 31 | FACE_TEXT_ROOT: ./datasets/Motion-X/texts/face_texts 32 | 33 | 34 | model: 35 | bert_path: './deps/distilbert-base-uncased' # bert model path for all text encoders 36 | clip_path: './deps/clip-vit-large-patch14' # bert model path for all text encoders 37 | t2m_path: './deps/t2m/' 38 | 39 | humanact12_rec_path: './deps/actionrecognition' 40 | uestc_rec_path: './deps/actionrecognition' 41 | 42 | RENDER: 43 | BLENDER_PATH: '/apdcephfs/share_1227775/mingzhenzhu/jiangbiao/libs/blender-2.93.2-linux-x64/blender' 44 | FACES_PATH: '/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smplh/smplh.faces' 45 | FOLDER: ./animations 46 | -------------------------------------------------------------------------------- /OpenTMA/configs/base.yaml: -------------------------------------------------------------------------------- 1 | # FOLDER: ./experiments 2 | SEED_VALUE: 1234 3 | DEBUG: True 4 | TRAIN: 5 | SPLIT: 'train' 6 | NUM_WORKERS: 2 # Number of workers 7 | BATCH_SIZE: 4 # Size of batches 8 | START_EPOCH: 0 # Start epoch 9 | END_EPOCH: 2000 # End epoch 10 | RESUME: '' # Experiment path to be resumed training 11 | PRETRAINED_VAE: '' 12 | PRETRAINED: '' # Pretrained model path 13 | 14 | OPTIM: 15 | OPTIM.TYPE: 'AdamW' # Optimizer type 16 | OPTIM.LR: 1e-4 # Learning rate 17 | 18 | ABLATION: 19 | VAE_TYPE: 'actor' # vae ablation: actor or mcross 20 | VAE_ARCH: 'encoder_decoder' # mdiffusion vae architecture 21 | PE_TYPE: 'actor' # mdiffusion tma or actor 22 | DIFF_PE_TYPE: 'actor' # mdiffusion tma or actor 23 | SKIP_CONNECT: False # skip connection for denoiser va 24 | # use linear to expand mean and std rather expand token nums 25 | MLP_DIST: False 26 | IS_DIST: False # Mcross distribution kl 27 | PREDICT_EPSILON: 
True # noise or motion 28 | 29 | EVAL: 30 | SPLIT: 'gtest' 31 | BATCH_SIZE: 1 # Evaluating Batch size 32 | NUM_WORKERS: 12 # Evaluating Batch size 33 | 34 | TEST: 35 | TEST_DIR: '' 36 | CHECKPOINTS: '' # Pretrained model path 37 | SPLIT: 'gtest' 38 | BATCH_SIZE: 1 # Testing Batch size 39 | NUM_WORKERS: 12 # Evaluating Batch size 40 | SAVE_PREDICTIONS: False # Weather to save predictions 41 | COUNT_TIME: False # Weather to count time during test 42 | REPLICATION_TIMES: 20 # Number of times to replicate the test 43 | MM_NUM_SAMPLES: 100 # Number of samples for multimodal test 44 | MM_NUM_REPEATS: 30 # Number of repeats for multimodal test 45 | MM_NUM_TIMES: 10 # Number of times to repeat the multimodal test 46 | DIVERSITY_TIMES: 300 # Number of times to repeat the diversity test 47 | REP_I: 0 48 | model: 49 | target: 'modules' 50 | t2m_textencoder: 51 | dim_word: 300 52 | dim_pos_ohot: 15 53 | dim_text_hidden: 512 54 | dim_coemb_hidden: 512 55 | 56 | t2m_motionencoder: 57 | dim_move_hidden: 512 58 | dim_move_latent: 512 59 | dim_motion_hidden: 1024 60 | dim_motion_latent: 512 61 | LOSS: 62 | LAMBDA_LATENT: 1e-5 # Lambda for latent losses 63 | LAMBDA_KL: 1e-5 # Lambda for kl losses 64 | LAMBDA_REC: 1.0 # Lambda for reconstruction losses 65 | LAMBDA_JOINT: 1.0 # Lambda for joint losses 66 | LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses 67 | LAMBDA_CROSS: 1.0 # Lambda for cross-reconstruction losses 68 | LAMBDA_CYCLE: 1.0 # Lambda for cycle losses 69 | LAMBDA_PRIOR: 0.0 70 | DIST_SYNC_ON_STEP: True 71 | METRIC: 72 | FORCE_IN_METER: True 73 | DIST_SYNC_ON_STEP: True 74 | DATASET: 75 | NCLASSES: 10 76 | SAMPLER: 77 | MAX_SQE: -1 78 | MAX_LEN: 196 79 | MIN_LEN: 40 80 | MAX_TEXT_LEN: 20 81 | KIT: 82 | PICK_ONE_TEXT: true 83 | FRAME_RATE: 12.5 84 | UNIT_LEN: 4 85 | HUMANML3D: 86 | PICK_ONE_TEXT: true 87 | FRAME_RATE: 20.0 88 | UNIT_LEN: 4 89 | UNIMOCAP: 90 | PICK_ONE_TEXT: true 91 | FRAME_RATE: 20.0 92 | UNIT_LEN: 4 93 | HUMANACT12: 94 | NUM_FRAMES: 60 95 | POSE_REP: rot6d 96 | GLOB: true 97 | TRANSLATION: true 98 | UESTC: 99 | NUM_FRAMES: 60 100 | POSE_REP: rot6d 101 | GLOB: true 102 | TRANSLATION: true 103 | MOTIONX: 104 | UNIT_LEN: 4 105 | 106 | LOGGER: 107 | SACE_CHECKPOINT_EPOCH: 1 108 | LOG_EVERY_STEPS: 1 109 | VAL_EVERY_STEPS: 10 110 | TENSORBOARD: true 111 | WANDB: 112 | OFFLINE: false 113 | PROJECT: null 114 | RESUME_ID: null 115 | RENDER: 116 | JOINT_TYPE: mmm 117 | INPUT_MODE: npy 118 | DIR: '' 119 | NPY: '' 120 | DENOISING: true 121 | OLDRENDER: true 122 | RES: high 123 | DOWNSAMPLE: true 124 | FPS: 12.5 125 | CANONICALIZE: true 126 | EXACT_FRAME: 0.5 127 | NUM: 7 128 | MODE: sequence 129 | VID_EXT: mp4 130 | ALWAYS_ON_FLOOR: false 131 | GT: false 132 | DEMO: 133 | MOTION_TRANSFER: false 134 | RENDER: false 135 | FRAME_RATE: 12.5 136 | EXAMPLE: null 137 | -------------------------------------------------------------------------------- /OpenTMA/configs/configs_temos/H3D-TMR.yaml: -------------------------------------------------------------------------------- 1 | NAME: H3D-TMR-v1 # Experiment name 2 | DEBUG: False # Debug mode 3 | ACCELERATOR: 'gpu' # Devices optioncal: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps, “auto” 4 | DEVICE: [0,1,2,3] # Index of gpus eg. [0] or [0,1,2,3] 5 | # DEVICE: [0] # Index of gpus eg. 
[0] or [0,1,2,3] 6 | 7 | # Training configuration 8 | TRAIN: 9 | #--------------------------------- 10 | STAGE: temos # stage "vae" or "diffusion", "vae_diffusion" 11 | #--------------------------------- 12 | DATASETS: ['humanml3d'] # Training datasets 13 | NUM_WORKERS: 11 # Number of workers 14 | BATCH_SIZE: 128 # Size of batches 15 | START_EPOCH: 0 # Start epochMMOTIONENCODER 16 | END_EPOCH: 1000 # End epoch 17 | RESUME: '' # Resume training from this path 18 | OPTIM: 19 | TYPE: AdamW # Optimizer type 20 | LR: 1e-4 # Learning rate 21 | 22 | # Evaluating Configuration 23 | EVAL: 24 | DATASETS: ['humanml3d'] # Evaluating datasets 25 | BATCH_SIZE: 32 # Evaluating Batch size 26 | SPLIT: test 27 | 28 | # Test Configuration 29 | TEST: 30 | PRETRAINED_CHECKPOINTS_VAE: '' 31 | SAVE_PREDICTIONS: False 32 | CHECKPOINTS: './experiments/temos/H3D-TMR-v1/checkpoints/epoch=299.ckpt' # Pretrained model path 33 | DATASETS: ['humanml3d'] # training datasets 34 | SPLIT: test 35 | BATCH_SIZE: 32 # training Batch size 36 | MEAN: False 37 | NUM_SAMPLES: 1 38 | FACT: 1 39 | # REPLICATION_TIM 40 | 41 | # Datasets Configuration 42 | DATASET: 43 | JOINT_TYPE: 'humanml3d' # join type 44 | VERSION: '' 45 | MOTION_TYPE: 'vector_263' 46 | METRIC: 47 | # TYPE: ['TemosMetric', 'TM2TMetrics'] 48 | TYPE: [] 49 | # Losses Configuration 50 | LOSS: 51 | TYPE: temos # Losses type 52 | USE_INFONCE: True 53 | USE_INFONCE_FILTER: True 54 | LAMBDA_LATENT: 1.0e-5 # Lambda for latent Losses 55 | LAMBDA_KL: 1.0e-5 # Lambda for kl Losses 56 | LAMBDA_REC: 1.0 # Lambda for reconstruction Losses 57 | LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses 58 | LAMBDA_CROSS: 1.0 # Lambda for reconstruction Losses 59 | LAMBDA_CYCLE: 0.0 # Lambda for cycle Losses 60 | LAMBDA_PRIOR: 0.0 61 | LAMBDA_INFONCE: 0.1 # Lambda for infonce 62 | INFONCE_TEMP: 0.1 63 | DIST_SYNC_ON_STEP: False # Sync Losses on step when distributed trained 64 | USE_RECLIPLOSS: False 65 | SYNC: False 66 | TRAIN_TMR: False 67 | 68 | # Model Configuration 69 | model: 70 | vae: true # whether vae model 71 | model_type: temos # model type 72 | condition: 'text' 73 | target: modules_temos 74 | ##### 75 | latent_dim: 256 # latent dimension 76 | ff_size: 1024 # 77 | num_layers: 4 # number of layers 78 | num_head: 6 # number of head layers 79 | dropout: 0.1 # dropout rate 80 | activation: gelu # activation type 81 | eval_text_encode_way: given_glove 82 | eval_text_source: token 83 | 84 | # Logger configuration 85 | LOGGER: 86 | SAVE_CHECKPOINT_EPOCH: 100 87 | LOG_EVERY_STEPS: 1 88 | VAL_EVERY_STEPS: 100 89 | TENSORBOARD: True 90 | WANDB: 91 | PROJECT: null 92 | OFFLINE: False 93 | RESUME_ID: null 94 | -------------------------------------------------------------------------------- /OpenTMA/configs/configs_temos/MotionX-TMR.yaml: -------------------------------------------------------------------------------- 1 | NAME: MotionX-TMR-v1 # Experiment name 2 | DEBUG: False # Debug mode 3 | ACCELERATOR: 'gpu' # Devices optioncal: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps, “auto” 4 | DEVICE: [0,1,2] # Index of gpus eg. [0] or [0,1,2,3] 5 | # DEVICE: [0] # Index of gpus eg. 
[0] or [0,1,2,3] 6 | 7 | # Training configuration 8 | TRAIN: 9 | #--------------------------------- 10 | STAGE: temos # stage "vae" or "diffusion", "vae_diffusion" 11 | #--------------------------------- 12 | DATASETS: ['motionx'] # Training datasets 13 | NUM_WORKERS: 11 # Number of workers 14 | BATCH_SIZE: 128 # Size of batches 15 | START_EPOCH: 0 # Start epochMMOTIONENCODER 16 | END_EPOCH: 4000 # End epoch 17 | RESUME: '' # Resume training from this path 18 | PRETRAINED_VAE: '' 19 | PRETRAINED_MLD: '' 20 | OPTIM: 21 | TYPE: AdamW # Optimizer type 22 | LR: 1e-4 # Learning rate 23 | 24 | # Evaluating Configuration 25 | EVAL: 26 | DATASETS: ['motionx'] # Evaluating datasets 27 | BATCH_SIZE: 32 # Evaluating Batch size 28 | SPLIT: test 29 | 30 | # Test Configuration 31 | TEST: 32 | PRETRAINED_CHECKPOINTS_VAE: '' 33 | SAVE_PREDICTIONS: False 34 | CHECKPOINTS: './experiments/temos/MotionX-TMR-v1/checkpoints/epoch=299.ckpt' # Pretrained model path 35 | DATASETS: ['motionx'] # training datasets 36 | SPLIT: test 37 | BATCH_SIZE: 32 # training Batch size 38 | MEAN: False 39 | NUM_SAMPLES: 1 40 | FACT: 1 41 | # REPLICATION_TIM 42 | 43 | # Datasets Configuration 44 | DATASET: 45 | JOINT_TYPE: 'motionx' # join type 46 | VERSION: '' 47 | MOTION_TYPE: 'vector_623' 48 | TEXT_SOURCE: 'only_text_token' 49 | METRIC: 50 | TYPE: ['TemosMetric', 'TM2TMetrics'] 51 | # Losses Configuration 52 | LOSS: 53 | TYPE: temos # Losses type 54 | USE_INFONCE: True 55 | USE_INFONCE_FILTER: True 56 | LAMBDA_LATENT: 1.0e-5 # Lambda for latent Losses 57 | LAMBDA_KL: 1.0e-5 # Lambda for kl Losses 58 | LAMBDA_REC: 1.0 # Lambda for reconstruction Losses 59 | LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses 60 | LAMBDA_CROSS: 1.0 # Lambda for reconstruction Losses 61 | LAMBDA_CYCLE: 0.0 # Lambda for cycle Losses 62 | LAMBDA_PRIOR: 0.0 63 | LAMBDA_INFONCE: 0.1 # Lambda for infonce 64 | INFONCE_TEMP: 0.1 65 | DIST_SYNC_ON_STEP: False # Sync Losses on step when distributed trained 66 | USE_RECLIPLOSS: False 67 | SYNC: False 68 | TRAIN_TMR: False 69 | 70 | # Model Configuration 71 | model: 72 | vae: true # whether vae model 73 | model_type: temos # model type 74 | condition: 'text' 75 | target: modules_temos 76 | ##### 77 | latent_dim: 256 # latent dimension 78 | ff_size: 1024 # 79 | num_layers: 4 # number of layers 80 | num_head: 6 # number of head layers 81 | dropout: 0.1 # dropout rate 82 | activation: gelu # activation type 83 | eval_text_encode_way: glove_6B_GRU 84 | eval_text_source: caption 85 | 86 | # Logger configuration 87 | LOGGER: 88 | SAVE_CHECKPOINT_EPOCH: 100 89 | LOG_EVERY_STEPS: 1 90 | VAL_EVERY_STEPS: 100 91 | TENSORBOARD: True 92 | WANDB: 93 | PROJECT: null 94 | OFFLINE: False 95 | RESUME_ID: null 96 | -------------------------------------------------------------------------------- /OpenTMA/configs/configs_temos/UniMocap-TMR.yaml: -------------------------------------------------------------------------------- 1 | NAME: UniMocap-TMR-v1 # Experiment name 2 | DEBUG: False # Debug mode 3 | ACCELERATOR: 'gpu' # Devices optioncal: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps, “auto” 4 | DEVICE: [0, 1, 2, 3] # Index of gpus eg. [0] or [0,1,2,3] 5 | # DEVICE: [0] # Index of gpus eg. 
[0] or [0,1,2,3] 6 | 7 | # Training configuration 8 | TRAIN: 9 | #--------------------------------- 10 | STAGE: temos # stage "vae" or "diffusion", "vae_diffusion" 11 | #--------------------------------- 12 | DATASETS: ['unimocap'] # Training datasets 13 | NUM_WORKERS: 11 # Number of workers 14 | BATCH_SIZE: 128 # Size of batches 15 | START_EPOCH: 0 # Start epochMMOTIONENCODER 16 | END_EPOCH: 4000 # End epoch 17 | RESUME: '' # Resume training from this path 18 | PRETRAINED_VAE: '' 19 | PRETRAINED_MLD: '' 20 | OPTIM: 21 | TYPE: AdamW # Optimizer type 22 | LR: 1e-4 # Learning rate 23 | 24 | # Evaluating Configuration 25 | EVAL: 26 | DATASETS: ['unimocap'] # Evaluating datasets 27 | BATCH_SIZE: 32 # Evaluating Batch size 28 | SPLIT: test 29 | 30 | # Test Configuration 31 | TEST: 32 | PRETRAINED_CHECKPOINTS_VAE: '' 33 | SAVE_PREDICTIONS: False 34 | CHECKPOINTS: './experiments/temos/UniMocap-TMR/checkpoints/epoch=299.ckpt' # Pretrained model path 35 | DATASETS: ['unimocap'] # training datasets 36 | SPLIT: test 37 | BATCH_SIZE: 32 # training Batch size 38 | MEAN: False 39 | NUM_SAMPLES: 1 40 | FACT: 1 41 | # REPLICATION_TIM 42 | 43 | # Datasets Configuration 44 | DATASET: 45 | JOINT_TYPE: 'humanml3d' # join type 46 | VERSION: '' 47 | MOTION_TYPE: 'vector_263' 48 | TEXT_SOURCE: 'only_text_token' 49 | METRIC: 50 | TYPE: ['TemosMetric', 'TM2TMetrics'] 51 | # Losses Configuration 52 | LOSS: 53 | TYPE: temos # Losses type 54 | USE_INFONCE: True 55 | USE_INFONCE_FILTER: True 56 | LAMBDA_LATENT: 1.0e-5 # Lambda for latent Losses 57 | LAMBDA_KL: 1.0e-5 # Lambda for kl Losses 58 | LAMBDA_REC: 1.0 # Lambda for reconstruction Losses 59 | LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses 60 | LAMBDA_CROSS: 1.0 # Lambda for reconstruction Losses 61 | LAMBDA_CYCLE: 0.0 # Lambda for cycle Losses 62 | LAMBDA_PRIOR: 0.0 63 | LAMBDA_INFONCE: 0.1 # Lambda for infonce 64 | INFONCE_TEMP: 0.1 65 | DIST_SYNC_ON_STEP: False # Sync Losses on step when distributed trained 66 | USE_RECLIPLOSS: False 67 | SYNC: False 68 | TRAIN_TMR: False 69 | 70 | # Model Configuration 71 | model: 72 | vae: true # whether vae model 73 | model_type: temos # model type 74 | condition: 'text' 75 | target: modules_temos 76 | ##### 77 | latent_dim: 256 # latent dimension 78 | ff_size: 1024 # 79 | num_layers: 6 # number of layers 80 | num_head: 6 # number of head layers 81 | dropout: 0.1 # dropout rate 82 | activation: gelu # activation type 83 | eval_text_encode_way: given_glove 84 | eval_text_source: only_text_token 85 | 86 | # Logger configuration 87 | LOGGER: 88 | SAVE_CHECKPOINT_EPOCH: 100 89 | LOG_EVERY_STEPS: 1 90 | VAL_EVERY_STEPS: 100 91 | TENSORBOARD: True 92 | WANDB: 93 | PROJECT: null 94 | OFFLINE: False 95 | RESUME_ID: null 96 | -------------------------------------------------------------------------------- /OpenTMA/configs/configs_temos/infonce.yaml: -------------------------------------------------------------------------------- 1 | _target_: temos.model.losses.InfoNCE 2 | -------------------------------------------------------------------------------- /OpenTMA/configs/modules_temos/motiondecoder.yaml: -------------------------------------------------------------------------------- 1 | motiondecoder: 2 | name: actor_decoder 3 | target: tma.models.architectures.temos.motiondecoder.actor.ActorAgnosticDecoder 4 | params: 5 | latent_dim: ${model.latent_dim} 6 | ff_size: ${model.ff_size} 7 | num_layers: ${model.num_layers} 8 | num_head: ${model.num_head} 9 | droupout: ${model.dropout} 10 | activation: ${model.activation} 11 
| nfeats: ${DATASET.NFEATS} -------------------------------------------------------------------------------- /OpenTMA/configs/modules_temos/motionencoder.yaml: -------------------------------------------------------------------------------- 1 | motionencoder: 2 | name: actor_encoder 3 | target: tma.models.architectures.temos.motionencoder.actor.ActorAgnosticEncoder 4 | params: 5 | latent_dim: ${model.latent_dim} 6 | vae: ${model.vae} 7 | ff_size: ${model.ff_size} 8 | num_layers: ${model.num_layers} 9 | num_head: ${model.num_head} 10 | droupout: ${model.dropout} 11 | activation: ${model.activation} 12 | nfeats: ${DATASET.NFEATS} -------------------------------------------------------------------------------- /OpenTMA/configs/modules_temos/text_encoder.yaml: -------------------------------------------------------------------------------- 1 | textencoder: 2 | name: distilbert_actor 3 | target: tma.models.architectures.temos.textencoder.distillbert_actor.DistilbertActorAgnosticEncoder 4 | params: 5 | latent_dim: ${model.latent_dim} 6 | vae: ${model.vae} 7 | ff_size: ${model.ff_size} 8 | num_layers: ${model.num_layers} 9 | num_head: ${model.num_head} 10 | droupout: ${model.dropout} 11 | activation: ${model.activation} 12 | finetune: false 13 | modelpath: ${model.bert_path} -------------------------------------------------------------------------------- /OpenTMA/requirements.txt: -------------------------------------------------------------------------------- 1 | einops==0.8.0 2 | human_body_prior==0.8.5.0 3 | hydra-core==1.3.2 4 | ipdb==0.13.13 5 | joblib==1.2.0 6 | matplotlib==3.7.1 7 | numpy==1.23.0 8 | omegaconf==2.3.0 9 | opencv_python==4.8.0.76 10 | Pillow==10.3.0 11 | psutil==5.9.5 12 | pytorch_lightning==1.9.0 13 | rich==13.7.1 14 | scikit_learn==1.2.2 15 | scipy==1.13.0 16 | sentence_transformers==2.2.2 17 | shortuuid==1.0.13 18 | smplx==0.1.28 19 | spacy==3.6.0 20 | tensorboardX==2.6.2.2 21 | torch==2.1.2 22 | torchmetrics==0.7.0 23 | torchtext==0.16.2 24 | tqdm==4.65.0 25 | transformers==4.30.2 26 | -------------------------------------------------------------------------------- /OpenTMA/retrieval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | path1="./experiments/temos/H3D-TMR-v1/embeddings/val/epoch_99/" 4 | path2="./experiments/temos/H3D-TMR-v1/embeddings/val/epoch_599/" 5 | path3="./experiments/temos/H3D-TMR-v1/embeddings/val/epoch_999/" 6 | 7 | 8 | for protocal in A B D 9 | do 10 | echo "**protocal" $protocal"**" 11 | for retrieval_type in T2M M2T 12 | do 13 | echo $retrieval_type 14 | python retrieval.py --retrieval_type $retrieval_type --protocal $protocal --expdirs $path1 $path2 $path3 15 | done 16 | done 17 | -------------------------------------------------------------------------------- /OpenTMA/tma/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/callback/__init__.py: -------------------------------------------------------------------------------- 1 | from .progress import ProgressLogger 2 | -------------------------------------------------------------------------------- /OpenTMA/tma/callback/progress.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pytorch_lightning import LightningModule, 
Trainer 3 | from pytorch_lightning.callbacks import Callback 4 | import psutil 5 | 6 | logger = logging.getLogger() 7 | 8 | 9 | class ProgressLogger(Callback): 10 | """ 11 | A custom callback class for PyTorch Lightning that logs progress information during training. 12 | """ 13 | 14 | def __init__(self, metric_monitor: dict, precision: int = 3): 15 | # Metric to monitor 16 | self.metric_monitor = metric_monitor 17 | self.precision = precision 18 | 19 | def on_train_start( 20 | self, trainer: Trainer, pl_module: LightningModule, **kwargs 21 | ) -> None: 22 | # Log a message when training starts 23 | logger.info("Training started") 24 | 25 | def on_train_end( 26 | self, trainer: Trainer, pl_module: LightningModule, **kwargs 27 | ) -> None: 28 | # Log a message when training ends 29 | logger.info("Training done") 30 | 31 | def on_validation_epoch_end( 32 | self, trainer: Trainer, pl_module: LightningModule, **kwargs 33 | ) -> None: 34 | # Log a message when a validation epoch ends 35 | if trainer.sanity_checking: 36 | logger.info("Sanity checking ok.") 37 | 38 | def on_train_epoch_end( 39 | self, trainer: Trainer, pl_module: LightningModule, padding=False, **kwargs 40 | ) -> None: 41 | # Log a message when a training epoch ends 42 | # Format for logging metrics 43 | metric_format = f"{{:.{self.precision}e}}" 44 | # Start the log line with the epoch number 45 | line = f"Epoch {trainer.current_epoch}" 46 | if padding: 47 | line = f"{line:>{len('Epoch xxxx')}}" # Right padding 48 | metrics_str = [] 49 | 50 | losses_dict = trainer.callback_metrics 51 | for metric_name, dico_name in self.metric_monitor.items(): 52 | # If the metric is in the dictionary, format it and add it to the log line 53 | if dico_name in losses_dict: 54 | metric = losses_dict[dico_name].item() 55 | metric = metric_format.format(metric) 56 | metric = f"{metric_name} {metric}" 57 | metrics_str.append(metric) 58 | 59 | # If there are no metrics, return 60 | if len(metrics_str) == 0: 61 | return 62 | 63 | # Add the current memory usage to the log line 64 | memory = f"Memory {psutil.virtual_memory().percent}%" 65 | line = line + ": " + " ".join(metrics_str) + " " + memory 66 | logger.info(line) 67 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/HumanML3D.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from tma.data.humanml.scripts.motion_process import ( 5 | process_file, 6 | recover_from_ric, 7 | recover_from_root_rot6d, 8 | ) 9 | 10 | from .base import BASEDataModule 11 | from .humanml.data.dataset import Text2MotionDatasetV2, TextOnlyDataset 12 | from .humanml.common.skeleton import Skeleton 13 | 14 | 15 | class HumanML3DDataModule(BASEDataModule): 16 | 17 | def __init__( 18 | self, cfg, batch_size, num_workers, collate_fn=None, phase="train", **kwargs 19 | ): 20 | super().__init__( 21 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 22 | ) 23 | self.save_hyperparameters(logger=False) 24 | self.name = "humanml3d" 25 | self.njoints = 22 26 | self.hparams["njoints"] = 22 27 | if phase == "text_only": 28 | self.Dataset = TextOnlyDataset 29 | else: 30 | self.Dataset = Text2MotionDatasetV2 31 | self.cfg = cfg 32 | sample_overrides = {"split": "val", "tiny": True, "progress_bar": False} 33 | 34 | self._sample_set = self.get_sample_set(overrides=sample_overrides) 35 | # Get additional info of the dataset 36 | # import pdb; pdb.set_trace() 37 | self.nfeats = 
self._sample_set.nfeats 38 | # self.transforms = self._sample_set.transforms 39 | 40 | def feats2joints(self, features, skel=None, motion_type="vector_263"): 41 | # mean = torch.tensor(self.hparams.mean).to(features) 42 | # std = torch.tensor(self.hparams.std).to(features) 43 | # features = features * std + mean 44 | # return recover_from_ric(features, self.njoints) 45 | if motion_type in [ 46 | "vector_263", 47 | "root_position", 48 | "root_position_vel", 49 | "root_position_rot6d", 50 | ]: 51 | mean = torch.tensor(self.hparams.mean).to(features) 52 | std = torch.tensor(self.hparams.std).to(features) 53 | features = features * std + mean 54 | return recover_from_ric( 55 | features, self.njoints 56 | ) # torch.Size([32, 92, 22, 3]) 57 | elif motion_type in ["root_rot6d"]: 58 | mean = torch.tensor(self.hparams.mean).to(features) 59 | std = torch.tensor(self.hparams.std).to(features) 60 | features = features * std + mean 61 | 62 | # skeleton = Skeleton(n_raw_offsets, kinematic_chain, ) 63 | return recover_from_root_rot6d(features, self.njoints, skel) 64 | elif motion_type == "smplx_212": 65 | assert smplx_model is not None 66 | mean = torch.tensor(self.hparams.mean).to(features) 67 | std = torch.tensor(self.hparams.std).to(features) 68 | features = features * (std + 1e-7) + mean 69 | bs = features.shape[0] 70 | features = features.reshape(-1, 212) 71 | output = smplx_model.smplx_model( 72 | pose_body=features[:, 3:66], 73 | pose_hand=features[:, 66:156], 74 | root_orient=features[:, :3], 75 | ).Jtr 76 | return output.reshape(bs, -1, 55, 3) # torch.Size([32, 96, 55, 3]) 77 | else: 78 | raise NotImplementedError 79 | 80 | def joints2feats(self, features): 81 | features = process_file(features, self.njoints)[0] 82 | # mean = torch.tensor(self.hparams.mean).to(features) 83 | # std = torch.tensor(self.hparams.std).to(features) 84 | # features = (features - mean) / std 85 | return features 86 | 87 | def renorm4t2m(self, features): 88 | # renorm to t2m norms for using t2m evaluators 89 | ori_mean = torch.tensor(self.hparams.mean).to(features) 90 | ori_std = torch.tensor(self.hparams.std).to(features) 91 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 92 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 93 | features = features * ori_std + ori_mean 94 | features = (features - eval_mean) / eval_std 95 | return features 96 | 97 | def mm_mode(self, mm_on=True): 98 | # random select samples for mm 99 | if mm_on: 100 | self.is_mm = True 101 | self.name_list = self.test_dataset.name_list 102 | self.mm_list = np.random.choice( 103 | self.name_list, self.cfg.TEST.MM_NUM_SAMPLES, replace=False 104 | ) 105 | self.test_dataset.name_list = self.mm_list 106 | else: 107 | self.is_mm = False 108 | self.test_dataset.name_list = self.name_list 109 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/Humanact12.py: -------------------------------------------------------------------------------- 1 | from .base import BASEDataModule 2 | from .a2m import HumanAct12Poses 3 | import numpy as np 4 | 5 | 6 | class Humanact12DataModule(BASEDataModule): 7 | 8 | def __init__( 9 | self, cfg, batch_size, num_workers, collate_fn=None, phase="train", **kwargs 10 | ): 11 | super().__init__( 12 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 13 | ) 14 | self.save_hyperparameters(logger=False) 15 | self.name = "HumanAct12" 16 | self.Dataset = HumanAct12Poses 17 | self.cfg = cfg 18 | sample_overrides = { 19 | "num_seq_max": 2, 20 | 
"split": "test", 21 | "tiny": True, 22 | "progress_bar": False, 23 | } 24 | # self._sample_set = self.get_sample_set(overrides=sample_overrides) 25 | # Get additional info of the dataset 26 | self.nfeats = 150 27 | self.njoints = 25 28 | self.nclasses = 12 29 | # self.transforms = self._sample_set.transforms 30 | 31 | # def mm_mode(self, mm_on=True): 32 | # # random select samples for mm 33 | # if mm_on: 34 | # self.is_mm = True 35 | # if self.split == 'train': 36 | # self.name_list = self.test_dataset._train[index] 37 | # else: 38 | # self.name_list = self.test_dataset._test[index] 39 | # self.name_list = self.test_dataset.name_list 40 | # self.mm_list = np.random.choice(self.name_list, 41 | # self.cfg.TEST.MM_NUM_SAMPLES, 42 | # replace=False) 43 | # self.test_dataset.name_list = self.mm_list 44 | # else: 45 | # self.is_mm = False 46 | # self.test_dataset.name_list = self.name_list 47 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/Kit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from tma.data.humanml.scripts.motion_process import recover_from_ric 5 | 6 | from .base import BASEDataModule 7 | from .humanml.data.dataset import Text2MotionDatasetV2, TextOnlyDataset 8 | from .utils import all_collate 9 | 10 | 11 | class KitDataModule(BASEDataModule): 12 | 13 | def __init__( 14 | self, 15 | cfg, 16 | phase="train", 17 | collate_fn=all_collate, 18 | batch_size: int = 32, 19 | num_workers: int = 16, 20 | **kwargs 21 | ): 22 | super().__init__( 23 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 24 | ) 25 | self.save_hyperparameters(logger=False) 26 | self.name = "kit" 27 | self.njoints = 21 28 | if phase == "text_only": 29 | self.Dataset = TextOnlyDataset 30 | else: 31 | self.Dataset = Text2MotionDatasetV2 32 | self.cfg = cfg 33 | 34 | sample_overrides = {"split": "val", "tiny": True, "progress_bar": False} 35 | self._sample_set = self.get_sample_set(overrides=sample_overrides) 36 | 37 | # Get additional info of the dataset 38 | self.nfeats = self._sample_set.nfeats 39 | # self.transforms = self._sample_set.transforms 40 | 41 | def feats2joints(self, features): 42 | mean = torch.tensor(self.hparams.mean).to(features) 43 | std = torch.tensor(self.hparams.std).to(features) 44 | features = features * std + mean 45 | return recover_from_ric(features, self.njoints) 46 | 47 | def renorm4t2m(self, features): 48 | # renorm to t2m norms for using t2m evaluators 49 | ori_mean = torch.tensor(self.hparams.mean).to(features) 50 | ori_std = torch.tensor(self.hparams.std).to(features) 51 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 52 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 53 | features = features * ori_std + ori_mean 54 | features = (features - eval_mean) / eval_std 55 | return features 56 | 57 | def mm_mode(self, mm_on=True): 58 | # random select samples for mm 59 | if mm_on: 60 | self.is_mm = True 61 | self.name_list = self.test_dataset.name_list 62 | self.mm_list = np.random.choice( 63 | self.name_list, self.cfg.TEST.MM_NUM_SAMPLES, replace=False 64 | ) 65 | self.test_dataset.name_list = self.mm_list 66 | else: 67 | self.is_mm = False 68 | self.test_dataset.name_list = self.name_list 69 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/MotionX.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 
import torch 3 | 4 | from tma.data.humanml.scripts.motion_process import process_file, recover_from_ric 5 | 6 | from .base import BASEDataModule 7 | from .humanml.data.dataset import ( 8 | Text2MotionDatasetMotionX, 9 | Text2MotionDatasetMotionX_text_all, 10 | ) 11 | 12 | 13 | class Motion_XDataModule(BASEDataModule): 14 | 15 | def __init__( 16 | self, cfg, batch_size, num_workers, collate_fn=None, phase="train", **kwargs 17 | ): 18 | super().__init__( 19 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 20 | ) 21 | self.save_hyperparameters(logger=False) 22 | self.name = "motionx" 23 | if cfg.DATASET.JOINT_TYPE == "humanml3d": 24 | self.njoints = 22 25 | elif cfg.DATASET.JOINT_TYPE == "motionx": 26 | self.njoints = 52 27 | else: 28 | raise NotImplemented 29 | 30 | if phase == "text_only": 31 | self.Dataset = TextOnlyDataset 32 | else: 33 | if cfg.model.condition in [ 34 | "text_all", 35 | "text_face", 36 | "text_body", 37 | "text_hand", 38 | "text_face_body", 39 | "text_seperate", 40 | "only_pose_concat", 41 | "only_pose_fusion", 42 | ]: 43 | self.Dataset = Text2MotionDatasetMotionX_text_all 44 | else: 45 | self.Dataset = Text2MotionDatasetMotionX 46 | 47 | self.cfg = cfg 48 | sample_overrides = {"split": "val", "tiny": True, "progress_bar": False} 49 | self._sample_set = self.get_sample_set(overrides=sample_overrides) 50 | 51 | # Get additional info of the dataset 52 | self.nfeats = self._sample_set.nfeats 53 | # self.transforms = self._sample_set.transforms 54 | 55 | def feats2joints(self, features, motion_type, smplx_model=None): 56 | # import pdb; pdb.set_trace() 57 | if motion_type in ["vector_263", "vector_623"]: 58 | mean = torch.tensor(self.hparams.mean).to(features) 59 | std = torch.tensor(self.hparams.std).to(features) 60 | features = features * std + mean 61 | 62 | return recover_from_ric( 63 | features, self.njoints 64 | ) # torch.Size([32, 92, 22, 3]) 65 | elif motion_type == "smplx_212": 66 | assert smplx_model is not None 67 | mean = torch.tensor(self.hparams.mean).to(features) 68 | std = torch.tensor(self.hparams.std).to(features) 69 | features = features * (std + 1e-7) + mean 70 | bs = features.shape[0] 71 | features = features.reshape(-1, 212) 72 | output = smplx_model.smplx_model( 73 | pose_body=features[:, 3:66], 74 | pose_hand=features[:, 66:156], 75 | root_orient=features[:, :3], 76 | ).Jtr 77 | return output.reshape(bs, -1, 55, 3) # torch.Size([32, 96, 55, 3]) 78 | else: 79 | raise NotImplementedError 80 | 81 | def joints2feats(self, features): 82 | features = process_file(features, self.njoints)[0] 83 | # mean = torch.tensor(self.hparams.mean).to(features) 84 | # std = torch.tensor(self.hparams.std).to(features) 85 | # features = (features - mean) / std 86 | return features 87 | 88 | def renorm4t2m(self, features): 89 | # renorm to t2m norms for using t2m evaluators 90 | ori_mean = torch.tensor(self.hparams.mean).to(features) 91 | ori_std = torch.tensor(self.hparams.std).to(features) 92 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 93 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 94 | features = features * (ori_std + 1e-7) + ori_mean 95 | features = (features - eval_mean) / (eval_std + 1e-7) 96 | return features 97 | 98 | def renormt2m_back(self, features): 99 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 100 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 101 | features = features * (eval_std + 1e-7) + eval_mean 102 | return features 103 | 104 | def mm_mode(self, mm_on=True): 
105 | # random select samples for mm 106 | if mm_on: 107 | self.is_mm = True 108 | self.name_list = self.test_dataset.name_list 109 | self.mm_list = np.random.choice( 110 | self.name_list, self.cfg.TEST.MM_NUM_SAMPLES, replace=False 111 | ) 112 | self.test_dataset.name_list = self.mm_list 113 | else: 114 | self.is_mm = False 115 | self.test_dataset.name_list = self.name_list 116 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/Uestc.py: -------------------------------------------------------------------------------- 1 | from .base import BASEDataModule 2 | from .a2m import UESTC 3 | import os 4 | import rich.progress 5 | import pickle as pkl 6 | 7 | 8 | class UestcDataModule(BASEDataModule): 9 | 10 | def __init__( 11 | self, 12 | cfg, 13 | batch_size, 14 | num_workers, 15 | collate_fn=None, 16 | method_name="vibe", 17 | phase="train", 18 | **kwargs 19 | ): 20 | super().__init__( 21 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 22 | ) 23 | self.save_hyperparameters(logger=False) 24 | self.name = "Uestc" 25 | 26 | self.Dataset = UESTC 27 | self.cfg = cfg 28 | 29 | # self._sample_set = self.get_sample_set(overrides=sample_overrides) 30 | # Get additional info of the dataset 31 | self.nfeats = 150 32 | self.njoints = 25 33 | self.nclasses = 40 34 | # self.transforms = self._sample_set.transforms 35 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/UniMocap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from tma.data.humanml.scripts.motion_process import ( 5 | process_file, 6 | recover_from_ric, 7 | recover_from_root_rot6d, 8 | ) 9 | 10 | from .base import BASEDataModule 11 | from .humanml.data.dataset import UniMocapDataset 12 | from .humanml.common.skeleton import Skeleton 13 | 14 | 15 | class UniMocapDataModule(BASEDataModule): 16 | 17 | def __init__( 18 | self, cfg, batch_size, num_workers, collate_fn=None, phase="train", **kwargs 19 | ): 20 | super().__init__( 21 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 22 | ) 23 | 24 | self.save_hyperparameters(logger=False) 25 | self.name = "unimocap" 26 | self.njoints = 22 27 | self.hparams["njoints"] = 22 28 | if phase == "text_only": 29 | self.Dataset = None 30 | else: 31 | self.Dataset = UniMocapDataset 32 | self.cfg = cfg 33 | sample_overrides = {"split": "val", "tiny": True, "progress_bar": False} 34 | 35 | self._sample_set = self.get_sample_set(overrides=sample_overrides) 36 | 37 | # Get additional info of the dataset 38 | self.nfeats = self._sample_set.nfeats 39 | # self.transforms = self._sample_set.transforms 40 | 41 | def feats2joints(self, features, skel=None, motion_type="vector_263"): 42 | if motion_type in [ 43 | "vector_263", 44 | "root_position", 45 | "root_position_vel", 46 | "root_position_rot6d", 47 | ]: 48 | mean = torch.tensor(self.hparams.mean).to(features) 49 | std = torch.tensor(self.hparams.std).to(features) 50 | features = features * std + mean 51 | return recover_from_ric( 52 | features, self.njoints 53 | ) # torch.Size([32, 92, 22, 3]) 54 | elif motion_type in ["root_rot6d"]: 55 | mean = torch.tensor(self.hparams.mean).to(features) 56 | std = torch.tensor(self.hparams.std).to(features) 57 | features = features * std + mean 58 | 59 | # skeleton = Skeleton(n_raw_offsets, kinematic_chain, ) 60 | return recover_from_root_rot6d(features, self.njoints, skel) 61 | elif motion_type == 
"smplx_212": 62 | assert smplx_model is not None 63 | mean = torch.tensor(self.hparams.mean).to(features) 64 | std = torch.tensor(self.hparams.std).to(features) 65 | features = features * (std + 1e-7) + mean 66 | bs = features.shape[0] 67 | features = features.reshape(-1, 212) 68 | output = smplx_model.smplx_model( 69 | pose_body=features[:, 3:66], 70 | pose_hand=features[:, 66:156], 71 | root_orient=features[:, :3], 72 | ).Jtr 73 | return output.reshape(bs, -1, 55, 3) # torch.Size([32, 96, 55, 3]) 74 | else: 75 | raise NotImplementedError 76 | 77 | def joints2feats(self, features): 78 | features = process_file(features, self.njoints)[0] 79 | # mean = torch.tensor(self.hparams.mean).to(features) 80 | # std = torch.tensor(self.hparams.std).to(features) 81 | # features = (features - mean) / std 82 | return features 83 | 84 | def renorm4t2m(self, features): 85 | # renorm to t2m norms for using t2m evaluators 86 | ori_mean = torch.tensor(self.hparams.mean).to(features) 87 | ori_std = torch.tensor(self.hparams.std).to(features) 88 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 89 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 90 | features = features * ori_std + ori_mean 91 | features = (features - eval_mean) / eval_std 92 | return features 93 | 94 | def mm_mode(self, mm_on=True): 95 | # random select samples for mm 96 | if mm_on: 97 | self.is_mm = True 98 | self.name_list = self.test_dataset.name_list 99 | self.mm_list = np.random.choice( 100 | self.name_list, self.cfg.TEST.MM_NUM_SAMPLES, replace=False 101 | ) 102 | self.test_dataset.name_list = self.mm_list 103 | else: 104 | self.is_mm = False 105 | self.test_dataset.name_list = self.name_list 106 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/data/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/__init__.py: -------------------------------------------------------------------------------- 1 | from .humanact12poses import HumanAct12Poses 2 | from .uestc import UESTC 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/humanact12poses.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle as pkl 3 | 4 | import numpy as np 5 | 6 | from .dataset import Dataset 7 | from .utils import rotation_conversions as geometry 8 | import rich.progress 9 | 10 | 11 | class HumanAct12Poses(Dataset): 12 | dataname = "humanact12" 13 | 14 | def __init__(self, datapath="data/HumanAct12Poses", **kargs): 15 | self.datapath = datapath 16 | 17 | super().__init__(**kargs) 18 | 19 | pkldatafilepath = os.path.join(datapath, "humanact12poses.pkl") 20 | with rich.progress.open(pkldatafilepath, "rb", description="loading humanact12 pkl") as f: 21 | data = pkl.load(f) 22 | 23 | self._pose = [x for x in data["poses"]] 24 | self._num_frames_in_video = [p.shape[0] for p in self._pose] 25 | self._joints = [x for x in data["joints3D"]] 26 | 27 | self._actions = [x for x in data["y"]] 28 | 29 | total_num_actions = 12 30 | self.num_classes = total_num_actions 31 | 32 | self._train = list(range(len(self._pose))) 33 | 34 | keep_actions = np.arange(0, total_num_actions) 35 | 36 | self._action_to_label = {x: i for i, x 
in enumerate(keep_actions)} 37 | self._label_to_action = {i: x for i, x in enumerate(keep_actions)} 38 | 39 | self._action_classes = humanact12_coarse_action_enumerator 40 | 41 | def _load_joints3D(self, ind, frame_ix): 42 | return self._joints[ind][frame_ix] 43 | 44 | def _load_rotvec(self, ind, frame_ix): 45 | pose = self._pose[ind][frame_ix].reshape(-1, 24, 3) 46 | return pose 47 | 48 | 49 | humanact12_coarse_action_enumerator = { 50 | 0: "warm_up", 51 | 1: "walk", 52 | 2: "run", 53 | 3: "jump", 54 | 4: "drink", 55 | 5: "lift_dumbbell", 56 | 6: "sit", 57 | 7: "eat", 58 | 8: "turn steering wheel", 59 | 9: "phone", 60 | 10: "boxing", 61 | 11: "throw", 62 | } 63 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/tools.py: -------------------------------------------------------------------------------- 1 | import os 2 | import string 3 | 4 | 5 | def parse_info_name(path): 6 | name = os.path.splitext(os.path.split(path)[-1])[0] 7 | info = {} 8 | current_letter = None 9 | for letter in name: 10 | if letter in string.ascii_letters: 11 | info[letter] = [] 12 | current_letter = letter 13 | else: 14 | info[current_letter].append(letter) 15 | for key in info.keys(): 16 | info[key] = "".join(info[key]) 17 | return info 18 | 19 | 20 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/data/a2m/utils/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/utils/misc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def to_numpy(tensor): 5 | if torch.is_tensor(tensor): 6 | return tensor.cpu().numpy() 7 | elif type(tensor).__module__ != 'numpy': 8 | raise ValueError("Cannot convert {} to numpy array".format( 9 | type(tensor))) 10 | return tensor 11 | 12 | 13 | def to_torch(ndarray): 14 | if type(ndarray).__module__ == 'numpy': 15 | return torch.from_numpy(ndarray) 16 | elif not torch.is_tensor(ndarray): 17 | raise ValueError("Cannot convert {} to torch tensor".format( 18 | type(ndarray))) 19 | return ndarray 20 | 21 | 22 | def cleanexit(): 23 | import sys 24 | import os 25 | try: 26 | sys.exit(0) 27 | except SystemExit: 28 | os._exit(0) 29 | 30 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/utils/tensors.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def lengths_to_mask(lengths): 5 | max_len = max(lengths) 6 | mask = torch.arange(max_len, device=lengths.device).expand(len(lengths), max_len) < lengths.unsqueeze(1) 7 | return mask 8 | 9 | 10 | def collate_tensors(batch): 11 | dims = batch[0].dim() 12 | max_size = [max([b.size(i) for b in batch]) for i in range(dims)] 13 | size = (len(batch),) + tuple(max_size) 14 | canvas = batch[0].new_zeros(size=size) 15 | for i, b in enumerate(batch): 16 | sub_tensor = canvas[i] 17 | for d in range(dims): 18 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 19 | sub_tensor.add_(b) 20 | return canvas 21 | 22 | 23 | def collate(batch): 24 | databatch = [b[0] for b in batch] 25 | labelbatch = [b[1] for b in batch] 26 | lenbatch = [len(b[0][0][0]) for b in batch] 27 | 28 | databatchTensor = 
collate_tensors(databatch) 29 | labelbatchTensor = torch.as_tensor(labelbatch) 30 | lenbatchTensor = torch.as_tensor(lenbatch) 31 | 32 | maskbatchTensor = lengths_to_mask(lenbatchTensor) 33 | batch = {"x": databatchTensor, "y": labelbatchTensor, 34 | "mask": maskbatchTensor, "lengths": lenbatchTensor} 35 | return batch 36 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/base.py: -------------------------------------------------------------------------------- 1 | from os.path import join as pjoin 2 | import numpy as np 3 | import pytorch_lightning as pl 4 | from torch.utils.data import DataLoader 5 | 6 | 7 | class BASEDataModule(pl.LightningDataModule): 8 | 9 | def __init__(self, collate_fn, batch_size: int, num_workers: int): 10 | super().__init__() 11 | 12 | # self.dataloader_options = { 13 | # "batch_size": batch_size, "num_workers": num_workers,"collate_fn": collate_datastruct_and_text} 14 | self.dataloader_options = { 15 | "batch_size": batch_size, 16 | "num_workers": num_workers, 17 | "collate_fn": collate_fn, 18 | } 19 | 20 | # self.collate_fn = collate_fn 21 | self.persistent_workers = True 22 | self.is_mm = False 23 | # need to be overloaded: 24 | # - self.Dataset 25 | # - self._sample_set => load only a small subset 26 | # There is an helper bellow (get_sample_set) 27 | # - self.nfeats 28 | # - self.transforms 29 | 30 | def get_sample_set(self, overrides={}): 31 | sample_params = self.hparams.copy() 32 | sample_params.update(overrides) 33 | # import pdb; pdb.set_trace() 34 | 35 | split_file = pjoin( 36 | eval(f"self.cfg.DATASET.{self.name.upper()}.SPLIT_ROOT"), 37 | self.cfg.DATASET.VERSION, 38 | self.cfg.EVAL.SPLIT + ".txt", 39 | ) 40 | # import pdb; pdb.set_trace() 41 | return self.Dataset(split_file=split_file, **sample_params) 42 | 43 | def __getattr__(self, item): 44 | # train_dataset/val_dataset etc cached like properties 45 | if item.endswith("_dataset") and not item.startswith("_"): 46 | subset = item[: -len("_dataset")] 47 | item_c = "_" + item 48 | if item_c not in self.__dict__: 49 | # todo: config name not consistent 50 | subset = subset.upper() if subset != "val" else "EVAL" 51 | split = eval(f"self.cfg.{subset}.SPLIT") 52 | split_file = pjoin( 53 | eval(f"self.cfg.DATASET.{self.name.upper()}.SPLIT_ROOT"), 54 | self.cfg.DATASET.VERSION, 55 | eval(f"self.cfg.{subset}.SPLIT") + ".txt", 56 | ) 57 | self.__dict__[item_c] = self.Dataset( 58 | split_file=split_file, split=split, **self.hparams 59 | ) 60 | return getattr(self, item_c) 61 | classname = self.__class__.__name__ 62 | raise AttributeError(f"'{classname}' object has no attribute '{item}'") 63 | 64 | def setup(self, stage=None): 65 | self.stage = stage 66 | # Use the getter the first time to load the data 67 | if stage in (None, "fit"): 68 | _ = self.train_dataset 69 | _ = self.val_dataset 70 | if stage in (None, "test"): 71 | _ = self.test_dataset 72 | 73 | def train_dataloader(self): 74 | return DataLoader( 75 | self.train_dataset, 76 | shuffle=True, 77 | persistent_workers=True, 78 | **self.dataloader_options, 79 | ) 80 | 81 | def predict_dataloader(self): 82 | dataloader_options = self.dataloader_options.copy() 83 | dataloader_options["batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE 84 | dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS 85 | dataloader_options["shuffle"] = False 86 | return DataLoader( 87 | self.test_dataset, 88 | persistent_workers=True, 89 | **dataloader_options, 90 | ) 91 | 92 | def val_dataloader(self): 93 | # 
overrides batch_size and num_workers 94 | dataloader_options = self.dataloader_options.copy() 95 | dataloader_options["batch_size"] = self.cfg.EVAL.BATCH_SIZE 96 | dataloader_options["num_workers"] = self.cfg.EVAL.NUM_WORKERS 97 | dataloader_options["shuffle"] = False 98 | 99 | return DataLoader( 100 | self.val_dataset, 101 | persistent_workers=True, 102 | **dataloader_options, 103 | ) 104 | 105 | def test_dataloader(self): 106 | # overrides batch_size and num_workers 107 | dataloader_options = self.dataloader_options.copy() 108 | dataloader_options["batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE 109 | dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS 110 | # dataloader_options["drop_last"] = True 111 | dataloader_options["shuffle"] = False 112 | return DataLoader( 113 | self.test_dataset, 114 | persistent_workers=True, 115 | **dataloader_options, 116 | ) 117 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/README.md: -------------------------------------------------------------------------------- 1 | This code is based on https://github.com/EricGuo5513/text-to-motion.git -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/data/humanml/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/data/humanml/data/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/data/humanml/utils/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/utils/get_opt.py: -------------------------------------------------------------------------------- 1 | import os 2 | from argparse import Namespace 3 | import re 4 | from os.path import join as pjoin 5 | from .word_vectorizer import POS_enumerator 6 | 7 | 8 | def is_float(numStr): 9 | flag = False 10 | numStr = str(numStr).strip().lstrip("-").lstrip("+") 11 | try: 12 | reg = re.compile(r"^[-+]?[0-9]+\.[0-9]+$") 13 | res = reg.match(str(numStr)) 14 | if res: 15 | flag = True 16 | except Exception as ex: 17 | print("is_float() - error: " + str(ex)) 18 | return flag 19 | 20 | 21 | def is_number(numStr): 22 | flag = False 23 | numStr = str(numStr).strip().lstrip("-").lstrip("+") 24 | if str(numStr).isdigit(): 25 | flag = True 26 | return flag 27 | 28 | 29 | def get_opt(opt_path, device): 30 | opt = Namespace() 31 | opt_dict = vars(opt) 32 | 33 | skip = ( 34 | "-------------- End ----------------", 35 | "------------ Options -------------", 36 | "\n", 37 | ) 38 | print("Reading", opt_path) 39 | with open(opt_path) as f: 40 | for line in f: 41 | if line.strip() not in skip: 42 | # print(line.strip()) 43 | key, value = line.strip().split(": ") 44 | if value in ("True", "False"): 45 | opt_dict[key] = bool(value) 46 | elif 
is_float(value): 47 | opt_dict[key] = float(value) 48 | elif is_number(value): 49 | opt_dict[key] = int(value) 50 | else: 51 | opt_dict[key] = str(value) 52 | 53 | # print(opt) 54 | opt_dict["which_epoch"] = "latest" 55 | opt.save_root = pjoin(opt.checkpoints_dir, opt.dataset_name, opt.name) 56 | opt.model_dir = pjoin(opt.save_root, "model") 57 | opt.meta_dir = pjoin(opt.save_root, "meta") 58 | 59 | if opt.dataset_name == "t2m": 60 | opt.data_root = "./dataset/HumanML3D" 61 | opt.motion_dir = pjoin(opt.data_root, "new_joint_vecs") 62 | opt.text_dir = pjoin(opt.data_root, "texts") 63 | opt.joints_num = 22 64 | opt.dim_pose = 263 65 | opt.max_motion_length = 196 66 | elif opt.dataset_name == "kit": 67 | opt.data_root = "./dataset/KIT-ML" 68 | opt.motion_dir = pjoin(opt.data_root, "new_joint_vecs") 69 | opt.text_dir = pjoin(opt.data_root, "texts") 70 | opt.joints_num = 21 71 | opt.dim_pose = 251 72 | opt.max_motion_length = 196 73 | else: 74 | raise KeyError("Dataset not recognized") 75 | 76 | opt.dim_word = 300 77 | opt.num_classes = 200 // opt.unit_length 78 | opt.dim_pos_ohot = len(POS_enumerator) 79 | opt.is_train = False 80 | opt.is_continue = False 81 | opt.device = device 82 | 83 | return opt 84 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import linalg 3 | 4 | 5 | # (X - X_train)*(X - X_train) = -2X*X_train + X*X + X_train*X_train 6 | def euclidean_distance_matrix(matrix1, matrix2): 7 | """ 8 | Params: 9 | -- matrix1: N1 x D 10 | -- matrix2: N2 x D 11 | Returns: 12 | -- dist: N1 x N2 13 | dist[i, j] == distance(matrix1[i], matrix2[j]) 14 | """ 15 | assert matrix1.shape[1] == matrix2.shape[1] 16 | d1 = -2 * np.dot(matrix1, matrix2.T) # shape (num_test, num_train) 17 | d2 = np.sum(np.square(matrix1), axis=1, keepdims=True) # shape (num_test, 1) 18 | d3 = np.sum(np.square(matrix2), axis=1) # shape (num_train, ) 19 | dists = np.sqrt(d1 + d2 + d3) # broadcasting 20 | return dists 21 | 22 | def calculate_top_k(mat, top_k): 23 | size = mat.shape[0] 24 | gt_mat = np.expand_dims(np.arange(size), 1).repeat(size, 1) 25 | bool_mat = (mat == gt_mat) 26 | correct_vec = False 27 | top_k_list = [] 28 | for i in range(top_k): 29 | # print(correct_vec, bool_mat[:, i]) 30 | correct_vec = (correct_vec | bool_mat[:, i]) 31 | # print(correct_vec) 32 | top_k_list.append(correct_vec[:, None]) 33 | top_k_mat = np.concatenate(top_k_list, axis=1) 34 | return top_k_mat 35 | 36 | 37 | def calculate_R_precision(embedding1, embedding2, top_k, sum_all=False): 38 | dist_mat = euclidean_distance_matrix(embedding1, embedding2) 39 | argmax = np.argsort(dist_mat, axis=1) 40 | top_k_mat = calculate_top_k(argmax, top_k) 41 | if sum_all: 42 | return top_k_mat.sum(axis=0) 43 | else: 44 | return top_k_mat 45 | 46 | 47 | def calculate_matching_score(embedding1, embedding2, sum_all=False): 48 | assert len(embedding1.shape) == 2 49 | assert embedding1.shape[0] == embedding2.shape[0] 50 | assert embedding1.shape[1] == embedding2.shape[1] 51 | 52 | dist = linalg.norm(embedding1 - embedding2, axis=1) 53 | if sum_all: 54 | return dist.sum(axis=0) 55 | else: 56 | return dist 57 | 58 | 59 | 60 | def calculate_activation_statistics(activations): 61 | """ 62 | Params: 63 | -- activation: num_samples x dim_feat 64 | Returns: 65 | -- mu: dim_feat 66 | -- sigma: dim_feat x dim_feat 67 | """ 68 | mu = np.mean(activations, axis=0) 69 | cov = 
np.cov(activations, rowvar=False) 70 | return mu, cov 71 | 72 | 73 | def calculate_diversity(activation, diversity_times): 74 | assert len(activation.shape) == 2 75 | assert activation.shape[0] > diversity_times 76 | num_samples = activation.shape[0] 77 | 78 | first_indices = np.random.choice(num_samples, diversity_times, replace=False) 79 | second_indices = np.random.choice(num_samples, diversity_times, replace=False) 80 | dist = linalg.norm(activation[first_indices] - activation[second_indices], axis=1) 81 | return dist.mean() 82 | 83 | 84 | def calculate_multimodality(activation, multimodality_times): 85 | assert len(activation.shape) == 3 86 | assert activation.shape[1] > multimodality_times 87 | num_per_sent = activation.shape[1] 88 | 89 | first_dices = np.random.choice(num_per_sent, multimodality_times, replace=False) 90 | second_dices = np.random.choice(num_per_sent, multimodality_times, replace=False) 91 | dist = linalg.norm(activation[:, first_dices] - activation[:, second_dices], axis=2) 92 | return dist.mean() 93 | 94 | 95 | def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6): 96 | """Numpy implementation of the Frechet Distance. 97 | The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1) 98 | and X_2 ~ N(mu_2, C_2) is 99 | d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)). 100 | Stable version by Dougal J. Sutherland. 101 | Params: 102 | -- mu1 : Numpy array containing the activations of a layer of the 103 | inception net (like returned by the function 'get_predictions') 104 | for generated samples. 105 | -- mu2 : The sample mean over activations, precalculated on an 106 | representative dataset set. 107 | -- sigma1: The covariance matrix over activations for generated samples. 108 | -- sigma2: The covariance matrix over activations, precalculated on an 109 | representative dataset set. 110 | Returns: 111 | -- : The Frechet Distance. 
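    Example (illustrative only; assumes two activation matrices of shape
    num_samples x dim_feat produced elsewhere):
        mu_gen, sigma_gen = calculate_activation_statistics(gen_activations)
        mu_gt, sigma_gt = calculate_activation_statistics(gt_activations)
        fid = calculate_frechet_distance(mu_gen, sigma_gen, mu_gt, sigma_gt)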
112 | """ 113 | 114 | mu1 = np.atleast_1d(mu1) 115 | mu2 = np.atleast_1d(mu2) 116 | 117 | sigma1 = np.atleast_2d(sigma1) 118 | sigma2 = np.atleast_2d(sigma2) 119 | 120 | assert mu1.shape == mu2.shape, \ 121 | 'Training and test mean vectors have different lengths' 122 | assert sigma1.shape == sigma2.shape, \ 123 | 'Training and test covariances have different dimensions' 124 | 125 | diff = mu1 - mu2 126 | 127 | # Product might be almost singular 128 | covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False) 129 | if not np.isfinite(covmean).all(): 130 | msg = ('fid calculation produces singular product; ' 131 | 'adding %s to diagonal of cov estimates') % eps 132 | print(msg) 133 | offset = np.eye(sigma1.shape[0]) * eps 134 | covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset)) 135 | 136 | # Numerical error might give slight imaginary component 137 | if np.iscomplexobj(covmean): 138 | if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3): 139 | m = np.max(np.abs(covmean.imag)) 140 | raise ValueError('Imaginary component {}'.format(m)) 141 | covmean = covmean.real 142 | 143 | tr_covmean = np.trace(covmean) 144 | 145 | return (diff.dot(diff) + np.trace(sigma1) + 146 | np.trace(sigma2) - 2 * tr_covmean) -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/utils/paramUtil.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Define a kinematic tree for the skeletal struture 4 | kit_kinematic_chain = [ 5 | [0, 11, 12, 13, 14, 15], 6 | [0, 16, 17, 18, 19, 20], 7 | [0, 1, 2, 3, 4], 8 | [3, 5, 6, 7], 9 | [3, 8, 9, 10], 10 | ] 11 | 12 | kit_raw_offsets = np.array( 13 | [ 14 | [0, 0, 0], 15 | [0, 1, 0], 16 | [0, 1, 0], 17 | [0, 1, 0], 18 | [0, 1, 0], 19 | [1, 0, 0], 20 | [0, -1, 0], 21 | [0, -1, 0], 22 | [-1, 0, 0], 23 | [0, -1, 0], 24 | [0, -1, 0], 25 | [1, 0, 0], 26 | [0, -1, 0], 27 | [0, -1, 0], 28 | [0, 0, 1], 29 | [0, 0, 1], 30 | [-1, 0, 0], 31 | [0, -1, 0], 32 | [0, -1, 0], 33 | [0, 0, 1], 34 | [0, 0, 1], 35 | ] 36 | ) 37 | 38 | t2m_raw_offsets = np.array( 39 | [ 40 | [0, 0, 0], 41 | [1, 0, 0], 42 | [-1, 0, 0], 43 | [0, 1, 0], 44 | [0, -1, 0], 45 | [0, -1, 0], 46 | [0, 1, 0], 47 | [0, -1, 0], 48 | [0, -1, 0], 49 | [0, 1, 0], 50 | [0, 0, 1], 51 | [0, 0, 1], 52 | [0, 1, 0], 53 | [1, 0, 0], 54 | [-1, 0, 0], 55 | [0, 0, 1], 56 | [0, -1, 0], 57 | [0, -1, 0], 58 | [0, -1, 0], 59 | [0, -1, 0], 60 | [0, -1, 0], 61 | [0, -1, 0], 62 | ] 63 | ) 64 | 65 | t2m_kinematic_chain = [ 66 | [0, 2, 5, 8, 11], 67 | [0, 1, 4, 7, 10], 68 | [0, 3, 6, 9, 12, 15], 69 | [9, 14, 17, 19, 21], 70 | [9, 13, 16, 18, 20], 71 | ] 72 | t2m_left_hand_chain = [ 73 | [20, 22, 23, 24], 74 | [20, 34, 35, 36], 75 | [20, 25, 26, 27], 76 | [20, 31, 32, 33], 77 | [20, 28, 29, 30], 78 | ] 79 | t2m_right_hand_chain = [ 80 | [21, 43, 44, 45], 81 | [21, 46, 47, 48], 82 | [21, 40, 41, 42], 83 | [21, 37, 38, 39], 84 | [21, 49, 50, 51], 85 | ] 86 | 87 | 88 | kit_tgt_skel_id = "03950" 89 | 90 | t2m_tgt_skel_id = "000021" 91 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/utils/plot_script.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | # import cv2 4 | from textwrap import wrap 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import mpl_toolkits.mplot3d.axes3d as p3 9 | import numpy as np 10 | from matplotlib.animation import FFMpegFileWriter, FuncAnimation 11 | from 
mpl_toolkits.mplot3d import Axes3D 12 | from mpl_toolkits.mplot3d.art3d import Poly3DCollection 13 | 14 | import tma.data.humanml.utils.paramUtil as paramUtil 15 | 16 | skeleton = paramUtil.t2m_kinematic_chain 17 | 18 | 19 | def list_cut_average(ll, intervals): 20 | if intervals == 1: 21 | return ll 22 | 23 | bins = math.ceil(len(ll) * 1.0 / intervals) 24 | ll_new = [] 25 | for i in range(bins): 26 | l_low = intervals * i 27 | l_high = l_low + intervals 28 | l_high = l_high if l_high < len(ll) else len(ll) 29 | ll_new.append(np.mean(ll[l_low:l_high])) 30 | return ll_new 31 | 32 | 33 | def plot_3d_motion( 34 | save_path, joints, title, figsize=(3, 3), fps=120, radius=3, kinematic_tree=skeleton 35 | ): 36 | matplotlib.use("Agg") 37 | 38 | # title_sp = title.split(' ') 39 | # if len(title_sp) > 20: 40 | # title = '\n'.join([' '.join(title_sp[:10]), ' '.join(title_sp[10:20]), ' '.join(title_sp[20:])]) 41 | # elif len(title_sp) > 10: 42 | # title = '\n'.join([' '.join(title_sp[:10]), ' '.join(title_sp[10:])]) 43 | title = "\n".join(wrap(title, 20)) 44 | 45 | def init(): 46 | ax.set_xlim3d([-radius / 2, radius / 2]) 47 | ax.set_ylim3d([0, radius]) 48 | ax.set_zlim3d([-radius / 3.0, radius * 2 / 3.0]) 49 | # print(title) 50 | fig.suptitle(title, fontsize=10) 51 | ax.grid(b=False) 52 | 53 | def plot_xzPlane(minx, maxx, miny, minz, maxz): 54 | # Plot a plane XZ 55 | verts = [ 56 | [minx, miny, minz], 57 | [minx, miny, maxz], 58 | [maxx, miny, maxz], 59 | [maxx, miny, minz], 60 | ] 61 | xz_plane = Poly3DCollection([verts]) 62 | xz_plane.set_facecolor((0.5, 0.5, 0.5, 0.5)) 63 | ax.add_collection3d(xz_plane) 64 | 65 | # return ax 66 | 67 | # (seq_len, joints_num, 3) 68 | data = joints.copy().reshape(len(joints), -1, 3) 69 | fig = plt.figure(figsize=figsize) 70 | plt.tight_layout() 71 | ax = p3.Axes3D(fig) 72 | init() 73 | MINS = data.min(axis=0).min(axis=0) 74 | MAXS = data.max(axis=0).max(axis=0) 75 | # colors = ['red', 'blue', 'black', 'red', 'blue', 76 | # 'darkblue', 'darkblue', 'darkblue', 'darkblue', 'darkblue', 77 | # 'darkred', 'darkred', 'darkred', 'darkred', 'darkred'] 78 | colors = [ 79 | "#DD5A37", 80 | "#D69E00", 81 | "#B75A39", 82 | "#DD5A37", 83 | "#D69E00", 84 | "#FF6D00", 85 | "#FF6D00", 86 | "#FF6D00", 87 | "#FF6D00", 88 | "#FF6D00", 89 | "#DDB50E", 90 | "#DDB50E", 91 | "#DDB50E", 92 | "#DDB50E", 93 | "#DDB50E", 94 | ] 95 | 96 | frame_number = data.shape[0] 97 | # print(dataset.shape) 98 | 99 | height_offset = MINS[1] 100 | data[:, :, 1] -= height_offset 101 | trajec = data[:, 0, [0, 2]] 102 | 103 | data[..., 0] -= data[:, 0:1, 0] 104 | data[..., 2] -= data[:, 0:1, 2] 105 | 106 | def update(index): 107 | # ax.lines = [] 108 | # ax.collections = [] 109 | ax.view_init(elev=120, azim=-90) 110 | ax.dist = 7.5 111 | # ax = 112 | plot_xzPlane( 113 | MINS[0] - trajec[index, 0], 114 | MAXS[0] - trajec[index, 0], 115 | 0, 116 | MINS[2] - trajec[index, 1], 117 | MAXS[2] - trajec[index, 1], 118 | ) 119 | 120 | for i, (chain, color) in enumerate(zip(kinematic_tree, colors)): 121 | if i < 5: 122 | linewidth = 4.0 123 | else: 124 | linewidth = 2.0 125 | ax.plot3D( 126 | data[index, chain, 0], 127 | data[index, chain, 1], 128 | data[index, chain, 2], 129 | linewidth=linewidth, 130 | color=color, 131 | ) 132 | 133 | plt.axis("off") 134 | ax.set_xticklabels([]) 135 | ax.set_yticklabels([]) 136 | ax.set_zticklabels([]) 137 | 138 | ani = FuncAnimation( 139 | fig, update, frames=frame_number, interval=1000 / fps, repeat=False 140 | ) 141 | 142 | ani.save(save_path, fps=fps) 143 | plt.close() 144 | 
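A minimal usage sketch for plot_3d_motion (assumptions: dummy joints in the 22-joint HumanML3D layout expected by the default t2m kinematic chain, and a matplotlib/ffmpeg setup old enough to accept the p3.Axes3D(fig) and ax.grid(b=False) calls used above):

import numpy as np
from tma.data.humanml.utils.plot_script import plot_3d_motion

joints = np.zeros((40, 22, 3), dtype=np.float32)       # (seq_len, joints_num, 3)
joints[:, :, 1] = np.linspace(0.0, 1.0, 40)[:, None]   # trivial upward drift so frames differ
plot_3d_motion("demo.mp4", joints, title="a person raises both arms", fps=20)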
-------------------------------------------------------------------------------- /OpenTMA/tma/data/sampling/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import FrameSampler 2 | from .framerate import subsample, upsample 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/sampling/base.py: -------------------------------------------------------------------------------- 1 | from .frames import get_frameix_from_data_index 2 | 3 | 4 | class FrameSampler: 5 | def __init__( 6 | self, 7 | sampling="conseq", 8 | sampling_step=1, 9 | request_frames=None, 10 | threshold_reject=0.75, 11 | max_len=1000, 12 | min_len=10, 13 | ): 14 | self.sampling = sampling 15 | 16 | self.sampling_step = sampling_step 17 | self.request_frames = request_frames 18 | self.threshold_reject = threshold_reject 19 | self.max_len = max_len 20 | self.min_len = min_len 21 | 22 | def __call__(self, num_frames): 23 | 24 | return get_frameix_from_data_index( 25 | num_frames, self.request_frames, self.sampling, self.sampling_step 26 | ) 27 | 28 | def accept(self, duration): 29 | # Outputs have original lengths 30 | # Check if it is too long 31 | if self.request_frames is None: 32 | if duration > self.max_len: 33 | return False 34 | elif duration < self.min_len: 35 | return False 36 | else: 37 | # Reject sample if the length is 38 | # too little relative to 39 | # the request frames 40 | min_number = self.threshold_reject * self.request_frames 41 | if duration < min_number: 42 | return False 43 | return True 44 | 45 | def get(self, key, default=None): 46 | return getattr(self, key, default) 47 | 48 | def __getitem__(self, key): 49 | return getattr(self, key) 50 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/sampling/framerate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # TODO: use a real subsampler.. 5 | def subsample(num_frames, last_framerate, new_framerate): 6 | step = int(last_framerate / new_framerate) 7 | assert step >= 1 8 | frames = np.arange(0, num_frames, step) 9 | return frames 10 | 11 | 12 | # TODO: use a real upsampler.. 
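# The naive upsampler below does piecewise-linear interpolation: with
# step = new_framerate / last_framerate, it alpha-blends motion[:-1] and
# motion[1:] with weights (1 - alpha) and alpha, emits `step` blended frames
# per consecutive pair of frames, and re-appends the final frame, so the
# output has (len(motion) - 1) * step + 1 frames.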
13 | def upsample(motion, last_framerate, new_framerate): 14 | step = int(new_framerate / last_framerate) 15 | assert step >= 1 16 | 17 | # Alpha blending => interpolation 18 | alpha = np.linspace(0, 1, step + 1) 19 | last = np.einsum("l,...->l...", 1 - alpha, motion[:-1]) 20 | new = np.einsum("l,...->l...", alpha, motion[1:]) 21 | 22 | chuncks = (last + new)[:-1] 23 | output = np.concatenate(chuncks.swapaxes(1, 0)) 24 | # Don't forget the last one 25 | output = np.concatenate((output, motion[[-1]])) 26 | return output 27 | 28 | 29 | if __name__ == "__main__": 30 | motion = np.arange(105) 31 | submotion = motion[subsample(len(motion), 100.0, 12.5)] 32 | newmotion = upsample(submotion, 12.5, 100) 33 | 34 | print(newmotion) 35 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/sampling/frames.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import numpy as np 4 | from numpy import ndarray as Array 5 | import random 6 | 7 | 8 | def get_frameix_from_data_index( 9 | num_frames: int, 10 | request_frames: Optional[int], 11 | sampling: str = "conseq", 12 | sampling_step: int = 1, 13 | ) -> Array: 14 | nframes = num_frames 15 | 16 | if request_frames is None: 17 | frame_ix = np.arange(nframes) 18 | else: 19 | # sampling goal: input: ----------- 11 nframes 20 | # o--o--o--o- 4 ninputs 21 | # 22 | # step number is computed like that: [(11-1)/(4-1)] = 3 23 | # [---][---][---][- 24 | # So step = 3, and we take 0 to step*ninputs+1 with steps 25 | # [o--][o--][o--][o-] 26 | # then we can randomly shift the vector 27 | # -[o--][o--][o--]o 28 | # If there are too much frames required 29 | if request_frames > nframes: 30 | fair = False # True 31 | if fair: 32 | # distills redundancy everywhere 33 | choices = np.random.choice(range(nframes), request_frames, replace=True) 34 | frame_ix = sorted(choices) 35 | else: 36 | # adding the last frame until done 37 | ntoadd = max(0, request_frames - nframes) 38 | lastframe = nframes - 1 39 | padding = lastframe * np.ones(ntoadd, dtype=int) 40 | frame_ix = np.concatenate((np.arange(0, nframes), padding)) 41 | 42 | elif sampling in ["conseq", "random_conseq"]: 43 | step_max = (nframes - 1) // (request_frames - 1) 44 | if sampling == "conseq": 45 | if ( 46 | sampling_step == -1 47 | or sampling_step * (request_frames - 1) >= nframes 48 | ): 49 | step = step_max 50 | else: 51 | step = sampling_step 52 | elif sampling == "random_conseq": 53 | step = random.randint(1, step_max) 54 | 55 | lastone = step * (request_frames - 1) 56 | shift_max = nframes - lastone - 1 57 | shift = random.randint(0, max(0, shift_max - 1)) 58 | frame_ix = shift + np.arange(0, lastone + 1, step) 59 | 60 | elif sampling == "random": 61 | choices = np.random.choice(range(nframes), request_frames, replace=False) 62 | frame_ix = sorted(choices) 63 | 64 | else: 65 | raise ValueError("Sampling not recognized.") 66 | 67 | return frame_ix 68 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def lengths_to_mask(lengths): 5 | max_len = max(lengths) 6 | mask = torch.arange(max_len, device=lengths.device).expand( 7 | len(lengths), max_len 8 | ) < lengths.unsqueeze(1) 9 | return mask 10 | 11 | 12 | # padding to max length in one batch 13 | def collate_tensors(batch): 14 | dims = batch[0].dim() 15 | max_size = 
[max([b.size(i) for b in batch]) for i in range(dims)] 16 | size = (len(batch),) + tuple(max_size) 17 | canvas = batch[0].new_zeros(size=size) 18 | for i, b in enumerate(batch): 19 | sub_tensor = canvas[i] 20 | for d in range(dims): 21 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 22 | sub_tensor.add_(b) 23 | return canvas 24 | 25 | 26 | def all_collate(batch): 27 | notnone_batches = [b for b in batch if b is not None] 28 | databatch = [b["motion"] for b in notnone_batches] 29 | # labelbatch = [b['target'] for b in notnone_batches] 30 | if "lengths" in notnone_batches[0]: 31 | lenbatch = [b["lengths"] for b in notnone_batches] 32 | else: 33 | lenbatch = [len(b["inp"][0][0]) for b in notnone_batches] 34 | 35 | databatchTensor = collate_tensors(databatch) 36 | # labelbatchTensor = torch.as_tensor(labelbatch) 37 | lenbatchTensor = torch.as_tensor(lenbatch) 38 | maskbatchTensor = ( 39 | lengths_to_mask(lenbatchTensor, databatchTensor.shape[-1]) 40 | .unsqueeze(1) 41 | .unsqueeze(1) 42 | ) # unqueeze for broadcasting 43 | 44 | motion = databatchTensor 45 | cond = {"y": {"mask": maskbatchTensor, "lengths": lenbatchTensor}} 46 | 47 | if "text" in notnone_batches[0]: 48 | textbatch = [b["text"] for b in notnone_batches] 49 | cond["y"].update({"text": textbatch}) 50 | 51 | # collate action textual names 52 | if "action_text" in notnone_batches[0]: 53 | action_text = [b["action_text"] for b in notnone_batches] 54 | cond["y"].update({"action_text": action_text}) 55 | 56 | return motion, cond 57 | 58 | 59 | # an adapter to our collate func 60 | def tma_collate(batch): 61 | notnone_batches = [b for b in batch if b is not None] 62 | notnone_batches.sort(key=lambda x: x[3], reverse=True) 63 | # batch.sort(key=lambda x: x[3], reverse=True) 64 | adapted_batch = { 65 | "motion": collate_tensors( 66 | [torch.tensor(b[4]).float() for b in notnone_batches] 67 | ), 68 | "text": [b[2] for b in notnone_batches], 69 | "length": [b[5] for b in notnone_batches], 70 | "word_embs": collate_tensors( 71 | [torch.tensor(b[0]).float() for b in notnone_batches] 72 | ), 73 | "pos_ohot": collate_tensors( 74 | [torch.tensor(b[1]).float() for b in notnone_batches] 75 | ), 76 | "text_len": collate_tensors([torch.tensor(b[3]) for b in notnone_batches]), 77 | "tokens": [b[6] for b in notnone_batches], 78 | "retrieval_name": [b[7] for b in notnone_batches], 79 | } 80 | return adapted_batch 81 | 82 | 83 | def tma_collate_text_all(batch): 84 | # import pdb; pdb.set_trace() 85 | notnone_batches = [b for b in batch if b is not None] 86 | notnone_batches.sort(key=lambda x: x[3], reverse=True) 87 | # batch.sort(key=lambda x: x[3], reverse=True) 88 | adapted_batch = { 89 | "motion": collate_tensors( 90 | [torch.tensor(b[4]).float() for b in notnone_batches] 91 | ), 92 | "text": [b[2] for b in notnone_batches], 93 | "length": [b[5] for b in notnone_batches], 94 | "word_embs": collate_tensors( 95 | [torch.tensor(b[0]).float() for b in notnone_batches] 96 | ), 97 | "pos_ohot": collate_tensors( 98 | [torch.tensor(b[1]).float() for b in notnone_batches] 99 | ), 100 | "text_len": collate_tensors( 101 | [torch.tensor(b[3]).float() for b in notnone_batches] 102 | ), 103 | "tokens": [b[6] for b in notnone_batches], 104 | "body_text": [b[7] for b in notnone_batches], 105 | "hand_text": [b[8] for b in notnone_batches], 106 | "face_text": [b[9] for b in notnone_batches], 107 | } 108 | return adapted_batch 109 | 110 | 111 | def a2m_collate(batch): 112 | 113 | databatch = [b[0] for b in batch] 114 | labelbatch = [b[1] for b in batch] 115 | 
lenbatch = [len(b[0][0][0]) for b in batch] 116 | labeltextbatch = [b[3] for b in batch] 117 | 118 | databatchTensor = collate_tensors(databatch) 119 | labelbatchTensor = torch.as_tensor(labelbatch).unsqueeze(1) 120 | lenbatchTensor = torch.as_tensor(lenbatch) 121 | 122 | maskbatchTensor = lengths_to_mask(lenbatchTensor) 123 | adapted_batch = { 124 | "motion": databatchTensor.permute(0, 3, 2, 1).flatten(start_dim=2), 125 | "action": labelbatchTensor, 126 | "action_text": labeltextbatch, 127 | "mask": maskbatchTensor, 128 | "length": lenbatchTensor, 129 | } 130 | return adapted_batch 131 | -------------------------------------------------------------------------------- /OpenTMA/tma/launch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/launch/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/launch/prepare.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | from pathlib import Path 4 | 5 | import hydra 6 | from mld.tools.runid import generate_id 7 | from omegaconf import OmegaConf 8 | 9 | 10 | # Local paths 11 | def code_path(path=""): 12 | code_dir = hydra.utils.get_original_cwd() 13 | code_dir = Path(code_dir) 14 | return str(code_dir / path) 15 | 16 | 17 | def working_path(path): 18 | return str(Path(os.getcwd()) / path) 19 | 20 | 21 | # fix the id for this run 22 | ID = generate_id() 23 | 24 | 25 | def generate_id(): 26 | return ID 27 | 28 | 29 | def get_last_checkpoint(path, ckpt_name="last.ckpt"): 30 | output_dir = Path(hydra.utils.to_absolute_path(path)) 31 | last_ckpt_path = output_dir / "checkpoints" / ckpt_name 32 | return str(last_ckpt_path) 33 | 34 | 35 | def get_kitname(load_amass_data: bool, load_with_rot: bool): 36 | if not load_amass_data: 37 | return "kit-mmm-xyz" 38 | if load_amass_data and not load_with_rot: 39 | return "kit-amass-xyz" 40 | if load_amass_data and load_with_rot: 41 | return "kit-amass-rot" 42 | 43 | 44 | OmegaConf.register_new_resolver("code_path", code_path) 45 | OmegaConf.register_new_resolver("working_path", working_path) 46 | OmegaConf.register_new_resolver("generate_id", generate_id) 47 | OmegaConf.register_new_resolver("absolute_path", hydra.utils.to_absolute_path) 48 | OmegaConf.register_new_resolver("get_last_checkpoint", get_last_checkpoint) 49 | OmegaConf.register_new_resolver("get_kitname", get_kitname) 50 | 51 | 52 | # Remove warnings 53 | warnings.filterwarnings( 54 | "ignore", ".*Trying to infer the `batch_size` from an ambiguous collection.*" 55 | ) 56 | 57 | warnings.filterwarnings( 58 | "ignore", ".*does not have many workers which may be a bottleneck*" 59 | ) 60 | 61 | warnings.filterwarnings( 62 | "ignore", ".*Our suggested max number of worker in current system is*" 63 | ) 64 | 65 | 66 | # os.environ["HYDRA_FULL_ERROR"] = "1" 67 | os.environ["NUMEXPR_MAX_THREADS"] = "24" 68 | -------------------------------------------------------------------------------- /OpenTMA/tma/launch/tools.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from omegaconf import DictConfig, OmegaConf 3 | import hydra 4 | import os 5 | 6 | 7 | def resolve_cfg_path(cfg: DictConfig): 8 | working_dir = os.getcwd() 9 | cfg.working_dir = working_dir 10 | 
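prepare.py above registers several OmegaConf resolvers (code_path, working_path, generate_id, get_last_checkpoint, get_kitname) so that Hydra configs can call them through ${name:arg} interpolation. A minimal sketch of the mechanism, using a stand-in resolver rather than importing the repo module (importing it inside a Hydra run is what registers the real ones):

import os
from pathlib import Path

from omegaconf import OmegaConf

# Same registration pattern as in prepare.py, with a stand-in resolver.
OmegaConf.register_new_resolver("working_path", lambda p: str(Path(os.getcwd()) / p))

cfg = OmegaConf.create({"log_dir": "${working_path:logs}"})
print(cfg.log_dir)  # resolved lazily on access, e.g. /current/working/dir/logs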
-------------------------------------------------------------------------------- /OpenTMA/tma/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/architectures/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/fc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Encoder_FC(nn.Module): 7 | def __init__( 8 | self, 9 | modeltype, 10 | njoints, 11 | nfeats, 12 | num_frames, 13 | num_classes, 14 | translation, 15 | pose_rep, 16 | glob, 17 | glob_rot, 18 | latent_dim=256, 19 | **kargs 20 | ): 21 | super().__init__() 22 | 23 | self.modeltype = modeltype 24 | self.njoints = njoints 25 | self.nfeats = nfeats 26 | self.num_frames = num_frames 27 | self.num_classes = num_classes 28 | self.translation = translation 29 | self.pose_rep = pose_rep 30 | self.glob = glob 31 | self.glob_rot = glob_rot 32 | 33 | self.latent_dim = latent_dim 34 | 35 | self.activation = nn.GELU() 36 | 37 | self.input_dim = self.njoints * self.nfeats * self.num_frames + self.num_classes 38 | 39 | self.fully_connected = nn.Sequential( 40 | nn.Linear(self.input_dim, 512), nn.GELU(), nn.Linear(512, 256), nn.GELU() 41 | ) 42 | if self.modeltype == "cvae": 43 | self.mu = nn.Linear(256, self.latent_dim) 44 | self.var = nn.Linear(256, self.latent_dim) 45 | else: 46 | self.final = nn.Linear(256, self.latent_dim) 47 | 48 | def forward(self, batch): 49 | x, y = batch["x"], batch["y"] 50 | bs, njoints, feats, nframes = x.size() 51 | if (njoints * feats * nframes) != self.njoints * self.nfeats * self.num_frames: 52 | raise ValueError("This model is not adapted with this input") 53 | 54 | if len(y.shape) == 1: # can give on hot encoded as input 55 | y = F.one_hot(y, self.num_classes) 56 | y = y.to(dtype=x.dtype) 57 | x = x.reshape(bs, njoints * feats * nframes) 58 | x = torch.cat((x, y), 1) 59 | 60 | x = self.fully_connected(x) 61 | 62 | if self.modeltype == "cvae": 63 | return {"mu": self.mu(x), "logvar": self.var(x)} 64 | else: 65 | return {"z": self.final(x)} 66 | 67 | 68 | class Decoder_FC(nn.Module): 69 | def __init__( 70 | self, 71 | modeltype, 72 | njoints, 73 | nfeats, 74 | num_frames, 75 | num_classes, 76 | translation, 77 | pose_rep, 78 | glob, 79 | glob_rot, 80 | latent_dim=256, 81 | **kargs 82 | ): 83 | super().__init__() 84 | 85 | self.modeltype = modeltype 86 | self.njoints = njoints 87 | self.nfeats = nfeats 88 | self.num_frames = num_frames 89 | self.num_classes = num_classes 90 | self.translation = translation 91 | self.pose_rep = pose_rep 92 | self.glob = glob 93 | self.glob_rot = glob_rot 94 | 95 | self.latent_dim = latent_dim 96 | 97 | self.input_dim = self.latent_dim + self.num_classes 98 | self.output_dim = self.njoints * self.nfeats * self.num_frames 99 | 100 | self.fully_connected = nn.Sequential( 101 | nn.Linear(self.input_dim, 256), 102 | nn.GELU(), 103 | nn.Linear(256, 512), 104 | nn.GELU(), 105 | 
nn.Linear(512, self.output_dim), 106 | nn.GELU(), 107 | ) 108 | 109 | def forward(self, batch): 110 | z, y = batch["z"], batch["y"] 111 | # z: [batch_size, latent_dim] 112 | # y: [batch_size] 113 | if len(y.shape) == 1: # can give on hot encoded as input 114 | y = F.one_hot(y, self.num_classes) 115 | y = y.to(dtype=z.dtype) # y: [batch_size, num_classes] 116 | # z: [batch_size, latent_dim+num_classes] 117 | z = torch.cat((z, y), dim=1) 118 | 119 | z = self.fully_connected(z) 120 | 121 | bs, _ = z.size() 122 | 123 | z = z.reshape(bs, self.njoints, self.nfeats, self.num_frames) 124 | batch["output"] = z 125 | return batch 126 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/humanact12_gru.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # adapted from action2motion to take inputs of different lengths 6 | class MotionDiscriminator(nn.Module): 7 | 8 | def __init__( 9 | self, input_size, hidden_size, hidden_layer, output_size=12, use_noise=None 10 | ): 11 | super().__init__() 12 | 13 | self.input_size = input_size 14 | self.hidden_size = hidden_size 15 | self.hidden_layer = hidden_layer 16 | self.use_noise = use_noise 17 | 18 | self.recurrent = nn.GRU(input_size, hidden_size, hidden_layer) 19 | self.linear1 = nn.Linear(hidden_size, 30) 20 | self.linear2 = nn.Linear(30, output_size) 21 | 22 | def forward(self, motion_sequence, lengths=None, hidden_unit=None): 23 | # dim (motion_length, num_samples, hidden_size) 24 | bs, njoints, nfeats, num_frames = motion_sequence.shape 25 | motion_sequence = motion_sequence.reshape(bs, njoints * nfeats, num_frames) 26 | motion_sequence = motion_sequence.permute(2, 0, 1) 27 | if hidden_unit is None: 28 | hidden_unit = self.initHidden( 29 | motion_sequence.size(1), self.hidden_layer 30 | ).to(motion_sequence.device) 31 | gru_o, _ = self.recurrent(motion_sequence.float(), hidden_unit) 32 | 33 | # select the last valid, instead of: gru_o[-1, :, :] 34 | out = gru_o[ 35 | tuple( 36 | torch.stack( 37 | (lengths - 1, torch.arange(bs, device=motion_sequence.device)) 38 | ) 39 | ) 40 | ] 41 | 42 | # dim (num_samples, 30) 43 | lin1 = self.linear1(out) 44 | lin1 = torch.tanh(lin1) 45 | # dim (num_samples, output_size) 46 | lin2 = self.linear2(lin1) 47 | return lin2 48 | 49 | def initHidden(self, num_samples, layer): 50 | return torch.randn(layer, num_samples, self.hidden_size, requires_grad=False) 51 | 52 | 53 | class MotionDiscriminatorForFID(MotionDiscriminator): 54 | 55 | def forward(self, motion_sequence, lengths=None, hidden_unit=None): 56 | # dim (motion_length, num_samples, hidden_size) 57 | bs, njoints, nfeats, num_frames = motion_sequence.shape 58 | motion_sequence = motion_sequence.reshape(bs, njoints * nfeats, num_frames) 59 | motion_sequence = motion_sequence.permute(2, 0, 1) 60 | if hidden_unit is None: 61 | # motion_sequence = motion_sequence.permute(1, 0, 2) 62 | hidden_unit = self.initHidden( 63 | motion_sequence.size(1), self.hidden_layer 64 | ).to(motion_sequence.device) 65 | gru_o, _ = self.recurrent(motion_sequence.float(), hidden_unit) 66 | 67 | # select the last valid, instead of: gru_o[-1, :, :] 68 | out = gru_o[ 69 | tuple( 70 | torch.stack( 71 | (lengths - 1, torch.arange(bs, device=motion_sequence.device)) 72 | ) 73 | ) 74 | ] 75 | 76 | # dim (num_samples, 30) 77 | lin1 = self.linear1(out) 78 | lin1 = torch.tanh(lin1) 79 | return lin1 80 | 
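A minimal sketch of running the discriminator above on a padded batch (the sizes are illustrative, not values from the repo's configs; input_size must equal njoints * nfeats):

import torch
from tma.models.architectures.humanact12_gru import MotionDiscriminator

disc = MotionDiscriminator(input_size=72, hidden_size=128, hidden_layer=2, output_size=12)

x = torch.randn(4, 24, 3, 60)              # (bs, njoints, nfeats, num_frames), zero-padded
lengths = torch.tensor([60, 45, 30, 20])   # number of valid frames per sample
logits = disc(x, lengths)                  # (4, 12): one logit per HumanAct12 action class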
-------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/t2m_motionenc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.utils.rnn import pack_padded_sequence 4 | 5 | 6 | class MovementConvEncoder(nn.Module): 7 | def __init__(self, input_size, hidden_size, output_size): 8 | super(MovementConvEncoder, self).__init__() 9 | self.main = nn.Sequential( 10 | nn.Conv1d(input_size, hidden_size, 4, 2, 1), 11 | nn.Dropout(0.2, inplace=True), 12 | nn.LeakyReLU(0.2, inplace=True), 13 | nn.Conv1d(hidden_size, output_size, 4, 2, 1), 14 | nn.Dropout(0.2, inplace=True), 15 | nn.LeakyReLU(0.2, inplace=True), 16 | ) 17 | self.out_net = nn.Linear(output_size, output_size) 18 | 19 | def forward(self, inputs): 20 | inputs = inputs.permute(0, 2, 1) 21 | outputs = self.main(inputs).permute(0, 2, 1) 22 | return self.out_net(outputs) 23 | 24 | 25 | class MotionEncoderBiGRUCo(nn.Module): 26 | def __init__(self, input_size, hidden_size, output_size): 27 | super(MotionEncoderBiGRUCo, self).__init__() 28 | 29 | self.input_emb = nn.Linear(input_size, hidden_size) 30 | self.gru = nn.GRU( 31 | hidden_size, hidden_size, batch_first=True, bidirectional=True 32 | ) 33 | self.output_net = nn.Sequential( 34 | nn.Linear(hidden_size * 2, hidden_size), 35 | nn.LayerNorm(hidden_size), 36 | nn.LeakyReLU(0.2, inplace=True), 37 | nn.Linear(hidden_size, output_size), 38 | ) 39 | self.hidden_size = hidden_size 40 | self.hidden = nn.Parameter( 41 | torch.randn((2, 1, self.hidden_size), requires_grad=True) 42 | ) 43 | 44 | # input(batch_size, seq_len, dim) 45 | def forward(self, inputs, m_lens): 46 | num_samples = inputs.shape[0] 47 | 48 | input_embs = self.input_emb(inputs) 49 | hidden = self.hidden.repeat(1, num_samples, 1) 50 | 51 | cap_lens = m_lens.data.tolist() 52 | emb = pack_padded_sequence(input_embs, cap_lens, batch_first=True) 53 | 54 | gru_seq, gru_last = self.gru(emb, hidden) 55 | 56 | gru_last = torch.cat([gru_last[0], gru_last[1]], dim=-1) 57 | 58 | return self.output_net(gru_last) 59 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/t2m_textenc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.utils.rnn import pack_padded_sequence 4 | 5 | 6 | class TextEncoderBiGRUCo(nn.Module): 7 | def __init__(self, word_size, pos_size, hidden_size, output_size): 8 | super(TextEncoderBiGRUCo, self).__init__() 9 | 10 | self.pos_emb = nn.Linear(pos_size, word_size) 11 | self.input_emb = nn.Linear(word_size, hidden_size) 12 | self.gru = nn.GRU( 13 | hidden_size, hidden_size, batch_first=True, bidirectional=True 14 | ) 15 | self.output_net = nn.Sequential( 16 | nn.Linear(hidden_size * 2, hidden_size), 17 | nn.LayerNorm(hidden_size), 18 | nn.LeakyReLU(0.2, inplace=True), 19 | nn.Linear(hidden_size, output_size), 20 | ) 21 | 22 | self.hidden_size = hidden_size 23 | self.hidden = nn.Parameter( 24 | torch.randn((2, 1, self.hidden_size), requires_grad=True) 25 | ) 26 | 27 | def forward(self, word_embs, pos_onehot, cap_lens): 28 | num_samples = word_embs.shape[0] 29 | 30 | pos_embs = self.pos_emb(pos_onehot) 31 | inputs = word_embs + pos_embs 32 | input_embs = self.input_emb(inputs) 33 | hidden = self.hidden.repeat(1, num_samples, 1) 34 | 35 | cap_lens = cap_lens.data.tolist() 36 | emb = pack_padded_sequence(input_embs, cap_lens, 
batch_first=True) 37 | 38 | gru_seq, gru_last = self.gru(emb, hidden) 39 | 40 | gru_last = torch.cat([gru_last[0], gru_last[1]], dim=-1) 41 | 42 | return self.output_net(gru_last) 43 | 44 | 45 | class TextEncoderBiGRUCoV2(nn.Module): 46 | def __init__(self, word_size, pos_size, hidden_size, output_size, dataset=None): 47 | super(TextEncoderBiGRUCoV2, self).__init__() 48 | if dataset == "unimocap": 49 | self.pos_emb = nn.Linear(pos_size, word_size) 50 | self.input_emb = nn.Linear(word_size, hidden_size) 51 | self.gru = nn.GRU( 52 | hidden_size, hidden_size, batch_first=True, bidirectional=True 53 | ) 54 | self.output_net = nn.Sequential( 55 | nn.Linear(hidden_size * 2, hidden_size), 56 | nn.LayerNorm(hidden_size), 57 | nn.LeakyReLU(0.2, inplace=True), 58 | nn.Linear(hidden_size, output_size), 59 | ) 60 | 61 | self.hidden_size = hidden_size 62 | self.hidden = nn.Parameter( 63 | torch.randn((2, 1, self.hidden_size), requires_grad=True) 64 | ) 65 | 66 | def forward(self, word_embs, cap_lens): 67 | num_samples = word_embs.shape[0] 68 | 69 | inputs = word_embs 70 | input_embs = self.input_emb(inputs) 71 | hidden = self.hidden.repeat(1, num_samples, 1) 72 | 73 | cap_lens = cap_lens.data.tolist() 74 | emb = pack_padded_sequence(input_embs, cap_lens, batch_first=True) 75 | 76 | gru_seq, gru_last = self.gru(emb, hidden) 77 | 78 | gru_last = torch.cat([gru_last[0], gru_last[1]], dim=-1) 79 | 80 | return self.output_net(gru_last) 81 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/architectures/temos/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/motiondecoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/architectures/temos/motiondecoder/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/motiondecoder/actor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import pytorch_lightning as pl 5 | 6 | from typing import List, Optional 7 | from torch import nn, Tensor 8 | 9 | from tma.models.operator import PositionalEncoding 10 | from tma.utils.temos_utils import lengths_to_mask 11 | 12 | 13 | class ActorAgnosticDecoder(pl.LightningModule): 14 | """ 15 | This class is a decoder module for actor-agnostic features. It uses a transformer-based architecture for decoding. 16 | 17 | Args: 18 | nfeats (int): The number of features in the input. 19 | latent_dim (int, optional): The dimensionality of the latent space. Defaults to 256. 20 | ff_size (int, optional): The dimensionality of the feedforward network model. Defaults to 1024. 21 | num_layers (int, optional): The number of sub-encoder-layers in the transformer model. Defaults to 4. 22 | num_heads (int, optional): The number of heads in the multiheadattention models. Defaults to 4. 23 | dropout (float, optional): The dropout value. Defaults to 0.1. 24 | activation (str, optional): The activation function of intermediate layer, relu or gelu. 
Defaults to "gelu". 25 | """ 26 | 27 | def __init__( 28 | self, 29 | nfeats: int, 30 | latent_dim: int = 256, 31 | ff_size: int = 1024, 32 | num_layers: int = 4, 33 | num_heads: int = 4, 34 | dropout: float = 0.1, 35 | activation: str = "gelu", 36 | **kwargs 37 | ) -> None: 38 | 39 | super().__init__() 40 | self.save_hyperparameters(logger=False) 41 | 42 | output_feats = nfeats 43 | 44 | self.sequence_pos_encoding = PositionalEncoding(latent_dim, dropout) 45 | 46 | # Transformer decoder 47 | seq_trans_decoder_layer = nn.TransformerDecoderLayer( 48 | d_model=latent_dim, 49 | nhead=num_heads, 50 | dim_feedforward=ff_size, 51 | dropout=dropout, 52 | activation=activation, 53 | ) 54 | 55 | self.seqTransDecoder = nn.TransformerDecoder( 56 | seq_trans_decoder_layer, num_layers=num_layers 57 | ) 58 | 59 | # Final linear layer 60 | self.final_layer = nn.Linear(latent_dim, output_feats) 61 | 62 | def forward(self, z: Tensor, lengths: List[int]): 63 | """ 64 | Forward pass for the decoder. 65 | 66 | Args: 67 | z (Tensor): The input tensor. 68 | lengths (List[int]): The lengths of the sequences. 69 | 70 | Returns: 71 | Tensor: The output features. 72 | """ 73 | 74 | # Create a mask based on the lengths 75 | mask = lengths_to_mask(lengths, z.device) 76 | latent_dim = z.shape[1] 77 | bs, nframes = mask.shape 78 | nfeats = self.hparams.nfeats 79 | 80 | z = z[None] # sequence of 1 element for the memory 81 | 82 | # Construct time queries 83 | time_queries = torch.zeros(nframes, bs, latent_dim, device=z.device) 84 | time_queries = self.sequence_pos_encoding(time_queries) 85 | 86 | # Pass through the transformer decoder 87 | # with the latent vector for memory 88 | output = self.seqTransDecoder( 89 | tgt=time_queries, memory=z, tgt_key_padding_mask=~mask 90 | ) 91 | 92 | output = self.final_layer(output) 93 | # zero for padded area 94 | output[~mask.T] = 0 95 | # Pytorch Transformer: [Sequence, Batch size, ...] 96 | feats = output.permute(1, 0, 2) 97 | return feats 98 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/motiondecoder/gru.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import pytorch_lightning as pl 5 | 6 | from typing import List, Optional 7 | from torch import nn, Tensor 8 | 9 | from tma.models.operator import PositionalEncoding 10 | from tma.utils.temos_utils import lengths_to_mask 11 | 12 | 13 | class GRUDecoder(pl.LightningModule): 14 | """ 15 | This class is a decoder module for features using a GRU-based architecture. 16 | 17 | Args: 18 | nfeats (int): The number of features in the input. 19 | latent_dim (int, optional): The dimensionality of the latent space. Defaults to 256. 20 | num_layers (int, optional): The number of layers in the GRU model. Defaults to 4. 21 | """ 22 | 23 | def __init__( 24 | self, nfeats: int, latent_dim: int = 256, num_layers: int = 4, **kwargs 25 | ) -> None: 26 | 27 | super().__init__() 28 | self.save_hyperparameters(logger=False) 29 | 30 | output_feats = nfeats 31 | 32 | # Embedding layer to transform the input 33 | self.emb_layer = nn.Linear(latent_dim + 1, latent_dim) 34 | 35 | # GRU layer 36 | self.gru = nn.GRU(latent_dim, latent_dim, num_layers=num_layers) 37 | 38 | # Final linear layer 39 | self.final_layer = nn.Linear(latent_dim, output_feats) 40 | 41 | def forward(self, z: Tensor, lengths: List[int]): 42 | """ 43 | Forward pass for the decoder. 
44 | 45 | Args: 46 | z (Tensor): The input tensor. 47 | lengths (List[int]): The lengths of the sequences. 48 | 49 | Returns: 50 | Tensor: The output features. 51 | """ 52 | 53 | # Create a mask based on the lengths 54 | mask = lengths_to_mask(lengths, z.device) 55 | latent_dim = z.shape[1] 56 | bs, nframes = mask.shape 57 | nfeats = self.hparams.nfeats 58 | 59 | lengths = torch.tensor(lengths, device=z.device) 60 | 61 | # Repeat the input 62 | z = z[None].repeat((nframes, 1, 1)) 63 | 64 | # Add time information to the input 65 | time = mask * 1 / (lengths[..., None] - 1) 66 | time = (time[:, None] * torch.arange(time.shape[1], device=z.device))[:, 0] 67 | time = time.T[..., None] 68 | z = torch.cat((z, time), 2) 69 | 70 | # emb to latent space again 71 | z = self.emb_layer(z) 72 | 73 | # pass to gru 74 | z = self.gru(z)[0] 75 | output = self.final_layer(z) 76 | 77 | # zero for padded area 78 | output[~mask.T] = 0 79 | 80 | # Pytorch GRU: [Sequence, Batch size, ...] 81 | feats = output.permute(1, 0, 2) 82 | 83 | return feats 84 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/motionencoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/architectures/temos/motionencoder/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/motionencoder/gru.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import pytorch_lightning as pl 5 | 6 | from typing import List, Optional, Union 7 | from torch import nn, Tensor 8 | from torch.distributions.distribution import Distribution 9 | 10 | from tma.models.operator import PositionalEncoding 11 | from tma.utils.temos_utils import lengths_to_mask 12 | 13 | 14 | class GRUEncoder(pl.LightningModule): 15 | """ 16 | This class is a GRU encoder for encoding input features. 17 | 18 | Attributes: 19 | - skel_embedding: a linear layer for embedding the input features. 20 | - gru: a GRU layer for encoding the embedded features. 21 | - mu: a linear layer for generating the mean of the latent distribution (only if VAE is used). 22 | - logvar: a linear layer for generating the log variance of the latent distribution (only if VAE is used). 23 | - final: a linear layer for generating the final output (only if VAE is not used). 24 | 25 | Methods: 26 | - __init__: initializes the GRUEncoder object with the given parameters. 27 | - forward: encodes the input features and returns the encoded output. 28 | """ 29 | 30 | def __init__( 31 | self, 32 | nfeats: int, 33 | vae: bool, 34 | latent_dim: int = 256, 35 | num_layers: int = 4, 36 | **kwargs 37 | ): 38 | """ 39 | Initializes the GRUEncoder object with the given parameters. 40 | 41 | Inputs: 42 | - nfeats: the number of input features. 43 | - vae: a flag indicating whether to use a Variational Autoencoder (VAE). 44 | - latent_dim: the dimension of the latent space. 45 | - num_layers: the number of layers in the GRU. 
46 | 47 | Outputs: None 48 | """ 49 | super().__init__() 50 | self.save_hyperparameters(logger=False) 51 | input_feats = nfeats 52 | 53 | # Embed the input features 54 | self.skel_embedding = nn.Linear(input_feats, latent_dim) 55 | 56 | # Initialize the GRU layer 57 | self.gru = nn.GRU(latent_dim, latent_dim, num_layers=num_layers) 58 | 59 | # Action agnostic: only one set of params 60 | if vae: 61 | self.mu = nn.Linear(latent_dim, latent_dim) 62 | self.logvar = nn.Linear(latent_dim, latent_dim) 63 | else: 64 | self.final = nn.Linear(latent_dim, latent_dim) 65 | 66 | def forward(self, features: Tensor, lengths: Optional[List[int]] = None): 67 | """ 68 | Encodes the input features and returns the encoded output. 69 | 70 | Inputs: 71 | - features: a tensor of input features. 72 | - lengths: a list of lengths of the input features. 73 | 74 | Outputs: the encoded output. 75 | """ 76 | if lengths is None: 77 | lengths = [len(feature) for feature in features] 78 | 79 | device = features.device 80 | 81 | bs, nframes, nfeats = features.shape 82 | mask = lengths_to_mask(lengths, device) 83 | 84 | x = features 85 | # Embed each human poses into latent vectors 86 | x = self.skel_embedding(x) 87 | 88 | # Switch sequence and batch_size because the input of 89 | # Pytorch Transformer is [Sequence, Batch size, ...] 90 | x = x.permute(1, 0, 2) # now it is [nframes, bs, latent_dim] 91 | 92 | # Get all the output of the gru 93 | x = self.gru(x)[0] 94 | 95 | # Put back the batch dimention first 96 | x = x.permute(1, 0, 2) # now it is [nframes, bs, latent_dim] 97 | 98 | # Extract the last valid input 99 | x = x[ 100 | tuple( 101 | torch.stack( 102 | ( 103 | torch.arange(bs, device=x.device), 104 | torch.tensor(lengths, device=x.device) - 1, 105 | ) 106 | ) 107 | ) 108 | ] 109 | 110 | if self.hparams.vae: 111 | mu = self.mu(x) 112 | logvar = self.logvar(x) 113 | std = logvar.exp().pow(0.5) 114 | # https://github.com/kampta/pytorch-distributions/blob/master/gaussian_vae.py 115 | return torch.distributions.Normal(mu, std) 116 | else: 117 | return self.final(x) 118 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/textencoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/architectures/temos/textencoder/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/textencoder/distillbert.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | import pytorch_lightning as pl 3 | 4 | import torch.nn as nn 5 | import os 6 | 7 | import torch 8 | from torch import Tensor 9 | from torch.distributions.distribution import Distribution 10 | from transformers import AutoTokenizer, AutoModel 11 | from transformers import logging 12 | 13 | 14 | class DistilbertEncoderBase(pl.LightningModule): 15 | """ 16 | This class is a base encoder for DistilBERT models. 17 | 18 | Attributes: 19 | - tokenizer: the tokenizer for the pre-trained DistilBERT model. 20 | - text_model: the pre-trained DistilBERT model. 21 | - text_encoded_dim: the dimension of the hidden state in the DistilBERT model. 22 | 23 | Methods: 24 | - __init__: initializes the DistilbertEncoderBase object with the given parameters. 25 | - train: sets the training mode for the model. 
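    - get_last_hidden_state: tokenizes a list of texts and returns the last hidden state of the DistilBERT model (optionally together with the attention mask).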
26 | """ 27 | 28 | def __init__(self, modelpath: str, finetune: bool = False): 29 | """ 30 | Initializes the DistilbertEncoderBase object with the given parameters. 31 | 32 | Inputs: 33 | - modelpath: the path to the pre-trained DistilBERT model. 34 | - finetune: a flag indicating whether to fine-tune the DistilBERT model. 35 | 36 | Outputs: None 37 | """ 38 | super().__init__() 39 | logging.set_verbosity_error() 40 | 41 | # Tokenizer 42 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 43 | self.tokenizer = AutoTokenizer.from_pretrained(modelpath) 44 | 45 | # Text model 46 | self.text_model = AutoModel.from_pretrained(modelpath) 47 | 48 | # Don't train the model 49 | if not finetune: 50 | self.text_model.training = False 51 | for p in self.text_model.parameters(): 52 | p.requires_grad = False 53 | 54 | # Then configure the model 55 | self.text_encoded_dim = self.text_model.config.hidden_size 56 | 57 | def train(self, mode: bool = True): 58 | """ 59 | Sets the training mode for the model. 60 | 61 | Inputs: 62 | - mode: a flag indicating whether to set the model to training mode. 63 | 64 | Outputs: None 65 | """ 66 | self.training = mode 67 | for module in self.children(): 68 | # Don't put the model in 69 | if module == self.text_model and not self.hparams.finetune: 70 | continue 71 | module.train(mode) 72 | return self 73 | 74 | def get_last_hidden_state(self, texts: List[str], return_mask: bool = False): 75 | """ 76 | Sets the training mode for the model. 77 | 78 | Inputs: 79 | - mode: a flag indicating whether to set the model to training mode. 80 | 81 | Outputs: None 82 | """ 83 | # Tokenize the texts and convert them to tensors 84 | encoded_inputs = self.tokenizer(texts, return_tensors="pt", padding=True) 85 | 86 | # Pass the encoded inputs to the DistilBERT model 87 | output = self.text_model(**encoded_inputs.to(self.text_model.device)) 88 | 89 | # If not returning the attention mask, return the last hidden state 90 | if not return_mask: 91 | return output.last_hidden_state 92 | 93 | # If returning the attention mask, return the last hidden state and the attention mask 94 | return output.last_hidden_state, encoded_inputs.attention_mask.to(dtype=bool) 95 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/textencoder/distillbert_actor.py: -------------------------------------------------------------------------------- 1 | from .distillbert import DistilbertEncoderBase 2 | import torch 3 | 4 | from typing import List, Union 5 | from torch import nn, Tensor 6 | from torch.distributions.distribution import Distribution 7 | 8 | from tma.models.operator import PositionalEncoding 9 | from tma.utils.temos_utils import lengths_to_mask 10 | 11 | 12 | class DistilbertActorAgnosticEncoder(DistilbertEncoderBase): 13 | def __init__( 14 | self, 15 | modelpath: str, 16 | finetune: bool = False, 17 | vae: bool = True, 18 | latent_dim: int = 256, 19 | ff_size: int = 1024, 20 | num_layers: int = 4, 21 | num_heads: int = 4, 22 | dropout: float = 0.1, 23 | activation: str = "gelu", 24 | **kwargs 25 | ): 26 | """ 27 | Initializes the DistilbertActorAgnosticEncoder object with the given parameters. 28 | 29 | Inputs: 30 | - modelpath: the path to the pre-trained DistilBERT model. 31 | - finetune: a flag indicating whether to fine-tune the DistilBERT model. 32 | - vae: a flag indicating whether to use a VAE model. 33 | - latent_dim: the dimension of the latent space. 
34 | - ff_size: the size of the feedforward network in the transformer encoder. 35 | - num_layers: the number of layers in the transformer encoder. 36 | - num_heads: the number of attention heads in the transformer encoder. 37 | - dropout: the dropout rate. 38 | - activation: the activation function to use in the transformer encoder. 39 | 40 | Outputs: None 41 | """ 42 | super().__init__(modelpath=modelpath, finetune=finetune) 43 | self.save_hyperparameters(logger=False) 44 | 45 | encoded_dim = self.text_encoded_dim 46 | 47 | # Projection of the text-outputs into the latent space 48 | self.projection = nn.Sequential(nn.ReLU(), nn.Linear(encoded_dim, latent_dim)) 49 | 50 | # TransformerVAE adapted from ACTOR 51 | # Action agnostic: only one set of params 52 | if vae: 53 | self.mu_token = nn.Parameter(torch.randn(latent_dim)) 54 | self.logvar_token = nn.Parameter(torch.randn(latent_dim)) 55 | else: 56 | self.emb_token = nn.Parameter(torch.randn(latent_dim)) 57 | 58 | self.sequence_pos_encoding = PositionalEncoding(latent_dim, dropout) 59 | 60 | seq_trans_encoder_layer = nn.TransformerEncoderLayer( 61 | d_model=latent_dim, 62 | nhead=num_heads, 63 | dim_feedforward=ff_size, 64 | dropout=dropout, 65 | activation=activation, 66 | ) 67 | 68 | self.seqTransEncoder = nn.TransformerEncoder( 69 | seq_trans_encoder_layer, num_layers=num_layers 70 | ) 71 | 72 | def forward(self, texts: List[str]): 73 | text_encoded, mask = self.get_last_hidden_state(texts, return_mask=True) 74 | 75 | x = self.projection(text_encoded) 76 | bs, nframes, _ = x.shape 77 | # bs, nframes, totjoints, nfeats = x.shape 78 | # Switch sequence and batch_size because the input of 79 | # Pytorch Transformer is [Sequence, Batch size, ...] 80 | x = x.permute(1, 0, 2) # now it is [nframes, bs, latent_dim] 81 | 82 | if self.hparams.vae: 83 | mu_token = torch.tile(self.mu_token, (bs,)).reshape(bs, -1) 84 | logvar_token = torch.tile(self.logvar_token, (bs,)).reshape(bs, -1) 85 | 86 | # adding the distribution tokens for all sequences 87 | xseq = torch.cat((mu_token[None], logvar_token[None], x), 0) 88 | 89 | # create a bigger mask, to allow attend to mu and logvar 90 | token_mask = torch.ones((bs, 2), dtype=bool, device=x.device) 91 | aug_mask = torch.cat((token_mask, mask), 1) 92 | else: 93 | emb_token = torch.tile(self.emb_token, (bs,)).reshape(bs, -1) 94 | 95 | # adding the embedding token for all sequences 96 | xseq = torch.cat((emb_token[None], x), 0) 97 | 98 | # create a bigger mask, to allow attend to emb 99 | token_mask = torch.ones((bs, 1), dtype=bool, device=x.device) 100 | aug_mask = torch.cat((token_mask, mask), 1) 101 | 102 | # add positional encoding 103 | xseq = self.sequence_pos_encoding(xseq) 104 | final = self.seqTransEncoder(xseq, src_key_padding_mask=~aug_mask) 105 | 106 | if self.hparams.vae: 107 | mu, logvar = final[0], final[1] 108 | std = logvar.exp().pow(0.5) 109 | # https://github.com/kampta/pytorch-distributions/blob/master/gaussian_vae.py 110 | try: 111 | dist = torch.distributions.Normal(mu, std) 112 | except ValueError: 113 | import ipdb 114 | 115 | ipdb.set_trace() # noqa 116 | pass 117 | return dist 118 | else: 119 | return final[0] 120 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/vposert_vae.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import 
torch.nn.functional as F 8 | from torch import Tensor, nn 9 | from torch.distributions.distribution import Distribution 10 | 11 | from tma.models.architectures.tools.embeddings import TimestepEmbedding, Timesteps 12 | from tma.models.operator import PositionalEncoding 13 | from tma.models.operator.cross_attention import ( 14 | SkipTransformerEncoder, 15 | SkipTransformerDecoder, 16 | TransformerDecoder, 17 | TransformerDecoderLayer, 18 | TransformerEncoder, 19 | TransformerEncoderLayer, 20 | ) 21 | from tma.models.operator.position_encoding import build_position_encoding 22 | from tma.utils.temos_utils import lengths_to_mask 23 | 24 | """ 25 | vae 26 | skip connection encoder 27 | skip connection decoder 28 | mem for each decoder layer 29 | """ 30 | 31 | 32 | class VPosert(nn.Module): 33 | 34 | def __init__(self, cfg, **kwargs) -> None: 35 | 36 | super(VPosert, self).__init__() 37 | 38 | num_neurons = 512 39 | self.latentD = 256 40 | 41 | # self.num_joints = 21 42 | n_features = 196 * 263 43 | 44 | self.encoder_net = nn.Sequential( 45 | BatchFlatten(), 46 | nn.BatchNorm1d(n_features), 47 | nn.Linear(n_features, num_neurons), 48 | nn.LeakyReLU(), 49 | nn.BatchNorm1d(num_neurons), 50 | nn.Dropout(0.1), 51 | nn.Linear(num_neurons, num_neurons), 52 | nn.Linear(num_neurons, num_neurons), 53 | NormalDistDecoder(num_neurons, self.latentD), 54 | ) 55 | 56 | self.decoder_net = nn.Sequential( 57 | nn.Linear(self.latentD, num_neurons), 58 | nn.LeakyReLU(), 59 | nn.Dropout(0.1), 60 | nn.Linear(num_neurons, num_neurons), 61 | nn.LeakyReLU(), 62 | nn.Linear(num_neurons, n_features), 63 | ContinousRotReprDecoder(), 64 | ) 65 | 66 | def forward(self, features: Tensor, lengths: Optional[List[int]] = None): 67 | q_z = self.encode(features) 68 | feats_rst = self.decode(q_z) 69 | return feats_rst, q_z 70 | 71 | def encode(self, pose_body, lengths: Optional[List[int]] = None): 72 | """ 73 | :param Pin: Nx(numjoints*3) 74 | :param rep_type: 'matrot'/'aa' for matrix rotations or axis-angle 75 | :return: 76 | """ 77 | q_z = self.encoder_net(pose_body) 78 | q_z_sample = q_z.rsample() 79 | return q_z_sample.unsqueeze(0), q_z 80 | 81 | def decode(self, Zin, lengths: Optional[List[int]] = None): 82 | bs = Zin.shape[0] 83 | Zin = Zin[0] 84 | 85 | prec = self.decoder_net(Zin) 86 | 87 | return prec 88 | 89 | 90 | class BatchFlatten(nn.Module): 91 | 92 | def __init__(self): 93 | super(BatchFlatten, self).__init__() 94 | self._name = "batch_flatten" 95 | 96 | def forward(self, x): 97 | return x.view(x.shape[0], -1) 98 | 99 | 100 | class ContinousRotReprDecoder(nn.Module): 101 | 102 | def __init__(self): 103 | super(ContinousRotReprDecoder, self).__init__() 104 | 105 | def forward(self, module_input): 106 | reshaped_input = module_input.view(-1, 196, 263) 107 | return reshaped_input 108 | 109 | 110 | class NormalDistDecoder(nn.Module): 111 | 112 | def __init__(self, num_feat_in, latentD): 113 | super(NormalDistDecoder, self).__init__() 114 | 115 | self.mu = nn.Linear(num_feat_in, latentD) 116 | self.logvar = nn.Linear(num_feat_in, latentD) 117 | 118 | def forward(self, Xout): 119 | return torch.distributions.normal.Normal( 120 | self.mu(Xout), F.softplus(self.logvar(Xout)) 121 | ) 122 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/body_skeleton/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/body_skeleton/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/body_skeleton/paramUtil.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Define a kinematic tree for the skeletal struture 4 | kit_kinematic_chain = [ 5 | [0, 11, 12, 13, 14, 15], 6 | [0, 16, 17, 18, 19, 20], 7 | [0, 1, 2, 3, 4], 8 | [3, 5, 6, 7], 9 | [3, 8, 9, 10], 10 | ] 11 | 12 | kit_raw_offsets = np.array( 13 | [ 14 | [0, 0, 0], 15 | [0, 1, 0], 16 | [0, 1, 0], 17 | [0, 1, 0], 18 | [0, 1, 0], 19 | [1, 0, 0], 20 | [0, -1, 0], 21 | [0, -1, 0], 22 | [-1, 0, 0], 23 | [0, -1, 0], 24 | [0, -1, 0], 25 | [1, 0, 0], 26 | [0, -1, 0], 27 | [0, -1, 0], 28 | [0, 0, 1], 29 | [0, 0, 1], 30 | [-1, 0, 0], 31 | [0, -1, 0], 32 | [0, -1, 0], 33 | [0, 0, 1], 34 | [0, 0, 1], 35 | ] 36 | ) 37 | 38 | t2m_raw_offsets = np.array( 39 | [ 40 | [0, 0, 0], 41 | [1, 0, 0], 42 | [-1, 0, 0], 43 | [0, 1, 0], 44 | [0, -1, 0], 45 | [0, -1, 0], 46 | [0, 1, 0], 47 | [0, -1, 0], 48 | [0, -1, 0], 49 | [0, 1, 0], 50 | [0, 0, 1], 51 | [0, 0, 1], 52 | [0, 1, 0], 53 | [1, 0, 0], 54 | [-1, 0, 0], 55 | [0, 0, 1], 56 | [0, -1, 0], 57 | [0, -1, 0], 58 | [0, -1, 0], 59 | [0, -1, 0], 60 | [0, -1, 0], 61 | [0, -1, 0], 62 | ] 63 | ) 64 | 65 | t2m_kinematic_chain = [ 66 | [0, 2, 5, 8, 11], 67 | [0, 1, 4, 7, 10], 68 | [0, 3, 6, 9, 12, 15], 69 | [9, 14, 17, 19, 21], 70 | [9, 13, 16, 18, 20], 71 | ] 72 | t2m_left_hand_chain = [ 73 | [20, 22, 23, 24], 74 | [20, 34, 35, 36], 75 | [20, 25, 26, 27], 76 | [20, 31, 32, 33], 77 | [20, 28, 29, 30], 78 | ] 79 | t2m_right_hand_chain = [ 80 | [21, 43, 44, 45], 81 | [21, 46, 47, 48], 82 | [21, 40, 41, 42], 83 | [21, 37, 38, 39], 84 | [21, 49, 50, 51], 85 | ] 86 | 87 | 88 | kit_tgt_skel_id = "03950" 89 | 90 | t2m_tgt_skel_id = "000021" 91 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/get_model.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | 4 | def get_model(cfg, datamodule, phase="train"): 5 | """ 6 | Inputs: 7 | cfg (Config): The configuration object containing model details. 8 | datamodule (DataModule): The data module object for data loading and processing. 9 | phase (str): The phase of model training. Default is "train". 10 | 11 | This function returns the model based on the model type specified in the configuration. If the model type is not supported, it raises a ValueError. 12 | 13 | Returns: 14 | Model (object): The model object. 15 | """ 16 | modeltype = cfg.model.model_type 17 | if modeltype in ["mld", "temos"]: 18 | return get_module(cfg, datamodule) 19 | else: 20 | raise ValueError(f"Invalid model type {modeltype}.") 21 | 22 | 23 | def get_module(cfg, datamodule): 24 | """ 25 | Inputs: 26 | cfg (Config): The configuration object containing model details. 27 | datamodule (DataModule): The data module object for data loading and processing. 28 | 29 | This function imports the model module based on the model type specified in the configuration, gets the model class from the module, and returns an instance of the model class. 30 | 31 | Returns: 32 | Model (object): The model object. 
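    Example (hypothetical configuration, shown only to illustrate the dynamic import):
    with cfg.model.model_type == "temos", the import below resolves to the module
    "tma.models.modeltype.temos", the class name becomes "TEMOS", and the call returns
    TEMOS(cfg=cfg, datamodule=datamodule):

        cfg.model.model_type = "temos"
        model = get_model(cfg, datamodule)  # an instance of tma.models.modeltype.temos.TEMOS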
33 | """ 34 | modeltype = cfg.model.model_type 35 | model_module = importlib.import_module( 36 | f".modeltype.{cfg.model.model_type}", package="tma.models") 37 | Model = model_module.__getattribute__(f"{modeltype.upper()}") 38 | return Model(cfg=cfg, datamodule=datamodule) 39 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from tma.models.losses.temos import TemosLosses 2 | from tma.models.losses.tmost import TmostLosses 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/losses/actor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchmetrics import Metric 4 | 5 | 6 | class ACTORLosses(Metric): 7 | """ 8 | Loss 9 | Modify loss 10 | 11 | """ 12 | 13 | def __init__(self, vae, mode, cfg): 14 | super().__init__(dist_sync_on_step=cfg.LOSS.DIST_SYNC_ON_STEP) 15 | 16 | # Save parameters 17 | self.vae = vae 18 | self.mode = mode 19 | 20 | losses = [] 21 | losses.append("recons_feature") 22 | losses.append("recons_verts") 23 | losses.append("recons_joints") 24 | losses.append("recons_limb") 25 | 26 | # latent loss 27 | losses.append("latent_st2sm") 28 | 29 | # KL loss 30 | losses.append("kl_motion") 31 | losses.append("total") 32 | 33 | for loss in losses: 34 | self.register_buffer(loss, torch.tensor(0.0)) 35 | self.register_buffer("count", torch.tensor(0)) 36 | self.losses = losses 37 | 38 | self._losses_func = {} 39 | self._params = {} 40 | for loss in losses: 41 | if loss != "total": 42 | if loss.split("_")[0] == "kl": 43 | self._losses_func[loss] = KLLoss() 44 | self._params[loss] = cfg.LOSS.LAMBDA_KL 45 | elif loss.split("_")[0] == "recons": 46 | self._losses_func[loss] = torch.nn.SmoothL1Loss(reduction="mean") 47 | self._params[loss] = cfg.LOSS.LAMBDA_REC 48 | elif loss.split("_")[0] == "cross": 49 | self._losses_func[loss] = torch.nn.SmoothL1Loss(reduction="mean") 50 | self._params[loss] = cfg.LOSS.LAMBDA_CROSS 51 | elif loss.split("_")[0] == "latent": 52 | self._losses_func[loss] = torch.nn.SmoothL1Loss(reduction="mean") 53 | self._params[loss] = cfg.LOSS.LAMBDA_LATENT 54 | elif loss.split("_")[0] == "cycle": 55 | self._losses_func[loss] = torch.nn.SmoothL1Loss(reduction="mean") 56 | self._params[loss] = cfg.LOSS.LAMBDA_CYCLE 57 | else: 58 | ValueError("This loss is not recognized.") 59 | 60 | def update(self, rs_set, dist_ref): 61 | total: float = 0.0 62 | # Compute the losses 63 | # loss1 - reconstruction loss 64 | total += self._update_loss("recons_feature", rs_set["m_rst"], rs_set["m_ref"]) 65 | # total += self._update_loss("recons_verts", rs_set['verts_rs'], rs_set['verts_ref']) 66 | # total += self._update_loss("recons_joints", rs_set['joints_rs'], rs_set['joints_ref']) 67 | # total += self._update_loss("recons_limb", rs_set['rs_base'], rs_set['m1']) 68 | 69 | # loss - text motion latent loss 70 | total += self._update_loss("kl_motion", rs_set["dist_m"], dist_ref) 71 | 72 | self.total += total.detach() 73 | self.count += 1 74 | 75 | return total 76 | 77 | def compute(self, split): 78 | count = getattr(self, "count") 79 | return {loss: getattr(self, loss) / count for loss in self.losses} 80 | 81 | def _update_loss(self, loss: str, outputs, inputs): 82 | # Update the loss 83 | val = self._losses_func[loss](outputs, inputs) 84 | getattr(self, loss).__iadd__(val.detach()) 85 | # Return a 
weighted sum 86 | weighted_loss = self._params[loss] * val 87 | return weighted_loss 88 | 89 | def loss2logname(self, loss: str, split: str): 90 | if loss == "total": 91 | log_name = f"{loss}/{split}" 92 | else: 93 | loss_type, name = loss.split("_") 94 | log_name = f"{loss_type}/{name}/{split}" 95 | return log_name 96 | 97 | 98 | class KLLoss: 99 | def __init__(self): 100 | pass 101 | 102 | def __call__(self, q, p): 103 | div = torch.distributions.kl_divergence(q, p) 104 | return div.mean() 105 | 106 | def __repr__(self): 107 | return "KLLoss()" 108 | 109 | 110 | class KLLossMulti: 111 | def __init__(self): 112 | self.klloss = KLLoss() 113 | 114 | def __call__(self, qlist, plist): 115 | return sum([self.klloss(q, p) for q, p in zip(qlist, plist)]) 116 | 117 | def __repr__(self): 118 | return "KLLossMulti()" 119 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/losses/infonce.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | 5 | 6 | class InfoNCE: 7 | """ 8 | This class implements the InfoNCE loss function. 9 | 10 | Attributes: 11 | - t: a temperature parameter for the softmax function in the loss calculation. 12 | 13 | Methods: 14 | - __call__: computes the InfoNCE loss given the motion and text features, and an optional distance matrix. 15 | """ 16 | 17 | def __init__(self, t): 18 | """ 19 | Initializes the InfoNCE object with a given temperature parameter. 20 | 21 | Inputs: 22 | - t: a temperature parameter for the softmax function in the loss calculation. 23 | """ 24 | self.t = t 25 | 26 | def __call__(self, f, dist): 27 | """ 28 | Computes the InfoNCE loss given the motion and text features, and an optional distance matrix. 29 | 30 | Inputs: 31 | - f: a tuple containing the motion and text features. Each feature is a 2D tensor of shape (N, d). 32 | - dist: an optional distance matrix. If provided, it is used to mask the logits. 33 | 34 | Outputs: 35 | - loss_m: the InfoNCE loss computed using the motion features. 36 | - loss_t: the InfoNCE loss computed using the text features. 
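    Note (added for clarity): the implementation below averages the two symmetric terms and
    returns a single scalar, loss = (loss_m + loss_t) / 2. With normalized embeddings and
    temperature t, loss_m is the cross-entropy of each motion's similarity row against its
    matching text index, and loss_t is the same for the transposed text-to-motion direction.

    Example (illustrative sketch; the temperature 0.1, batch size, and embedding size are
    assumed values):

        loss_fn = InfoNCE(t=0.1)
        f_motion = torch.randn(32, 256)  # (N, d) motion embeddings
        f_text = torch.randn(32, 256)    # (N, d) text embeddings
        loss = loss_fn((f_motion, f_text), dist=None)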
37 | """ 38 | t = self.t 39 | f_motion, f_text = f[0], f[1] 40 | 41 | N, d = f_motion.shape[0], f_motion.shape[1] 42 | 43 | # Normalize the motion and text features 44 | Emb_motion = F.normalize(f_motion, dim=1) 45 | Emb_text = F.normalize(f_text, dim=1) 46 | 47 | # Compute the logits as the dot product of the normalized features 48 | t = torch.tensor(t).to(f_motion.device) 49 | logits = torch.mm(Emb_motion, Emb_text.T) 50 | 51 | # If a distance matrix is provided, use it to mask the logits 52 | if dist is not None: 53 | text_logits = dist.detach() 54 | mask = torch.where( 55 | torch.logical_and(text_logits > 0.85, text_logits < 1.0 - 1e-100), 56 | torch.tensor(float("-inf")).to(f_motion.device), 57 | torch.tensor(1.0e100).to(f_motion.device), 58 | ) 59 | mask.diagonal().fill_(float("inf")) 60 | logits = torch.min(mask, logits) 61 | 62 | N = f_motion.shape[0] 63 | 64 | # Compute the labels as the indices of the features 65 | labels = torch.arange(N).to(f_motion.device) 66 | 67 | # Compute the InfoNCE loss for the motion and text features 68 | loss_m = F.cross_entropy(logits / t, labels) 69 | loss_t = F.cross_entropy(logits.T / t, labels) 70 | 71 | loss = (loss_m + loss_t) / 2 72 | 73 | return loss 74 | 75 | def __repr__(self): 76 | return "InfoNCE()" 77 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/losses/kl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class KLLoss: 5 | def __init__(self): 6 | pass 7 | 8 | def __call__(self, q, p): 9 | div = torch.distributions.kl_divergence(q, p) 10 | return div.mean() 11 | 12 | def __repr__(self): 13 | return "KLLoss()" 14 | 15 | 16 | class KLLossMulti: 17 | def __init__(self): 18 | self.klloss = KLLoss() 19 | 20 | def __call__(self, qlist, plist): 21 | return sum([self.klloss(q, p) for q, p in zip(qlist, plist)]) 22 | 23 | def __repr__(self): 24 | return "KLLossMulti()" 25 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .compute import ComputeMetrics 2 | from .tm2t import TM2TMetrics 3 | from .mm import MMMetrics 4 | from .uncond import UncondMetrics 5 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/metrics/mm.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torchmetrics import Metric 6 | from torchmetrics.functional import pairwise_euclidean_distance 7 | 8 | from .utils import * 9 | 10 | 11 | class MMMetrics(Metric): 12 | full_state_update = True 13 | 14 | def __init__(self, mm_num_times=10, dist_sync_on_step=True, **kwargs): 15 | super().__init__(dist_sync_on_step=dist_sync_on_step) 16 | 17 | self.name = "MultiModality scores" 18 | 19 | self.mm_num_times = mm_num_times 20 | 21 | self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum") 22 | self.add_state("count_seq", default=torch.tensor(0), dist_reduce_fx="sum") 23 | 24 | self.metrics = ["MultiModality"] 25 | self.add_state("MultiModality", default=torch.tensor(0.0), dist_reduce_fx="sum") 26 | 27 | # chached batches 28 | self.add_state("mm_motion_embeddings", default=[], dist_reduce_fx=None) 29 | 30 | def compute(self, sanity_flag): 31 | count = self.count.item() 32 | count_seq = self.count_seq.item() 33 | 34 | # init metrics 
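        # (added note) `metrics` below is seeded from the registered metric states; the actual
        # MultiModality value is then recomputed from the generations cached in `update()`.
        # `calculate_multimodality_np` is expected to sample `mm_num_times` pairs of motions
        # generated for the same text and average their embedding distances, so larger values
        # indicate more diverse generations per prompt.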
35 | metrics = {metric: getattr(self, metric) for metric in self.metrics} 36 | 37 | # if in sanity check stage then jump 38 | if sanity_flag: 39 | return metrics 40 | 41 | # cat all embeddings 42 | all_mm_motions = torch.cat(self.mm_motion_embeddings, axis=0).cpu().numpy() 43 | metrics["MultiModality"] = calculate_multimodality_np( 44 | all_mm_motions, self.mm_num_times 45 | ) 46 | 47 | return {**metrics} 48 | 49 | def update( 50 | self, 51 | mm_motion_embeddings: Tensor, 52 | lengths: List[int], 53 | ): 54 | self.count += sum(lengths) 55 | self.count_seq += len(lengths) 56 | 57 | # store all mm motion embeddings 58 | self.mm_motion_embeddings.append(mm_motion_embeddings) 59 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/metrics/uncond.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torchmetrics import Metric 6 | from torchmetrics.functional import pairwise_euclidean_distance 7 | 8 | from .utils import * 9 | 10 | 11 | class UncondMetrics(Metric): 12 | full_state_update = True 13 | 14 | def __init__( 15 | self, top_k=3, R_size=32, diversity_times=300, dist_sync_on_step=True, **kwargs 16 | ): 17 | super().__init__(dist_sync_on_step=dist_sync_on_step) 18 | 19 | self.name = "fid, kid, and diversity scores" 20 | 21 | self.top_k = top_k 22 | self.R_size = R_size 23 | self.diversity_times = 300 24 | 25 | self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum") 26 | self.add_state("count_seq", default=torch.tensor(0), dist_reduce_fx="sum") 27 | 28 | self.metrics = [] 29 | 30 | # KID 31 | self.add_state("KID_mean", default=torch.tensor(0.0), dist_reduce_fx="mean") 32 | self.add_state("KID_std", default=torch.tensor(0.0), dist_reduce_fx="mean") 33 | self.metrics.extend(["KID_mean", "KID_std"]) 34 | # Fid 35 | self.add_state("FID", default=torch.tensor(0.0), dist_reduce_fx="mean") 36 | self.metrics.append("FID") 37 | 38 | # Diversity 39 | self.add_state("Diversity", default=torch.tensor(0.0), dist_reduce_fx="sum") 40 | self.add_state("gt_Diversity", default=torch.tensor(0.0), dist_reduce_fx="sum") 41 | self.metrics.extend(["Diversity", "gt_Diversity"]) 42 | 43 | # chached batches 44 | self.add_state("recmotion_embeddings", default=[], dist_reduce_fx=None) 45 | self.add_state("gtmotion_embeddings", default=[], dist_reduce_fx=None) 46 | 47 | def compute(self, sanity_flag): 48 | count = self.count.item() 49 | count_seq = self.count_seq.item() 50 | 51 | # init metrics 52 | metrics = {metric: getattr(self, metric) for metric in self.metrics} 53 | 54 | # if in sanity check stage then jump 55 | if sanity_flag: 56 | return metrics 57 | 58 | # cat all embeddings 59 | all_gtmotions = torch.cat(self.gtmotion_embeddings, axis=0).cpu() 60 | all_genmotions = torch.cat(self.recmotion_embeddings, axis=0).cpu() 61 | 62 | # Compute kid 63 | 64 | KID_mean, KID_std = calculate_kid(all_gtmotions, all_genmotions) 65 | metrics["KID_mean"] = KID_mean 66 | metrics["KID_std"] = KID_std 67 | 68 | # tensor -> numpy for FID 69 | all_genmotions = all_genmotions.numpy() 70 | all_gtmotions = all_gtmotions.numpy() 71 | 72 | # Compute fid 73 | mu, cov = calculate_activation_statistics_np(all_genmotions) 74 | 75 | # gt_mu, gt_cov = calculate_activation_statistics_np(all_gtmotions) 76 | gt_mu, gt_cov = calculate_activation_statistics_np(all_gtmotions) 77 | metrics["FID"] = calculate_frechet_distance_np(gt_mu, gt_cov, mu, cov) 78 | 79 | # Compute 
diversity 80 | assert count_seq > self.diversity_times 81 | print(all_genmotions.shape) 82 | print(all_gtmotions.shape) 83 | metrics["Diversity"] = calculate_diversity_np( 84 | all_genmotions, self.diversity_times 85 | ) 86 | metrics["gt_Diversity"] = calculate_diversity_np( 87 | all_gtmotions, self.diversity_times 88 | ) 89 | 90 | return {**metrics} 91 | 92 | def update( 93 | self, 94 | gtmotion_embeddings: Tensor, 95 | lengths: List[int], 96 | recmotion_embeddings=None, 97 | ): 98 | self.count += sum(lengths) 99 | self.count_seq += len(lengths) 100 | 101 | # [bs, nlatent*ndim] <= [bs, nlatent, ndim] 102 | if recmotion_embeddings is not None: 103 | recmotion_embeddings = torch.flatten( 104 | recmotion_embeddings, start_dim=1 105 | ).detach() 106 | 107 | # store all texts and motions 108 | self.recmotion_embeddings.append(recmotion_embeddings) 109 | gtmotion_embeddings = torch.flatten(gtmotion_embeddings, start_dim=1).detach() 110 | 111 | self.gtmotion_embeddings.append(gtmotion_embeddings) 112 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/modeltype/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/modeltype/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/modeltype/smplx_layer.py: -------------------------------------------------------------------------------- 1 | from human_body_prior.body_model.body_model import BodyModel 2 | from pytorch_lightning import LightningModule 3 | import numpy as np 4 | import torch 5 | import time 6 | from torch import nn 7 | 8 | 9 | class smplx_layer(LightningModule): 10 | def __init__(self): 11 | super(smplx_layer, self).__init__() 12 | self.smplx_model = BodyModel( 13 | bm_fname="/comp_robot/lushunlin/HumanML3D-1/body_models/smplx/neutral/model.npz", 14 | num_betas=10, 15 | model_type="smplx", 16 | ) 17 | 18 | 19 | if __name__ == "__main__": 20 | pose = ( 21 | torch.tensor( 22 | np.load( 23 | "/comp_robot/lushunlin/visualization/visualization/test_case/motionx_humanml_smplx_322.npy" 24 | ) 25 | ) 26 | .float() 27 | .cuda() 28 | ) 29 | smplx = smplx_layer().cuda() 30 | output = smplx.smplx_model( 31 | pose_body=pose[:, 3:66], 32 | pose_hand=pose[:, 66:156], 33 | root_orient=pose[:, :3], 34 | pose_jaw=pose[:, 156:159], 35 | ).Jtr 36 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/operator/__init__.py: -------------------------------------------------------------------------------- 1 | from .adain import AdaptiveInstanceNorm1d 2 | from .blocks import ConvBlock, LinearBlock 3 | from .position_encoding_layer import PositionalEncoding 4 | 5 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/operator/adain.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class AdaptiveInstanceNorm1d(nn.Module): 6 | """ 7 | This class is a subclass of nn.Module. 8 | It implements the Adaptive Instance Normalization (AdaIN) layer. 9 | """ 10 | def __init__(self, num_features, eps=1e-5, momentum=0.1): 11 | """ 12 | Inputs: 13 | num_features (int): The number of features in the input. 14 | eps (float): A small number added to the denominator for numerical stability. 
Default is 1e-5. 15 | momentum (float): The momentum factor. Default is 0.1. 16 | 17 | This function is the constructor of the AdaptiveInstanceNorm1d class. It initializes the class variables and registers the running mean and variance buffers. 18 | """ 19 | super(AdaptiveInstanceNorm1d, self).__init__() 20 | self.num_features = num_features 21 | self.eps = eps 22 | self.momentum = momentum 23 | self.weight = None 24 | self.bias = None 25 | self.register_buffer('running_mean', torch.zeros(num_features)) 26 | self.register_buffer('running_var', torch.ones(num_features)) 27 | 28 | def forward(self, x, direct_weighting=False, no_std=False): 29 | """ 30 | Inputs: 31 | x (Tensor): The input tensor. 32 | direct_weighting (bool): If True, apply direct weighting. Default is False. 33 | no_std (bool): If True, do not apply standard deviation. Default is False. 34 | 35 | This function applies the AdaIN operation to the input tensor and returns the output tensor. 36 | 37 | Returns: 38 | Tensor: The output tensor. 39 | """ 40 | assert self.weight is not None and \ 41 | self.bias is not None, "Please assign AdaIN weight first" 42 | 43 | # (bs, nfeats, nframe) <= (nframe, bs, nfeats) 44 | x = x.permute(1,2,0) 45 | 46 | b, c = x.size(0), x.size(1) # batch size & channels 47 | running_mean = self.running_mean.repeat(b) 48 | running_var = self.running_var.repeat(b) 49 | if direct_weighting: 50 | x_reshaped = x.contiguous().view(b * c) 51 | if no_std: 52 | out = x_reshaped + self.bias 53 | else: 54 | out = x_reshaped.mul(self.weight) + self.bias 55 | out = out.view(b, c, *x.size()[2:]) 56 | else: 57 | x_reshaped = x.contiguous().view(1, b * c, *x.size()[2:]) 58 | out = F.batch_norm( 59 | x_reshaped, running_mean, running_var, self.weight, self.bias, 60 | True, self.momentum, self.eps) 61 | out = out.view(b, c, *x.size()[2:]) 62 | 63 | # (nframe, bs, nfeats) <= (bs, nfeats, nframe) 64 | out = out.permute(2,0,1) 65 | return out 66 | 67 | def __repr__(self): 68 | return self.__class__.__name__ + '(' + str(self.num_features) + ')' 69 | 70 | def assign_adain_params(adain_params, model): 71 | """ 72 | Inputs: 73 | adain_params (Tensor): The AdaIN parameters. 74 | model (nn.Module): The model. 75 | 76 | This function assigns the AdaIN parameters to the AdaIN layers in the model. 77 | 78 | Returns: 79 | None 80 | """ 81 | # assign the adain_params to the AdaIN layers in model 82 | for m in model.modules(): 83 | if m.__class__.__name__ == "AdaptiveInstanceNorm1d": 84 | mean = adain_params[: , : m.num_features] 85 | std = adain_params[: , m.num_features: 2 * m.num_features] 86 | m.bias = mean.contiguous().view(-1) 87 | m.weight = std.contiguous().view(-1) 88 | if adain_params.size(1) > 2 * m.num_features: 89 | adain_params = adain_params[: , 2 * m.num_features:] 90 | 91 | 92 | def get_num_adain_params(model): 93 | """ 94 | Inputs: 95 | model (nn.Module): The model. 96 | 97 | This function returns the number of AdaIN parameters needed by the model. 98 | 99 | Returns: 100 | int: The number of AdaIN parameters needed by the model.
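    Example (illustrative usage sketch; `style_mlp`, `decoder`, `style_code`, and `x` are
    hypothetical objects, not defined in this file):

        n = get_num_adain_params(decoder)           # total AdaIN features to predict
        adain_params = style_mlp(style_code)        # shape (batch, n)
        assign_adain_params(adain_params, decoder)  # writes mean/std into each AdaIN layer
        out = decoder(x)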
101 | """ 102 | # return the number of AdaIN parameters needed by the model 103 | num_adain_params = 0 104 | for m in model.modules(): 105 | if m.__class__.__name__ == "AdaptiveInstanceNorm1d": 106 | num_adain_params += 2 * m.num_features 107 | return num_adain_params 108 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/operator/position_encoding_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class PositionalEncoding(nn.Module): 7 | 8 | def __init__(self, d_model, dropout=0.1, max_len=5000, batch_first=False): 9 | super().__init__() 10 | self.batch_first = batch_first 11 | 12 | self.dropout = nn.Dropout(p=dropout) 13 | 14 | pe = torch.zeros(max_len, d_model) 15 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 16 | div_term = torch.exp(torch.arange( 17 | 0, d_model, 2).float() * (-np.log(10000.0) / d_model)) 18 | pe[:, 0::2] = torch.sin(position * div_term) 19 | pe[:, 1::2] = torch.cos(position * div_term) 20 | pe = pe.unsqueeze(0).transpose(0, 1) 21 | 22 | self.register_buffer("pe", pe) 23 | 24 | def forward(self, x): 25 | # not used in the final model 26 | if self.batch_first: 27 | x = x + self.pe.permute(1, 0, 2)[:, : x.shape[1], :] 28 | else: 29 | x = x + self.pe[: x.shape[0], :] 30 | return self.dropout(x) 31 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/operator/self_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/operator/self_attention.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/tools/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/tools/tools.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | def remove_padding(tensors, lengths): 4 | """ 5 | Inputs: 6 | tensors (list): A list of tensors from which padding is to be removed. 7 | lengths (list): A list of integers representing the actual lengths of the tensors. 8 | 9 | This function removes padding from the tensors based on the actual lengths. 10 | It returns a list of tensors with padding removed. 11 | 12 | Returns: 13 | list: A list of tensors with padding removed. 14 | """ 15 | return [tensor[:tensor_length] for tensor, tensor_length in zip(tensors, lengths)] 16 | 17 | class AutoParams(nn.Module): 18 | """ 19 | This class is a subclass of nn.Module. 20 | It is used to automatically set the parameters of a model. 21 | It has two types of parameters: needed parameters and optional parameters. 22 | Needed parameters must be provided when an instance of the class is created, 23 | otherwise a ValueError is raised. 24 | 25 | Optional parameters can be provided when an instance of the class is created, 26 | otherwise they are set to their default values. 
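    Example (illustrative sketch; the subclass and parameter names are invented for the docs):

        class ToyEncoder(AutoParams):
            needed_params = ["latent_dim"]
            optional_params = {"dropout": 0.1}

        enc = ToyEncoder(latent_dim=256)
        # enc.latent_dim == 256, enc.dropout == 0.1 (default applied)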
27 | """ 28 | def __init__(self, **kargs): 29 | try: 30 | for param in self.needed_params: 31 | if param in kargs: 32 | setattr(self, param, kargs[param]) 33 | else: 34 | raise ValueError(f"{param} is needed.") 35 | except : 36 | pass 37 | 38 | try: 39 | for param, default in self.optional_params.items(): 40 | if param in kargs and kargs[param] is not None: 41 | setattr(self, param, kargs[param]) 42 | else: 43 | setattr(self, param, default) 44 | except : 45 | pass 46 | super().__init__() 47 | 48 | 49 | # taken from joeynmt repo 50 | def freeze_params(module: nn.Module) -> None: 51 | """ 52 | Freeze the parameters of this module, 53 | i.e. do not update them during training 54 | 55 | :param module: freeze parameters of this module 56 | """ 57 | for _, p in module.named_parameters(): 58 | p.requires_grad = False 59 | -------------------------------------------------------------------------------- /OpenTMA/tma/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/tools/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/tools/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import tqdm 3 | 4 | 5 | class LevelsFilter(logging.Filter): 6 | """ 7 | This class is a filter for logging levels. It allows only the logs of specified levels to pass. 8 | """ 9 | 10 | def __init__(self, levels): 11 | self.levels = [getattr(logging, level) for level in levels] 12 | 13 | def filter(self, record): 14 | return record.levelno in self.levels 15 | 16 | 17 | class StreamToLogger(object): 18 | """ 19 | Fake file-like stream object that redirects writes to a logger instance. 20 | """ 21 | 22 | def __init__(self, logger, level): 23 | self.logger = logger 24 | self.level = level 25 | self.linebuf = '' 26 | 27 | def write(self, buf): 28 | for line in buf.rstrip().splitlines(): 29 | self.logger.log(self.level, line.rstrip()) 30 | 31 | def flush(self): 32 | pass 33 | 34 | 35 | class TqdmLoggingHandler(logging.Handler): 36 | """ 37 | This class is a logging handler that writes to a tqdm progress bar. 38 | """ 39 | 40 | def __init__(self, level=logging.NOTSET): 41 | super().__init__(level) 42 | 43 | def emit(self, record): 44 | try: 45 | msg = self.format(record) 46 | tqdm.tqdm.write(msg) 47 | self.flush() 48 | except Exception: 49 | self.handleError(record) 50 | -------------------------------------------------------------------------------- /OpenTMA/tma/tools/runid.py: -------------------------------------------------------------------------------- 1 | """ 2 | runid util. 
3 | Taken from wandb.sdk.lib.runid 4 | """ 5 | 6 | import shortuuid # type: ignore 7 | 8 | 9 | def generate_id() -> str: 10 | # ~3t run ids (36**8) 11 | run_gen = shortuuid.ShortUUID(alphabet=list("0123456789abcdefghijklmnopqrstuvwxyz")) 12 | return run_gen.random(8) -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Transform 2 | from .smpl import SMPLTransform 3 | # from .xyz import XYZTransform 4 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, fields 2 | 3 | 4 | class Transform: 5 | 6 | def collate(self, lst_datastruct): 7 | from tma.datasets.utils import collate_tensor_with_padding 8 | example = lst_datastruct[0] 9 | 10 | def collate_or_none(key): 11 | if example[key] is None: 12 | return None 13 | key_lst = [x[key] for x in lst_datastruct] 14 | return collate_tensor_with_padding(key_lst) 15 | 16 | kwargs = {key: collate_or_none(key) for key in example.datakeys} 17 | 18 | return self.Datastruct(**kwargs) 19 | 20 | 21 | # Inspired from SMPLX library 22 | # need to define "datakeys" and transforms 23 | @dataclass 24 | class Datastruct: 25 | 26 | def __getitem__(self, key): 27 | return getattr(self, key) 28 | 29 | def __setitem__(self, key, value): 30 | self.__dict__[key] = value 31 | 32 | def get(self, key, default=None): 33 | return getattr(self, key, default) 34 | 35 | def __iter__(self): 36 | return self.keys() 37 | 38 | def keys(self): 39 | keys = [t.name for t in fields(self)] 40 | return iter(keys) 41 | 42 | def values(self): 43 | values = [getattr(self, t.name) for t in fields(self)] 44 | return iter(values) 45 | 46 | def items(self): 47 | data = [(t.name, getattr(self, t.name)) for t in fields(self)] 48 | return iter(data) 49 | 50 | def to(self, *args, **kwargs): 51 | for key in self.datakeys: 52 | if self[key] is not None: 53 | self[key] = self[key].to(*args, **kwargs) 54 | return self 55 | 56 | @property 57 | def device(self): 58 | return self[self.datakeys[0]].device 59 | 60 | def detach(self): 61 | 62 | def detach_or_none(tensor): 63 | if tensor is not None: 64 | return tensor.detach() 65 | return None 66 | 67 | kwargs = {key: detach_or_none(self[key]) for key in self.datakeys} 68 | return self.transforms.Datastruct(**kwargs) 69 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/feats2smpl.py: -------------------------------------------------------------------------------- 1 | from os.path import join as pjoin 2 | 3 | import numpy as np 4 | import torch 5 | 6 | import tma.data.humanml.utils.paramUtil as paramUtil 7 | from tma.data.humanml.data.dataset import Text2MotionDatasetV2 8 | from tma.data.humanml.scripts.motion_process import recover_from_ric 9 | from tma.data.humanml.utils.plot_script import plot_3d_motion 10 | 11 | # Define the skeleton structure using the kinematic chain from paramUtil 12 | skeleton = paramUtil.t2m_kinematic_chain 13 | 14 | 15 | def main(): 16 | # Define paths and parameters 17 | data_root = '../datasets/humanml3d' 18 | feastures_path = 'in.npy' 19 | animation_save_path = 'in.mp4' 20 | 21 | fps = 20 22 | # Load the mean and standard deviation of the dataset 23 | mean = np.load(pjoin(data_root, 'Mean.npy')) 24 | std = np.load(pjoin(data_root, 
'Std.npy')) 25 | 26 | # Load the motion features and normalize them using the mean and standard deviation 27 | motion = np.load(feastures_path) 28 | motion = motion * std + mean 29 | motion_rec = recover_from_ric(torch.tensor(motion), 22).cpu().numpy() 30 | 31 | # Scale the recovered motion 32 | motion_rec = motion_rec * 1.3 33 | # Plot and save the 3D motion 34 | plot_3d_motion(animation_save_path, motion_rec, title='input', fps=fps) 35 | 36 | 37 | # Run the main function if the script is run as the main program 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/identity.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from torch import Tensor 3 | 4 | from .base import Datastruct, dataclass, Transform 5 | 6 | 7 | class IdentityTransform(Transform): 8 | def __init__(self, **kwargs): 9 | return 10 | 11 | def Datastruct(self, **kwargs): 12 | return IdentityDatastruct(**kwargs) 13 | 14 | def __repr__(self): 15 | return "IdentityTransform()" 16 | 17 | 18 | @dataclass 19 | class IdentityDatastruct(Datastruct): 20 | transforms: IdentityTransform 21 | 22 | features: Optional[Tensor] = None 23 | 24 | def __post_init__(self): 25 | self.datakeys = ["features"] 26 | 27 | def __len__(self): 28 | return len(self.rfeats) 29 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/joints2jfeats/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Joints2Jfeats 2 | from .rifke import Rifke 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/joints2jfeats/base.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from torch import Tensor, nn 5 | from pathlib import Path 6 | 7 | 8 | class Joints2Jfeats(nn.Module): 9 | def __init__(self, path: Optional[str] = None, 10 | normalization: bool = False, 11 | eps: float = 1e-12, 12 | **kwargs) -> None: 13 | if normalization and path is None: 14 | raise TypeError( 15 | "You should provide a path if normalization is on.") 16 | 17 | super().__init__() 18 | self.normalization = normalization 19 | self.eps = eps 20 | 21 | if normalization: 22 | mean_path = Path(path) / "jfeats_mean.pt" 23 | std_path = Path(path) / "jfeats_std.pt" 24 | self.register_buffer('mean', torch.load(mean_path)) 25 | self.register_buffer('std', torch.load(std_path)) 26 | 27 | def normalize(self, features: Tensor) -> Tensor: 28 | if self.normalization: 29 | features = (features - self.mean)/(self.std + self.eps) 30 | return features 31 | 32 | def unnormalize(self, features: Tensor) -> Tensor: 33 | if self.normalization: 34 | features = features * self.std + self.mean 35 | return features 36 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/joints2jfeats/tools.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from tma.utils.joints import mmm_joints, humanml3d_joints, motionx_joints 5 | 6 | # Get the indexes of particular body part 7 | 8 | 9 | # .T is deprecated now for reversing a tensor 10 | def T(x): 11 | return x.permute(*torch.arange(x.ndim - 1, -1, -1)) 12 | 13 | 14 | def get_forward_direction(poses, 
jointstype="mmm"): 15 | if jointstype == "mmm" or jointstype == "mmmns": 16 | joints = mmm_joints 17 | elif jointstype == "humanml3d": 18 | joints = humanml3d_joints 19 | elif jointstype == "motionx": 20 | joints = motionx_joints 21 | else: 22 | raise TypeError('Only supports mmm, mmmns and humanl3d jointstype') 23 | # Shoulders 24 | LS, RS = joints.index("LS"), joints.index("RS") 25 | # Hips 26 | LH, RH = joints.index("LH"), joints.index("RH") 27 | 28 | across = poses[..., RH, :] - poses[..., LH, :] + poses[..., RS, :] - poses[ 29 | ..., LS, :] 30 | forward = torch.stack((-across[..., 2], across[..., 0]), axis=-1) 31 | forward = torch.nn.functional.normalize(forward, dim=-1) 32 | return forward 33 | 34 | 35 | def get_floor(poses, jointstype="mmm"): 36 | if jointstype == "mmm" or jointstype == "mmmns": 37 | joints = mmm_joints 38 | elif jointstype == "humanml3d": 39 | joints = humanml3d_joints 40 | elif jointstype == "motionx": 41 | joints = motionx_joints 42 | else: 43 | raise TypeError('Only supports mmm, mmmns and humanl3d jointstype') 44 | ndim = len(poses.shape) 45 | # Feet 46 | LM, RM = joints.index("LMrot"), joints.index("RMrot") 47 | LF, RF = joints.index("LF"), joints.index("RF") 48 | foot_heights = poses[..., (LM, LF, RM, RF), 1].min(-1).values 49 | floor_height = softmin(foot_heights, softness=0.5, dim=-1) 50 | return T(floor_height[(ndim - 2) * [None]]) 51 | 52 | 53 | def softmax(x, softness=1.0, dim=None): 54 | maxi, mini = x.max(dim=dim).values, x.min(dim=dim).values 55 | return maxi + torch.log(softness + torch.exp(mini - maxi)) 56 | 57 | 58 | def softmin(x, softness=1.0, dim=0): 59 | return -softmax(-x, softness=softness, dim=dim) 60 | 61 | 62 | def gaussian_filter1d(_inputs, sigma, truncate=4.0): 63 | # Code adapted/mixed from scipy library into pytorch 64 | # https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/ndimage/filters.py#L211 65 | # and gaussian kernel 66 | # https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/ndimage/filters.py#L179 67 | # Correspond to mode="nearest" and order = 0 68 | # But works batched 69 | if len(_inputs.shape) == 2: 70 | inputs = _inputs[None] 71 | else: 72 | inputs = _inputs 73 | 74 | sd = float(sigma) 75 | radius = int(truncate * sd + 0.5) 76 | sigma2 = sigma * sigma 77 | x = torch.arange(-radius, 78 | radius + 1, 79 | device=inputs.device, 80 | dtype=inputs.dtype) 81 | phi_x = torch.exp(-0.5 / sigma2 * x**2) 82 | phi_x = phi_x / phi_x.sum() 83 | 84 | # Conv1d weights 85 | groups = inputs.shape[-1] 86 | weights = torch.tile(phi_x, (groups, 1, 1)) 87 | inputs = inputs.transpose(-1, -2) 88 | outputs = F.conv1d(inputs, weights, padding="same", 89 | groups=groups).transpose(-1, -2) 90 | 91 | return outputs.reshape(_inputs.shape) 92 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/joints2rots/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tma.utils.joints import mmm_joints, smplh2mmm_indexes 3 | 4 | # Map joints Name to SMPL joints idx 5 | JOINT_MAP = { 6 | 'MidHip': 0, 7 | 'LHip': 1, 8 | 'LKnee': 4, 9 | 'LAnkle': 7, 10 | 'LFoot': 10, 11 | 'RHip': 2, 12 | 'RKnee': 5, 13 | 'RAnkle': 8, 14 | 'RFoot': 11, 15 | 'LShoulder': 16, 16 | 'LElbow': 18, 17 | 'LWrist': 20, 18 | 'LHand': 22, 19 | 'RShoulder': 17, 20 | 'RElbow': 19, 21 | 'RWrist': 21, 22 | 'RHand': 23, 23 | 'spine1': 3, 24 | 'spine2': 6, 25 | 'spine3': 9, 26 | 'Neck': 12, 27 | 'Head': 15, 28 | 
'LCollar': 13, 29 | 'Rcollar': 14, 30 | 'Nose': 24, 31 | 'REye': 26, 32 | 'LEye': 26, 33 | 'REar': 27, 34 | 'LEar': 28, 35 | 'LHeel': 31, 36 | 'RHeel': 34, 37 | 'OP RShoulder': 17, 38 | 'OP LShoulder': 16, 39 | 'OP RHip': 2, 40 | 'OP LHip': 1, 41 | 'OP Neck': 12, 42 | } 43 | 44 | mmm2smpl_correspondence = { 45 | "root": "MidHip", 46 | "BP": "spine1", 47 | "BT": "spine3", 48 | "BLN": "Neck", 49 | "BUN": "Head", 50 | "LS": "LShoulder", 51 | "LE": "LElbow", 52 | "LW": "LWrist", 53 | "RS": "RShoulder", 54 | "RE": "RElbow", 55 | "RW": "RWrist", 56 | "LH": "LHip", 57 | "LK": "LKnee", 58 | "LA": "LAnkle", 59 | "LMrot": "LHeel", 60 | "LF": "LFoot", 61 | "RH": "RHip", 62 | "RK": "RKnee", 63 | "RA": "RAnkle", 64 | "RMrot": "RHeel", 65 | "RF": "RFoot" 66 | } 67 | 68 | full_smpl_idx = range(24) 69 | key_smpl_idx = [0, 1, 4, 7, 2, 5, 8, 17, 19, 21, 16, 18, 20] 70 | 71 | AMASS_JOINT_MAP = { 72 | 'MidHip': 0, 73 | 'LHip': 1, 74 | 'LKnee': 4, 75 | 'LAnkle': 7, 76 | 'LFoot': 10, 77 | 'RHip': 2, 78 | 'RKnee': 5, 79 | 'RAnkle': 8, 80 | 'RFoot': 11, 81 | 'LShoulder': 16, 82 | 'LElbow': 18, 83 | 'LWrist': 20, 84 | 'RShoulder': 17, 85 | 'RElbow': 19, 86 | 'RWrist': 21, 87 | 'spine1': 3, 88 | 'spine2': 6, 89 | 'spine3': 9, 90 | 'Neck': 12, 91 | 'Head': 15, 92 | 'LCollar': 13, 93 | 'Rcollar': 14, 94 | } 95 | amass_idx = range(22) 96 | amass_smpl_idx = range(22) 97 | 98 | # cal mmm in smpl index 99 | smpl2mmm_correspondence = { 100 | val: key 101 | for key, val in mmm2smpl_correspondence.items() 102 | } 103 | smpl2mmm_indexes = [JOINT_MAP[mmm2smpl_correspondence[x]] for x in mmm_joints] 104 | 105 | # cal mmm joints map 106 | MMM_JOINT_MAP = { 107 | val: JOINT_MAP[val] 108 | for key, val in mmm2smpl_correspondence.items() 109 | } 110 | 111 | # mmm_idx = range(21) 112 | # mmm_smpl_dix = smpl2mmm_indexes 113 | # mmm_smpl_dix = smplh2mmm_indexes 114 | # todo - configable 115 | SMPL_MODEL_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/" 116 | GMM_MODEL_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/" 117 | SMPL_MEAN_FILE = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/neutral_smpl_mean_params.h5" 118 | # for collsion 119 | Part_Seg_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/smplx_parts_segm.pkl" 120 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rotation2xyz.py: -------------------------------------------------------------------------------- 1 | # This code is based on https://github.com/Mathux/ACTOR.git 2 | import torch 3 | import tma.utils.rotation_conversions as geometry 4 | 5 | from .smpl import SMPL, JOINTSTYPE_ROOT 6 | JOINTSTYPES = ["a2m", "a2mpl", "smpl", "vibe", "vertices"] 7 | 8 | 9 | class Rotation2xyz(torch.nn.Module): 10 | 11 | def __init__(self, smpl_path): 12 | super().__init__() 13 | self.smpl_model = SMPL(smpl_path).eval() 14 | 15 | def __call__(self, 16 | x, 17 | mask, 18 | pose_rep, 19 | translation, 20 | glob, 21 | jointstype, 22 | vertstrans, 23 | betas=None, 24 | beta=0, 25 | glob_rot=None, 26 | get_rotations_back=False, 27 | **kwargs): 28 | if pose_rep == "xyz": 29 | return x 30 | 31 | if mask is None: 32 | mask = torch.ones((x.shape[0], x.shape[-1]), 33 | dtype=bool, 34 | device=x.device) 35 | 36 | if not glob and glob_rot is None: 37 | raise TypeError( 38 | "You must specify global rotation if glob is False") 39 | 40 | if jointstype not in JOINTSTYPES: 41 | raise NotImplementedError("This jointstype is not 
implemented.") 42 | 43 | if translation: 44 | x_translations = x[:, -1, :3] 45 | x_rotations = x[:, :-1] 46 | else: 47 | x_rotations = x 48 | 49 | x_rotations = x_rotations.permute(0, 3, 1, 2) 50 | nsamples, time, njoints, feats = x_rotations.shape 51 | 52 | # Compute rotations (convert only masked sequences output) 53 | if pose_rep == "rotvec": 54 | rotations = geometry.axis_angle_to_matrix(x_rotations[mask]) 55 | elif pose_rep == "rotmat": 56 | rotations = x_rotations[mask].view(-1, njoints, 3, 3) 57 | elif pose_rep == "rotquat": 58 | rotations = geometry.quaternion_to_matrix(x_rotations[mask]) 59 | elif pose_rep == "rot6d": 60 | rotations = geometry.rotation_6d_to_matrix(x_rotations[mask]) 61 | else: 62 | raise NotImplementedError("No geometry for this one.") 63 | 64 | if not glob: 65 | global_orient = torch.tensor(glob_rot, device=x.device) 66 | global_orient = geometry.axis_angle_to_matrix(global_orient).view( 67 | 1, 1, 3, 3) 68 | global_orient = global_orient.repeat(len(rotations), 1, 1, 1) 69 | else: 70 | global_orient = rotations[:, 0] 71 | rotations = rotations[:, 1:] 72 | 73 | if betas is None: 74 | betas = torch.zeros( 75 | [rotations.shape[0], self.smpl_model.num_betas], 76 | dtype=rotations.dtype, 77 | device=rotations.device) 78 | betas[:, 1] = beta 79 | # import ipdb; ipdb.set_trace() 80 | out = self.smpl_model(body_pose=rotations, 81 | global_orient=global_orient, 82 | betas=betas) 83 | 84 | # get the desirable joints 85 | joints = out[jointstype] 86 | 87 | x_xyz = torch.empty(nsamples, 88 | time, 89 | joints.shape[1], 90 | 3, 91 | device=x.device, 92 | dtype=x.dtype) 93 | x_xyz[~mask] = 0 94 | x_xyz[mask] = joints 95 | 96 | x_xyz = x_xyz.permute(0, 2, 3, 1).contiguous() 97 | 98 | # the first translation root at the origin on the prediction 99 | if jointstype != "vertices": 100 | rootindex = JOINTSTYPE_ROOT[jointstype] 101 | x_xyz = x_xyz - x_xyz[:, [rootindex], :, :] 102 | 103 | if translation and vertstrans: 104 | # the first translation root at the origin 105 | x_translations = x_translations - x_translations[:, :, [0]] 106 | 107 | # add the translation to all the joints 108 | x_xyz = x_xyz + x_translations[:, None, :, :] 109 | 110 | if get_rotations_back: 111 | return x_xyz, rotations, global_orient 112 | else: 113 | return x_xyz 114 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rots2joints/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Rots2Joints 2 | from .smplh import SMPLH 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rots2joints/base.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from torch import Tensor, nn 5 | from pathlib import Path 6 | 7 | 8 | class Rots2Joints(nn.Module): 9 | def __init__(self, path: Optional[str] = None, 10 | normalization: bool = False, 11 | eps: float = 1e-12, 12 | **kwargs) -> None: 13 | if normalization and path is None: 14 | raise TypeError( 15 | "You should provide a path if normalization is on.") 16 | 17 | super().__init__() 18 | self.normalization = normalization 19 | self.eps = eps 20 | 21 | if normalization: 22 | mean_path = Path(path) / "mean.pt" 23 | std_path = Path(path) / "std.pt" 24 | self.register_buffer('mean', torch.load(mean_path)) 25 | self.register_buffer('std', torch.load(std_path)) 26 | 27 | def normalize(self, features: Tensor) -> 
Tensor: 28 | if self.normalization: 29 | features = (features - self.mean)/(self.std + self.eps) 30 | return features 31 | 32 | def unnormalize(self, features: Tensor) -> Tensor: 33 | if self.normalization: 34 | features = features * self.std + self.mean 35 | return features 36 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rots2rfeats/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Rots2Rfeats 2 | from .smplvelp import SMPLVelP 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rots2rfeats/base.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from torch import Tensor, nn 5 | from pathlib import Path 6 | 7 | 8 | class Rots2Rfeats(nn.Module): 9 | def __init__(self, path: Optional[str] = None, 10 | normalization: bool = False, 11 | eps: float = 1e-12, 12 | **kwargs) -> None: 13 | if normalization and path is None: 14 | raise TypeError( 15 | "You should provide a path if normalization is on.") 16 | 17 | super().__init__() 18 | self.normalization = normalization 19 | self.eps = eps 20 | 21 | if normalization: 22 | mean_path = Path(path) / "rfeats_mean.pt" 23 | std_path = Path(path) / "rfeats_std.pt" 24 | self.register_buffer('mean', torch.load(mean_path)) 25 | self.register_buffer('std', torch.load(std_path)) 26 | 27 | def normalize(self, features: Tensor) -> Tensor: 28 | if self.normalization: 29 | features = (features - self.mean)/(self.std + self.eps) 30 | return features 31 | 32 | def unnormalize(self, features: Tensor) -> Tensor: 33 | if self.normalization: 34 | features = features * self.std + self.mean 35 | return features 36 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rots2rfeats/smplvelp.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from torch import Tensor 5 | from einops import rearrange 6 | 7 | from tma.utils.temos_utils import matrix_to, nfeats_of, to_matrix 8 | import tma.utils.geometry as geometry 9 | 10 | from .base import Rots2Rfeats 11 | 12 | 13 | class SMPLVelP(Rots2Rfeats): 14 | 15 | def __init__(self, 16 | path: Optional[str] = None, 17 | normalization: bool = False, 18 | pose_rep: str = "rot6d", 19 | canonicalize: bool = False, 20 | offset: bool = True, 21 | **kwargs) -> None: 22 | super().__init__(path=path, normalization=normalization) 23 | self.canonicalize = canonicalize 24 | self.pose_rep = pose_rep 25 | self.nfeats = nfeats_of(pose_rep) 26 | self.offset = offset 27 | 28 | def forward(self, data) -> Tensor: 29 | matrix_poses, trans = data.rots, data.trans 30 | # matrix_poses: [nframes, 22, 3, 3] 31 | 32 | # extract the root gravity axis 33 | # for smpl it is the last coordinate 34 | root_y = trans[..., 2] 35 | trajectory = trans[..., [0, 1]] 36 | 37 | # Comoute the difference of trajectory (for X and Y axis) 38 | vel_trajectory = torch.diff(trajectory, dim=-2) 39 | # 0 for the first one => keep the dimentionality 40 | vel_trajectory = torch.cat( 41 | (0 * vel_trajectory[..., [0], :], vel_trajectory), dim=-2) 42 | 43 | # first normalize the data 44 | if self.canonicalize: 45 | global_orient = matrix_poses[..., 0, :, :] 46 | # remove the rotation 47 | rot2d = geometry.matrix_to_axis_angle(global_orient[..., 0, :, :]) 48 | # Remove the fist 
rotation along the vertical axis 49 | # construct this by extracting only the vertical component of the rotation 50 | rot2d[..., :2] = 0 51 | 52 | if self.offset: 53 | # add a bit more rotation 54 | rot2d[..., 2] += torch.pi / 2 55 | 56 | rot2d = geometry.axis_angle_to_matrix(rot2d) 57 | 58 | # rotate all the rotations by the same amount 59 | global_orient = torch.einsum("...kj,...kl->...jl", rot2d, 60 | global_orient) 61 | 62 | matrix_poses = torch.cat( 63 | (global_orient[..., None, :, :], matrix_poses[..., 1:, :, :]), 64 | dim=-3) 65 | 66 | # Turn the trajectory as well 67 | vel_trajectory = torch.einsum("...kj,...lk->...lj", 68 | rot2d[..., :2, :2], vel_trajectory) 69 | 70 | poses = matrix_to(self.pose_rep, matrix_poses) 71 | features = torch.cat( 72 | (root_y[..., None], vel_trajectory, 73 | rearrange(poses, "... joints rot -> ... (joints rot)")), 74 | dim=-1) 75 | features = self.normalize(features) 76 | return features 77 | 78 | def extract(self, features): 79 | root_y = features[..., 0] 80 | vel_trajectory = features[..., 1:3] 81 | poses_features = features[..., 3:] 82 | poses = rearrange(poses_features, 83 | "... (joints rot) -> ... joints rot", 84 | rot=self.nfeats) 85 | return root_y, vel_trajectory, poses 86 | 87 | def inverse(self, features): 88 | features = self.unnormalize(features) 89 | root_y, vel_trajectory, poses = self.extract(features) 90 | 91 | # integrate the trajectory 92 | trajectory = torch.cumsum(vel_trajectory, dim=-2) 93 | # The first frame should be 0, but if it is inferred it is better to enforce it 94 | trajectory = trajectory - trajectory[..., [0], :] 95 | 96 | # Get back the translation 97 | trans = torch.cat([trajectory, root_y[..., None]], dim=-1) 98 | matrix_poses = to_matrix(self.pose_rep, poses) 99 | 100 | from tma.transforms.smpl import RotTransDatastruct 101 | return RotTransDatastruct(rots=matrix_poses, trans=trans) 102 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/xyz.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from torch import Tensor 3 | 4 | from .base import Datastruct, dataclass, Transform 5 | from tma.datasets.utils import collate_tensor_with_padding 6 | 7 | from .joints2jfeats import Joints2Jfeats 8 | 9 | 10 | class XYZTransform(Transform): 11 | 12 | def __init__(self, joints2jfeats: Joints2Jfeats, **kwargs): 13 | self.joints2jfeats = joints2jfeats 14 | 15 | def Datastruct(self, **kwargs): 16 | return XYZDatastruct(_joints2jfeats=self.joints2jfeats, 17 | transforms=self, 18 | **kwargs) 19 | 20 | def __repr__(self): 21 | return "XYZTransform()" 22 | 23 | 24 | @dataclass 25 | class XYZDatastruct(Datastruct): 26 | transforms: XYZTransform 27 | _joints2jfeats: Joints2Jfeats 28 | 29 | features: Optional[Tensor] = None 30 | joints_: Optional[Tensor] = None 31 | jfeats_: Optional[Tensor] = None 32 | 33 | def __post_init__(self): 34 | self.datakeys = ["features", "joints_", "jfeats_"] 35 | # starting point 36 | if self.features is not None and self.jfeats_ is None: 37 | self.jfeats_ = self.features 38 | 39 | @property 40 | def joints(self): 41 | # Cached value 42 | if self.joints_ is not None: 43 | return self.joints_ 44 | 45 | # self.jfeats_ should be defined 46 | assert self.jfeats_ is not None 47 | 48 | self._joints2jfeats.to(self.jfeats.device) 49 | self.joints_ = self._joints2jfeats.inverse(self.jfeats) 50 | return self.joints_ 51 | 52 | @property 53 | def jfeats(self): 54 | # Cached value 55 | if self.jfeats_ is not
None: 56 | return self.jfeats_ 57 | 58 | # self.joints_ should be defined 59 | assert self.joints_ is not None 60 | 61 | self._joints2jfeats.to(self.joints.device) 62 | self.jfeats_ = self._joints2jfeats(self.joints) 63 | return self.jfeats_ 64 | 65 | def __len__(self): 66 | return len(self.jfeats) 67 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/utils/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | 5 | # load example data 6 | def load_example_input(txt_path): 7 | """ 8 | Parameters: 9 | txt_path (str): The path to the text file. 10 | 11 | Returns: 12 | texts (list): The list of text strings. 13 | lens (list): The list of lengths of the text strings. 14 | """ 15 | 16 | file = open(txt_path, "r") 17 | Lines = file.readlines() 18 | count = 0 19 | texts, lens = [], [] 20 | 21 | # Strips the newline character 22 | for line in Lines: 23 | count += 1 24 | 25 | # Strip the newline character from the line and split it into length and text 26 | s = line.strip() 27 | s_l = s.split(" ")[0] 28 | s_t = s[(len(s_l) + 1):] 29 | 30 | # Append the length and text to the respective lists 31 | lens.append(int(s_l)) 32 | texts.append(s_t) 33 | print("Length-{}: {}".format(s_l, s_t)) 34 | return texts, lens 35 | 36 | 37 | # render batch 38 | def render_batch(npy_dir, execute_python="./scripts/visualize_motion.sh", mode="sequence"): 39 | """ 40 | Parameters: 41 | npy_dir (str): The directory containing the npy files. 42 | execute_python (str): The path to the Python script to execute. Default is "./scripts/visualize_motion.sh". 43 | mode (str): The mode for rendering. Default is "sequence". 44 | """ 45 | # Execute the Python script with the directory and mode as arguments 46 | os.system(f"{execute_python} {npy_dir} {mode}") 47 | 48 | 49 | # render 50 | def render(execute_python, npy_path, jointtype, cfg_path): 51 | """ 52 | Parameters: 53 | execute_python (str): The path to the Python script to execute. 54 | npy_path (str): The path to the npy file. 55 | jointtype (str): The type of joints for the skeleton. 56 | cfg_path (str): The path to the configuration file. 57 | 58 | Returns: 59 | fig_path (Path): The path to the rendered figure. 60 | """ 61 | 62 | export_scripts = "render.py" 63 | 64 | os.system( 65 | f"{execute_python} --background --python {export_scripts} -- --cfg={cfg_path} --npy={npy_path} --joint_type={jointtype}" 66 | ) 67 | 68 | # Define the path to the rendered figure and return it 69 | fig_path = Path(str(npy_path).replace(".npy", ".png")) 70 | return fig_path 71 | 72 | 73 | # origin render 74 | def export_fbx_hand(pkl_path): 75 | """ 76 | Parameters: 77 | pkl_path (str): The path to the .pkl file. 
78 | 79 | Returns: 80 | None 81 | """ 82 | _input = pkl_path 83 | output = pkl_path.replace(".pkl", ".fbx") 84 | 85 | execute_python = "/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender" 86 | export_scripts = "./scripts/fbx_output_smplx.py" 87 | os.system( 88 | f"{execute_python} -noaudio --background --python {export_scripts}\ 89 | --input {_input} \ 90 | --output {output}" 91 | ) 92 | 93 | 94 | # export fbx without hand params from pkl files 95 | def export_fbx(pkl_path): 96 | """ 97 | Parameters: 98 | pkl_path (str): The path to the .pkl file. 99 | 100 | Returns: 101 | None 102 | """ 103 | _input = pkl_path 104 | output = pkl_path.replace(".pkl", ".fbx") 105 | 106 | execute_python = "/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender" 107 | export_scripts = "./scripts/fbx_output.py" 108 | os.system( 109 | f"{execute_python} -noaudio --background --python {export_scripts}\ 110 | --input {_input} \ 111 | --output {output}" 112 | ) 113 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/easyconvert.py: -------------------------------------------------------------------------------- 1 | import tma.utils.geometry as geometry 2 | 3 | 4 | # This function returns the number of features for a given rotation type. 5 | def nfeats_of(rottype): 6 | """ 7 | Parameters: 8 | rottype (str): The type of rotation. 9 | 10 | Returns: 11 | int: The number of features for the rotation type. 12 | """ 13 | if rottype in ["rotvec", "axisangle"]: 14 | return 3 15 | elif rottype in ["rotquat", "quaternion"]: 16 | return 4 17 | elif rottype in ["rot6d", "6drot", "rotation6d"]: 18 | return 6 19 | elif rottype in ["rotmat"]: 20 | return 9 21 | else: 22 | raise TypeError("This rotation type doesn't have features.") 23 | 24 | # This function converts axis-angle rotations to another rotation type. 25 | 26 | 27 | def axis_angle_to(newtype, rotations): 28 | """ 29 | Parameters: 30 | newtype (str): The new type of rotation. 31 | rotations (np.array): The axis-angle rotations. 32 | 33 | Returns: 34 | np.array: The rotations converted to the new type. 35 | """ 36 | if newtype in ["matrix"]: 37 | rotations = geometry.axis_angle_to_matrix(rotations) 38 | return rotations 39 | elif newtype in ["rotmat"]: 40 | rotations = geometry.axis_angle_to_matrix(rotations) 41 | rotations = matrix_to("rotmat", rotations) 42 | return rotations 43 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 44 | rotations = geometry.axis_angle_to_matrix(rotations) 45 | rotations = matrix_to("rot6d", rotations) 46 | return rotations 47 | elif newtype in ["rotquat", "quaternion"]: 48 | rotations = geometry.axis_angle_to_quaternion(rotations) 49 | return rotations 50 | elif newtype in ["rotvec", "axisangle"]: 51 | return rotations 52 | else: 53 | raise NotImplementedError 54 | 55 | # This function converts matrix rotations to another rotation type. 56 | 57 | 58 | def matrix_to(newtype, rotations): 59 | """ 60 | Parameters: 61 | newtype (str): The new type of rotation. 62 | rotations (np.array): The matrix rotations. 63 | 64 | Returns: 65 | np.array: The rotations converted to the new type.
66 | """ 67 | if newtype in ["matrix"]: 68 | return rotations 69 | if newtype in ["rotmat"]: 70 | rotations = rotations.reshape((*rotations.shape[:-2], 9)) 71 | return rotations 72 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 73 | rotations = geometry.matrix_to_rotation_6d(rotations) 74 | return rotations 75 | elif newtype in ["rotquat", "quaternion"]: 76 | rotations = geometry.matrix_to_quaternion(rotations) 77 | return rotations 78 | elif newtype in ["rotvec", "axisangle"]: 79 | rotations = geometry.matrix_to_axis_angle(rotations) 80 | return rotations 81 | else: 82 | raise NotImplementedError 83 | 84 | # This function converts rotations of a given type to a matrix. 85 | 86 | 87 | def to_matrix(oldtype, rotations): 88 | """ 89 | Parameters: 90 | oldtype (str): The old type of rotation. 91 | rotations (np.array): The rotations. 92 | 93 | Returns: 94 | np.array: The rotations converted to a matrix. 95 | """ 96 | if oldtype in ["matrix"]: 97 | return rotations 98 | if oldtype in ["rotmat"]: 99 | rotations = rotations.reshape((*rotations.shape[:-2], 3, 3)) 100 | return rotations 101 | elif oldtype in ["rot6d", "6drot", "rotation6d"]: 102 | rotations = geometry.rotation_6d_to_matrix(rotations) 103 | return rotations 104 | elif oldtype in ["rotquat", "quaternion"]: 105 | rotations = geometry.quaternion_to_matrix(rotations) 106 | return rotations 107 | elif oldtype in ["rotvec", "axisangle"]: 108 | rotations = geometry.axis_angle_to_matrix(rotations) 109 | return rotations 110 | else: 111 | raise NotImplementedError 112 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/fixseed.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import random 4 | 5 | 6 | def fixseed(seed): 7 | random.seed(seed) 8 | np.random.seed(seed) 9 | torch.manual_seed(seed) 10 | 11 | 12 | SEED = 10 13 | EVALSEED = 0 14 | # Provokes a warning: not fully functional yet 15 | # torch.set_deterministic(True) 16 | torch.backends.cudnn.benchmark = False 17 | 18 | fixseed(SEED) 19 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/logger.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import time 4 | import logging 5 | from omegaconf import OmegaConf 6 | from pytorch_lightning.utilities.rank_zero import rank_zero_only 7 | 8 | 9 | def create_logger(cfg, phase='train'): 10 | """ 11 | Creates a logger for logging the training or testing process. 12 | 13 | Args: 14 | cfg (OmegaConf): The configuration object containing all the necessary parameters. 15 | phase (str, optional): The phase of the process. Defaults to 'train'. 16 | 17 | Returns: 18 | logger (logging.Logger): The logger object for logging the process.
19 | """ 20 | # root dir set by cfg 21 | root_output_dir = Path(cfg.FOLDER) 22 | # set up logger 23 | if not root_output_dir.exists(): 24 | print('=> creating {}'.format(root_output_dir)) 25 | root_output_dir.mkdir() 26 | 27 | cfg_name = cfg.NAME 28 | model = cfg.model.model_type 29 | cfg_name = os.path.basename(cfg_name).split('.')[0] 30 | 31 | final_output_dir = root_output_dir / model / cfg_name 32 | cfg.FOLDER_EXP = str(final_output_dir) 33 | 34 | time_str = time.strftime('%Y-%m-%d-%H-%M-%S') 35 | 36 | new_dir(cfg, phase, time_str, final_output_dir) 37 | 38 | head = '%(asctime)-15s %(message)s' 39 | logger = config_logger(final_output_dir, time_str, phase, head) 40 | if logger is None: 41 | logger = logging.getLogger() 42 | logger.setLevel(logging.CRITICAL) 43 | logging.basicConfig(format=head) 44 | return logger 45 | 46 | 47 | @rank_zero_only 48 | def config_logger(final_output_dir, time_str, phase, head): 49 | """ 50 | Configures the logger for logging the training or testing process. 51 | 52 | Args: 53 | final_output_dir (Path): The final output directory where the log file will be saved. 54 | time_str (str): The current time in string format. 55 | phase (str): The phase of the process. 56 | head (str): The format of the log message. 57 | 58 | Returns: 59 | logger (logging.Logger): The logger object for logging the process. 60 | """ 61 | log_file = '{}_{}_{}.log'.format('log', time_str, phase) 62 | final_log_file = final_output_dir / log_file 63 | logging.basicConfig(filename=str(final_log_file)) 64 | logger = logging.getLogger() 65 | logger.setLevel(logging.INFO) 66 | console = logging.StreamHandler() 67 | formatter = logging.Formatter(head) 68 | console.setFormatter(formatter) 69 | logging.getLogger('').addHandler(console) 70 | file_handler = logging.FileHandler(final_log_file, 'w') 71 | file_handler.setFormatter(logging.Formatter(head)) 72 | file_handler.setLevel(logging.INFO) 73 | logging.getLogger('').addHandler(file_handler) 74 | return logger 75 | 76 | 77 | @rank_zero_only 78 | def new_dir(cfg, phase, time_str, final_output_dir): 79 | """ 80 | Creates a new directory for the experiment and saves the configuration file. 81 | 82 | Args: 83 | cfg (OmegaConf): The configuration object containing all the necessary parameters. 84 | phase (str): The phase of the process. 85 | time_str (str): The current time in string format. 86 | final_output_dir (Path): The final output directory where the log file will be saved. 87 | """ 88 | # new experiment folder 89 | cfg.TIME = str(time_str) 90 | if os.path.exists( 91 | final_output_dir) and cfg.TRAIN.RESUME is None and not cfg.DEBUG: 92 | file_list = sorted(os.listdir(final_output_dir), reverse=True) 93 | for item in file_list: 94 | if item.endswith('.log'): 95 | os.rename(str(final_output_dir), 96 | str(final_output_dir) + '_' + cfg.TIME) 97 | break 98 | final_output_dir.mkdir(parents=True, exist_ok=True) 99 | # write config yaml 100 | config_file = '{}_{}_{}.yaml'.format('config', time_str, phase) 101 | final_config_file = final_output_dir / config_file 102 | OmegaConf.save(config=cfg, f=final_config_file) 103 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/misc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def to_numpy(tensor): 5 | """ 6 | Converts a PyTorch tensor to a numpy array. 7 | 8 | Args: 9 | tensor (torch.Tensor): The PyTorch tensor to convert. 
10 | 11 | Returns: 12 | ndarray (numpy.ndarray): The converted numpy array. 13 | 14 | Raises: 15 | """ 16 | if torch.is_tensor(tensor): 17 | return tensor.cpu().numpy() 18 | elif type(tensor).__module__ != 'numpy': 19 | raise ValueError("Cannot convert {} to numpy array".format( 20 | type(tensor))) 21 | return tensor 22 | 23 | 24 | def to_torch(ndarray): 25 | """ 26 | Converts a numpy array to a PyTorch tensor. 27 | 28 | Args: 29 | ndarray (numpy.ndarray): The numpy array to convert. 30 | 31 | Returns: 32 | tensor (torch.Tensor): The converted PyTorch tensor. 33 | 34 | Raises: 35 | ValueError: If the input is not a numpy array. 36 | """ 37 | if type(ndarray).__module__ == 'numpy': 38 | return torch.from_numpy(ndarray) 39 | elif not torch.is_tensor(ndarray): 40 | raise ValueError("Cannot convert {} to torch tensor".format( 41 | type(ndarray))) 42 | return ndarray 43 | 44 | 45 | def cleanexit(): 46 | """ 47 | Exits the program cleanly by handling the SystemExit exception. 48 | 49 | No input arguments or return values. 50 | """ 51 | import sys 52 | import os 53 | try: 54 | sys.exit(0) 55 | except SystemExit: 56 | os._exit(0) 57 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/sample_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | logger = logging.getLogger(__name__) 4 | 5 | 6 | def cfg_mean_nsamples_resolution(cfg): 7 | """ 8 | Resolves the number of samples based on the configuration. 9 | 10 | Args: 11 | cfg: The configuration object containing the parameters 'mean' and 'number_of_samples'. 12 | 13 | Returns: 14 | bool: True if the number of samples is 1, False otherwise. 15 | 16 | Side Effects: 17 | If 'mean' is True and 'number_of_samples' is more than 1, it logs an error and sets 'number_of_samples' to 1. 18 | """ 19 | if cfg.mean and cfg.number_of_samples > 1: 20 | logger.error( 21 | "All the samples will be the mean.. cfg.number_of_samples=1 will be forced.") 22 | cfg.number_of_samples = 1 23 | 24 | return cfg.number_of_samples == 1 25 | 26 | 27 | def get_path(sample_path: Path, is_amass: bool, gender: str, split: str, onesample: bool, mean: bool, fact: float): 28 | """ 29 | Constructs a path based on the provided parameters. 30 | 31 | Args: 32 | sample_path (Path): The base path for the sample. 33 | is_amass (bool): A flag indicating whether the sample is from AMASS. 34 | gender (str): The gender of the sample. 35 | split (str): The split of the sample (e.g., 'train', 'test'). 36 | onesample (bool): A flag indicating whether there is only one sample. 37 | mean (bool): A flag indicating whether the sample is a mean sample. 38 | fact (float): A factor to be included in the path. 39 | 40 | Returns: 41 | path (Path): The constructed path. 42 | """ 43 | extra_str = ("_mean" if mean else "") if onesample else "_multi" 44 | fact_str = "" if fact == 1 else f"{fact}_" 45 | gender_str = gender + "_" if is_amass else "" 46 | path = sample_path / f"{fact_str}{gender_str}{split}{extra_str}" 47 | return path 48 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/tensors.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def lengths_to_mask(lengths): 5 | """ 6 | Converts lengths to a mask tensor. 7 | 8 | Args: 9 | lengths (Tensor): A tensor of lengths. 10 | 11 | Returns: 12 | Tensor: A tensor mask of shape (len(lengths), max_len). 
13 | """ 14 | max_len = max(lengths) 15 | mask = torch.arange(max_len, device=lengths.device).expand( 16 | len(lengths), max_len) < lengths.unsqueeze(1) 17 | return mask 18 | 19 | 20 | def collate_tensors(batch): 21 | """ 22 | Collates a batch of tensors by padding them to the same size. 23 | 24 | Args: 25 | batch (List[Tensor]): A list of tensors. 26 | 27 | Returns: 28 | Tensor: A tensor of shape (len(batch), max_size). 29 | """ 30 | dims = batch[0].dim() 31 | max_size = [max([b.size(i) for b in batch]) for i in range(dims)] 32 | size = (len(batch),) + tuple(max_size) 33 | canvas = batch[0].new_zeros(size=size) 34 | for i, b in enumerate(batch): 35 | sub_tensor = canvas[i] 36 | for d in range(dims): 37 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 38 | sub_tensor.add_(b) 39 | return canvas 40 | 41 | 42 | def collate(batch): 43 | """ 44 | Collates a batch of data and labels, and generates a mask tensor. 45 | 46 | Args: 47 | batch (List[Tuple[Tensor, Tensor]]): A list of tuples, each containing a tensor of data and a tensor of labels. 48 | 49 | Returns: 50 | dict: A dictionary containing the collated data, labels, mask, and lengths. 51 | """ 52 | databatch = [b[0] for b in batch] 53 | labelbatch = [b[1] for b in batch] 54 | lenbatch = [len(b[0][0][0]) for b in batch] 55 | 56 | databatchTensor = collate_tensors(databatch) 57 | labelbatchTensor = torch.as_tensor(labelbatch) 58 | lenbatchTensor = torch.as_tensor(lenbatch) 59 | 60 | maskbatchTensor = lengths_to_mask(lenbatchTensor) 61 | 62 | batch = {"x": databatchTensor, "y": labelbatchTensor, 63 | "mask": maskbatchTensor, 'lengths': lenbatchTensor} 64 | return batch 65 | 66 | 67 | # slow version with padding 68 | def collate_data3d_slow(batch): 69 | """ 70 | Collates a batch of 3D data by padding them to the same size. 71 | 72 | Args: 73 | batch (List[dict]): A list of dictionaries, each containing a tensor of 3D data. 74 | 75 | Returns: 76 | dict: A dictionary containing the collated 3D data. 77 | """ 78 | batchTensor = {} 79 | for key in batch[0].keys(): 80 | databatch = [b[key] for b in batch] 81 | batchTensor[key] = collate_tensors(databatch) 82 | 83 | batch = batchTensor 84 | return batch 85 | 86 | 87 | def collate_data3d(batch): 88 | """ 89 | Collates a batch of 3D data by stacking them along a new dimension. 90 | 91 | Args: 92 | batch (List[dict]): A list of dictionaries, each containing a tensor of 3D data. 93 | 94 | Returns: 95 | dict: A dictionary containing the collated 3D data. 
96 | """ 97 | batchTensor = {} 98 | for key in batch[0].keys(): 99 | databatch = [b[key] for b in batch] 100 | if key == "paths": 101 | batchTensor[key] = databatch 102 | else: 103 | batchTensor[key] = torch.stack(databatch, axis=0) 104 | 105 | batch = batchTensor 106 | return batch 107 | -------------------------------------------------------------------------------- /OpenTMA/tmp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | path = "/comp_robot/chenlinghao/OpenTMR/experiments/temos/debug--H3D-TMR-release-2/embeddings/val/epoch_99/motion_embedding.npy" 4 | 5 | motion_embedding = np.load(path) 6 | print(motion_embedding.shape) 7 | 8 | # find the nearest neighbor of 0 index motion 9 | distances = np.linalg.norm(motion_embedding - motion_embedding[0], axis=1) 10 | print(distances, len(distances)) 11 | 12 | # find index and the distance of the nearest 4 neighbor 13 | print(np.argsort(distances)) 14 | print(np.sort(distances)) 15 | 16 | # print(motion_embedding[3688]) 17 | # print(motion_embedding[0]) 18 | -------------------------------------------------------------------------------- /OpenTMA/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J X-TMR 3 | #SBATCH -p cvr 4 | #SBATCH -N 1 5 | #SBATCH --cpus-per-task=18 6 | #SBATCH --gres=gpu:hgx:4 7 | #SBATCH --mem 300GB 8 | #SBATCH --qos=preemptive 9 | 10 | source activate temos 11 | 12 | # python -m train --cfg configs/configs_temos/MotionX-TMR.yaml --cfg_assets configs/assets.yaml --nodebug 13 | # python -m train --cfg configs/configs_temos/UniMocap-TMR.yaml --cfg_assets configs/assets.yaml --nodebug 14 | python -m train --cfg configs/configs_temos/H3D-TMR.yaml --cfg_assets configs/assets.yaml --nodebug 15 | 16 | 17 | # find ./ -type d -name "__pycache__" -exec rm -rf {} + -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HumanTOMATO: Text-aligned Whole-body Motion Generation 2 | 3 |  4 | 5 | [Shunlin Lu](https://shunlinlu.github.io)🍅 2, 3, [Ling-Hao Chen](https://lhchen.top)🍅 1, 2, [Ailing Zeng](https://ailingzeng.site)2, [Jing Lin](https://jinglin7.github.io)1, 2, [Ruimao Zhang](http://zhangruimao.site)3, [Lei Zhang](https://leizhang.org)2, and [Heung-Yeung Shum](https://scholar.google.com/citations?user=9akH-n8AAAAJ&hl=en)1, 2 6 | 7 | 🍅Co-first author. Listing order is random. 8 | 9 | 1Tsinghua University, 2International Digital Economy Academy (IDEA), 10 | 3School of Data Science, The Chinese University of Hong Kong, Shenzhen (CUHK-SZ) 11 | 12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
91 |
92 |
93 |
94 |
95 |
96 | If you have any questions, please contact us at: shunlinlu0803 [AT] gmail [DOT] com AND thu [DOT] lhchen [AT] gmail [DOT] com.
97 |
--------------------------------------------------------------------------------
/assets/highlight.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/assets/highlight.png
--------------------------------------------------------------------------------
/assets/system.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/assets/system.png
--------------------------------------------------------------------------------
/assets/tomato-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/assets/tomato-logo.png
--------------------------------------------------------------------------------
/src/tomato_represenation/README.md:
--------------------------------------------------------------------------------
1 | # 🍅 How to use tomato representation?
2 |
3 | These instructions describe how to create the motion representation in the [Tomato](https://arxiv.org/pdf/2310.12978.pdf) format. The Tomato format extends the [H3D](https://github.com/EricGuo5513/HumanML3D) format but differs from it; we name it the `Tomato Representation` for convenience. For a detailed ablation of the motion representation design choices, please refer to Appendix B.1 in the [paper](https://arxiv.org/pdf/2310.12978.pdf).
4 |
5 | ## 🚀 Data Preparation
6 |
7 |
8 | Download SMPL+H, SMPLX, DMPLs.
10 |
11 | Download the SMPL+H model from [SMPL+H](https://mano.is.tue.mpg.de/download.php) (choose the Extended SMPL+H model used in the AMASS project), the DMPL model from [DMPL](https://smpl.is.tue.mpg.de/download.php) (choose DMPLs compatible with SMPL), and the SMPL-X model from [SMPL-X](https://smpl-x.is.tue.mpg.de/download.php). Then, please place all the models under `./body_models/`. The `./body_models/` folder tree should be:
12 |
13 | ```bash
14 | ./body_models
15 | ├── dmpls
16 | │ ├── female
17 | │ │ └── model.npz
18 | │ ├── male
19 | │ │ └── model.npz
20 | │ └── neutral
21 | │ └── model.npz
22 | ├── smplh
23 | │ ├── female
24 | │ │ └── model.npz
25 | │ ├── info.txt
26 | │ ├── male
27 | │ │ └── model.npz
28 | │ └── neutral
29 | │ └── model.npz
30 | ├── smplx
31 | │ ├── female
32 | │ │ ├── model.npz
33 | │ │ └── model.pkl
34 | │ ├── male
35 | │ │ ├── model.npz
36 | │ │ └── model.pkl
37 | │ └── neutral
38 | │ ├── model.npz
39 | │ └── model.pkl
40 | ```
41 |
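A quick sanity check can catch missing model files before any preprocessing starts. The script below is a minimal sketch (not part of the original pipeline): the expected paths simply mirror the folder tree above, and the root path is an assumption you may need to adjust.

```python
from pathlib import Path

# Minimal sanity check for the ./body_models layout shown above.
# The expected files mirror the folder tree; extend the list
# (e.g. with the smplx model.pkl files) if your pipeline needs them.
EXPECTED = [
    "dmpls/female/model.npz", "dmpls/male/model.npz", "dmpls/neutral/model.npz",
    "smplh/female/model.npz", "smplh/male/model.npz", "smplh/neutral/model.npz",
    "smplx/female/model.npz", "smplx/male/model.npz", "smplx/neutral/model.npz",
]


def check_body_models(root: str = "./body_models") -> bool:
    root_path = Path(root)
    missing = [p for p in EXPECTED if not (root_path / p).exists()]
    for p in missing:
        print(f"missing: {root_path / p}")
    return not missing


if __name__ == "__main__":
    ok = check_body_models()
    print("All expected body model files found." if ok
          else "Some body model files are missing (see above).")
```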
42 | Download Motion-X datasets
47 |
48 | Please follow the instructions of [Motion-X](https://github.com/IDEA-Research/Motion-X) to download the SMPL-X data with a dimension of 322. Put the motion data in the folder `./data/motion_data/smplx_322`.
49 |
50 |
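For a first look at the downloaded motion data, the 322-dimensional SMPL-X vectors can be unpacked per frame as sketched below. The slice boundaries are an assumption based on the layout described in the Motion-X documentation (root orientation, body pose, hand pose, jaw pose, facial expression, facial shape, translation, and shape betas), and `example.npy` is a hypothetical file name; verify both against your copy of the dataset.

```python
import numpy as np

# Hypothetical example file under the folder mentioned above.
motion = np.load("./data/motion_data/smplx_322/example.npy")
print(motion.shape)  # expected: (num_frames, 322)

# Assumed per-frame layout of the 322 dimensions (based on the Motion-X docs):
root_orient = motion[:, 0:3]      # global root orientation (axis-angle), 3
pose_body   = motion[:, 3:66]     # 21 body joints x 3 = 63
pose_hand   = motion[:, 66:156]   # 30 hand joints x 3 = 90
pose_jaw    = motion[:, 156:159]  # jaw rotation, 3
face_expr   = motion[:, 159:209]  # facial expression coefficients, 50
face_shape  = motion[:, 209:309]  # facial shape coefficients, 100
trans       = motion[:, 309:312]  # global translation, 3
betas       = motion[:, 312:322]  # body shape coefficients, 10
```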