├── .gitignore
├── LICENSE
├── OpenTMA
│   ├── .gitignore
│   ├── README.md
│   ├── assets
│   │   └── logo.png
│   ├── configs
│   │   ├── assets.yaml
│   │   ├── base.yaml
│   │   ├── configs_temos
│   │   │   ├── H3D-TMR.yaml
│   │   │   ├── MotionX-TMR.yaml
│   │   │   ├── UniMocap-TMR.yaml
│   │   │   └── infonce.yaml
│   │   └── modules_temos
│   │       ├── motiondecoder.yaml
│   │       ├── motionencoder.yaml
│   │       └── text_encoder.yaml
│   ├── requirements.txt
│   ├── retrieval.py
│   ├── retrieval.sh
│   ├── test.py
│   ├── test_temos.py
│   ├── tma
│   │   ├── __init__.py
│   │   ├── callback
│   │   │   ├── __init__.py
│   │   │   └── progress.py
│   │   ├── config.py
│   │   ├── data
│   │   │   ├── HumanML3D.py
│   │   │   ├── Humanact12.py
│   │   │   ├── Kit.py
│   │   │   ├── MotionX.py
│   │   │   ├── Uestc.py
│   │   │   ├── UniMocap.py
│   │   │   ├── __init__.py
│   │   │   ├── a2m
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dataset.py
│   │   │   │   ├── humanact12poses.py
│   │   │   │   ├── tools.py
│   │   │   │   ├── uestc.py
│   │   │   │   └── utils
│   │   │   │       ├── __init__.py
│   │   │   │       ├── misc.py
│   │   │   │       ├── rotation_conversions.py
│   │   │   │       └── tensors.py
│   │   │   ├── base.py
│   │   │   ├── get_data.py
│   │   │   ├── humanml
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── common
│   │   │   │   │   ├── quaternion.py
│   │   │   │   │   └── skeleton.py
│   │   │   │   ├── data
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── dataset.py
│   │   │   │   ├── dataset.py
│   │   │   │   ├── scripts
│   │   │   │   │   └── motion_process.py
│   │   │   │   └── utils
│   │   │   │       ├── __init__.py
│   │   │   │       ├── get_opt.py
│   │   │   │       ├── metrics.py
│   │   │   │       ├── paramUtil.py
│   │   │   │       ├── plot_script.py
│   │   │   │       ├── utils.py
│   │   │   │       └── word_vectorizer.py
│   │   │   ├── sampling
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── framerate.py
│   │   │   │   └── frames.py
│   │   │   └── utils.py
│   │   ├── launch
│   │   │   ├── __init__.py
│   │   │   ├── prepare.py
│   │   │   └── tools.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   ├── architectures
│   │   │   │   ├── __init__.py
│   │   │   │   ├── actor_vae.py
│   │   │   │   ├── fc.py
│   │   │   │   ├── humanact12_gru.py
│   │   │   │   ├── t2m_motionenc.py
│   │   │   │   ├── t2m_textenc.py
│   │   │   │   ├── temos
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── motiondecoder
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── actor.py
│   │   │   │   │   │   └── gru.py
│   │   │   │   │   ├── motionencoder
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── actor.py
│   │   │   │   │   │   └── gru.py
│   │   │   │   │   └── textencoder
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── distillbert.py
│   │   │   │   │       └── distillbert_actor.py
│   │   │   │   ├── tools
│   │   │   │   │   ├── embeddings.py
│   │   │   │   │   └── transformer_layers.py
│   │   │   │   ├── vision_transformer.py
│   │   │   │   └── vposert_vae.py
│   │   │   ├── body_skeleton
│   │   │   │   ├── __init__.py
│   │   │   │   ├── paramUtil.py
│   │   │   │   ├── quaternion.py
│   │   │   │   └── skeleton.py
│   │   │   ├── get_model.py
│   │   │   ├── losses
│   │   │   │   ├── __init__.py
│   │   │   │   ├── actor.py
│   │   │   │   ├── infonce.py
│   │   │   │   ├── kl.py
│   │   │   │   ├── mld.py
│   │   │   │   ├── temos.py
│   │   │   │   ├── tmost.py
│   │   │   │   ├── utils.py
│   │   │   │   └── vqvae.py
│   │   │   ├── metrics
│   │   │   │   ├── __init__.py
│   │   │   │   ├── compute.py
│   │   │   │   ├── mm.py
│   │   │   │   ├── retrieval_recall.py
│   │   │   │   ├── tm2t.py
│   │   │   │   ├── uncond.py
│   │   │   │   └── utils.py
│   │   │   ├── modeltype
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── smplx_layer.py
│   │   │   │   └── temos.py
│   │   │   ├── operator
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adain.py
│   │   │   │   ├── blocks.py
│   │   │   │   ├── conv2d_gradfix.py
│   │   │   │   ├── cross_attention.py
│   │   │   │   ├── position_encoding.py
│   │   │   │   ├── position_encoding_layer.py
│   │   │   │   └── self_attention.py
│   │   │   └── tools
│   │   │       ├── __init__.py
│   │   │       ├── hessian_penalty.py
│   │   │       └── tools.py
│   │   ├── tools
│   │   │   ├── __init__.py
│   │   │   ├── geometry.py
│   │   │   ├── logging.py
│   │   │   └── runid.py
│   │   ├── transforms
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── feats2smpl.py
│   │   │   ├── identity.py
│   │   │   ├── joints2jfeats
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── rifke.py
│   │   │   │   └── tools.py
│   │   │   ├── joints2rots
│   │   │   │   ├── config.py
│   │   │   │   ├── customloss.py
│   │   │   │   ├── prior.py
│   │   │   │   └── smplify.py
│   │   │   ├── rotation2xyz.py
│   │   │   ├── rots2joints
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   └── smplh.py
│   │   │   ├── rots2rfeats
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   └── smplvelp.py
│   │   │   ├── smpl.py
│   │   │   └── xyz.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── demo_utils.py
│   │       ├── easyconvert.py
│   │       ├── fixseed.py
│   │       ├── geometry.py
│   │       ├── joints.py
│   │       ├── logger.py
│   │       ├── misc.py
│   │       ├── rotation_conversions.py
│   │       ├── sample_utils.py
│   │       ├── temos_utils.py
│   │       └── tensors.py
│   ├── tmp.py
│   ├── train.py
│   └── train.sh
├── README.md
├── assets
│   ├── highlight.png
│   ├── system.png
│   └── tomato-logo.png
└── src
    └── tomato_represenation
        ├── README.md
        ├── common
        │   ├── quaternion.py
        │   └── skeleton.py
        ├── dataset.py
        ├── motion_representation.py
        ├── paramUtil.py
        ├── plot_3d_global.py
        ├── plot_feature.py
        ├── raw_pose_processing.py
        └── smplx2joints.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 
109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | License for Non-commercial Scientific Research Purposes 2 | 3 | IDEA grants you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty free and limited license under IDEA’s copyright interests to reproduce, distribute, and create derivative works of the text, videos, codes solely for your non-commercial research purposes. 4 | 5 | Any other use, in particular any use for commercial, pornographic, military, or surveillance, purposes is prohibited. 6 | 7 | Text and visualization results are owned by International Digital Economy Academy (IDEA). 8 | 9 | We have used the AIST Dance Video Database (https://aistdancedb.ongaaccel.jp) for demonstration. 
-------------------------------------------------------------------------------- /OpenTMA/.gitignore: -------------------------------------------------------------------------------- 1 | experiments 2 | deps 3 | datasets 4 | .vector_cache 5 | *.pyc 6 | *.out -------------------------------------------------------------------------------- /OpenTMA/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/assets/logo.png -------------------------------------------------------------------------------- /OpenTMA/configs/assets.yaml: -------------------------------------------------------------------------------- 1 | FOLDER: './experiments' # Experiment files saving path 2 | 3 | TEST: 4 | FOLDER: './results' # Testing files saving path 5 | 6 | DATASET: 7 | SMPL_PATH: './deps/smpl' 8 | TRANSFORM_PATH: './deps/transforms/' 9 | WORD_VERTILIZER_PATH: './deps/glove/' 10 | UNIMOCAP: 11 | ROOT: './datasets/UniMocap' # KIT directory 12 | SPLIT_ROOT: './datasets/UniMocap' # KIT splits directory 13 | KIT: 14 | ROOT: './datasets/kit-ml' # KIT directory 15 | SPLIT_ROOT: './datasets/kit-ml' # KIT splits directory 16 | HUMANML3D: 17 | ROOT: './datasets/humanml3d' # HumanML3D directory 18 | SPLIT_ROOT: './datasets/humanml3d' # HumanML3D splits directory 19 | HUMANACT12: 20 | ROOT: ./datasets/HumanAct12Poses 21 | SPLIT_ROOT: ./datasets/HumanAct12Poses 22 | UESTC: 23 | ROOT: ./datasets/uestc 24 | SPLIT_ROOT: ./datasets/uestc 25 | AMASS: 26 | DB_ROOT: /apdcephfs/share_1227775/shingxchen/uicap/data/vibe_db 27 | MOTIONX: 28 | ROOT: ./datasets/Motion-X 29 | SPLIT_ROOT: ./datasets/Motion-X/split 30 | SEMANTIC_TEXT_ROOT: ./datasets/Motion-X/texts/semantic_texts 31 | FACE_TEXT_ROOT: ./datasets/Motion-X/texts/face_texts 32 | 33 | 34 | model: 35 | bert_path: './deps/distilbert-base-uncased' # bert model path for all text encoders 36 | clip_path: './deps/clip-vit-large-patch14' # bert model path for all text encoders 37 | t2m_path: './deps/t2m/' 38 | 39 | humanact12_rec_path: './deps/actionrecognition' 40 | uestc_rec_path: './deps/actionrecognition' 41 | 42 | RENDER: 43 | BLENDER_PATH: '/apdcephfs/share_1227775/mingzhenzhu/jiangbiao/libs/blender-2.93.2-linux-x64/blender' 44 | FACES_PATH: '/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smplh/smplh.faces' 45 | FOLDER: ./animations 46 | -------------------------------------------------------------------------------- /OpenTMA/configs/base.yaml: -------------------------------------------------------------------------------- 1 | # FOLDER: ./experiments 2 | SEED_VALUE: 1234 3 | DEBUG: True 4 | TRAIN: 5 | SPLIT: 'train' 6 | NUM_WORKERS: 2 # Number of workers 7 | BATCH_SIZE: 4 # Size of batches 8 | START_EPOCH: 0 # Start epoch 9 | END_EPOCH: 2000 # End epoch 10 | RESUME: '' # Experiment path to be resumed training 11 | PRETRAINED_VAE: '' 12 | PRETRAINED: '' # Pretrained model path 13 | 14 | OPTIM: 15 | OPTIM.TYPE: 'AdamW' # Optimizer type 16 | OPTIM.LR: 1e-4 # Learning rate 17 | 18 | ABLATION: 19 | VAE_TYPE: 'actor' # vae ablation: actor or mcross 20 | VAE_ARCH: 'encoder_decoder' # mdiffusion vae architecture 21 | PE_TYPE: 'actor' # mdiffusion tma or actor 22 | DIFF_PE_TYPE: 'actor' # mdiffusion tma or actor 23 | SKIP_CONNECT: False # skip connection for denoiser va 24 | # use linear to expand mean and std rather expand token nums 25 | MLP_DIST: False 26 | IS_DIST: False # Mcross distribution kl 27 | PREDICT_EPSILON: 
True # noise or motion 28 | 29 | EVAL: 30 | SPLIT: 'gtest' 31 | BATCH_SIZE: 1 # Evaluating Batch size 32 | NUM_WORKERS: 12 # Evaluating Batch size 33 | 34 | TEST: 35 | TEST_DIR: '' 36 | CHECKPOINTS: '' # Pretrained model path 37 | SPLIT: 'gtest' 38 | BATCH_SIZE: 1 # Testing Batch size 39 | NUM_WORKERS: 12 # Evaluating Batch size 40 | SAVE_PREDICTIONS: False # Weather to save predictions 41 | COUNT_TIME: False # Weather to count time during test 42 | REPLICATION_TIMES: 20 # Number of times to replicate the test 43 | MM_NUM_SAMPLES: 100 # Number of samples for multimodal test 44 | MM_NUM_REPEATS: 30 # Number of repeats for multimodal test 45 | MM_NUM_TIMES: 10 # Number of times to repeat the multimodal test 46 | DIVERSITY_TIMES: 300 # Number of times to repeat the diversity test 47 | REP_I: 0 48 | model: 49 | target: 'modules' 50 | t2m_textencoder: 51 | dim_word: 300 52 | dim_pos_ohot: 15 53 | dim_text_hidden: 512 54 | dim_coemb_hidden: 512 55 | 56 | t2m_motionencoder: 57 | dim_move_hidden: 512 58 | dim_move_latent: 512 59 | dim_motion_hidden: 1024 60 | dim_motion_latent: 512 61 | LOSS: 62 | LAMBDA_LATENT: 1e-5 # Lambda for latent losses 63 | LAMBDA_KL: 1e-5 # Lambda for kl losses 64 | LAMBDA_REC: 1.0 # Lambda for reconstruction losses 65 | LAMBDA_JOINT: 1.0 # Lambda for joint losses 66 | LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses 67 | LAMBDA_CROSS: 1.0 # Lambda for cross-reconstruction losses 68 | LAMBDA_CYCLE: 1.0 # Lambda for cycle losses 69 | LAMBDA_PRIOR: 0.0 70 | DIST_SYNC_ON_STEP: True 71 | METRIC: 72 | FORCE_IN_METER: True 73 | DIST_SYNC_ON_STEP: True 74 | DATASET: 75 | NCLASSES: 10 76 | SAMPLER: 77 | MAX_SQE: -1 78 | MAX_LEN: 196 79 | MIN_LEN: 40 80 | MAX_TEXT_LEN: 20 81 | KIT: 82 | PICK_ONE_TEXT: true 83 | FRAME_RATE: 12.5 84 | UNIT_LEN: 4 85 | HUMANML3D: 86 | PICK_ONE_TEXT: true 87 | FRAME_RATE: 20.0 88 | UNIT_LEN: 4 89 | UNIMOCAP: 90 | PICK_ONE_TEXT: true 91 | FRAME_RATE: 20.0 92 | UNIT_LEN: 4 93 | HUMANACT12: 94 | NUM_FRAMES: 60 95 | POSE_REP: rot6d 96 | GLOB: true 97 | TRANSLATION: true 98 | UESTC: 99 | NUM_FRAMES: 60 100 | POSE_REP: rot6d 101 | GLOB: true 102 | TRANSLATION: true 103 | MOTIONX: 104 | UNIT_LEN: 4 105 | 106 | LOGGER: 107 | SACE_CHECKPOINT_EPOCH: 1 108 | LOG_EVERY_STEPS: 1 109 | VAL_EVERY_STEPS: 10 110 | TENSORBOARD: true 111 | WANDB: 112 | OFFLINE: false 113 | PROJECT: null 114 | RESUME_ID: null 115 | RENDER: 116 | JOINT_TYPE: mmm 117 | INPUT_MODE: npy 118 | DIR: '' 119 | NPY: '' 120 | DENOISING: true 121 | OLDRENDER: true 122 | RES: high 123 | DOWNSAMPLE: true 124 | FPS: 12.5 125 | CANONICALIZE: true 126 | EXACT_FRAME: 0.5 127 | NUM: 7 128 | MODE: sequence 129 | VID_EXT: mp4 130 | ALWAYS_ON_FLOOR: false 131 | GT: false 132 | DEMO: 133 | MOTION_TRANSFER: false 134 | RENDER: false 135 | FRAME_RATE: 12.5 136 | EXAMPLE: null 137 | -------------------------------------------------------------------------------- /OpenTMA/configs/configs_temos/H3D-TMR.yaml: -------------------------------------------------------------------------------- 1 | NAME: H3D-TMR-v1 # Experiment name 2 | DEBUG: False # Debug mode 3 | ACCELERATOR: 'gpu' # Devices optioncal: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps, “auto” 4 | DEVICE: [0,1,2,3] # Index of gpus eg. [0] or [0,1,2,3] 5 | # DEVICE: [0] # Index of gpus eg. 
[0] or [0,1,2,3] 6 | 7 | # Training configuration 8 | TRAIN: 9 | #--------------------------------- 10 | STAGE: temos # stage "vae" or "diffusion", "vae_diffusion" 11 | #--------------------------------- 12 | DATASETS: ['humanml3d'] # Training datasets 13 | NUM_WORKERS: 11 # Number of workers 14 | BATCH_SIZE: 128 # Size of batches 15 | START_EPOCH: 0 # Start epochMMOTIONENCODER 16 | END_EPOCH: 1000 # End epoch 17 | RESUME: '' # Resume training from this path 18 | OPTIM: 19 | TYPE: AdamW # Optimizer type 20 | LR: 1e-4 # Learning rate 21 | 22 | # Evaluating Configuration 23 | EVAL: 24 | DATASETS: ['humanml3d'] # Evaluating datasets 25 | BATCH_SIZE: 32 # Evaluating Batch size 26 | SPLIT: test 27 | 28 | # Test Configuration 29 | TEST: 30 | PRETRAINED_CHECKPOINTS_VAE: '' 31 | SAVE_PREDICTIONS: False 32 | CHECKPOINTS: './experiments/temos/H3D-TMR-v1/checkpoints/epoch=299.ckpt' # Pretrained model path 33 | DATASETS: ['humanml3d'] # training datasets 34 | SPLIT: test 35 | BATCH_SIZE: 32 # training Batch size 36 | MEAN: False 37 | NUM_SAMPLES: 1 38 | FACT: 1 39 | # REPLICATION_TIM 40 | 41 | # Datasets Configuration 42 | DATASET: 43 | JOINT_TYPE: 'humanml3d' # join type 44 | VERSION: '' 45 | MOTION_TYPE: 'vector_263' 46 | METRIC: 47 | # TYPE: ['TemosMetric', 'TM2TMetrics'] 48 | TYPE: [] 49 | # Losses Configuration 50 | LOSS: 51 | TYPE: temos # Losses type 52 | USE_INFONCE: True 53 | USE_INFONCE_FILTER: True 54 | LAMBDA_LATENT: 1.0e-5 # Lambda for latent Losses 55 | LAMBDA_KL: 1.0e-5 # Lambda for kl Losses 56 | LAMBDA_REC: 1.0 # Lambda for reconstruction Losses 57 | LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses 58 | LAMBDA_CROSS: 1.0 # Lambda for reconstruction Losses 59 | LAMBDA_CYCLE: 0.0 # Lambda for cycle Losses 60 | LAMBDA_PRIOR: 0.0 61 | LAMBDA_INFONCE: 0.1 # Lambda for infonce 62 | INFONCE_TEMP: 0.1 63 | DIST_SYNC_ON_STEP: False # Sync Losses on step when distributed trained 64 | USE_RECLIPLOSS: False 65 | SYNC: False 66 | TRAIN_TMR: False 67 | 68 | # Model Configuration 69 | model: 70 | vae: true # whether vae model 71 | model_type: temos # model type 72 | condition: 'text' 73 | target: modules_temos 74 | ##### 75 | latent_dim: 256 # latent dimension 76 | ff_size: 1024 # 77 | num_layers: 4 # number of layers 78 | num_head: 6 # number of head layers 79 | dropout: 0.1 # dropout rate 80 | activation: gelu # activation type 81 | eval_text_encode_way: given_glove 82 | eval_text_source: token 83 | 84 | # Logger configuration 85 | LOGGER: 86 | SAVE_CHECKPOINT_EPOCH: 100 87 | LOG_EVERY_STEPS: 1 88 | VAL_EVERY_STEPS: 100 89 | TENSORBOARD: True 90 | WANDB: 91 | PROJECT: null 92 | OFFLINE: False 93 | RESUME_ID: null 94 | -------------------------------------------------------------------------------- /OpenTMA/configs/configs_temos/MotionX-TMR.yaml: -------------------------------------------------------------------------------- 1 | NAME: MotionX-TMR-v1 # Experiment name 2 | DEBUG: False # Debug mode 3 | ACCELERATOR: 'gpu' # Devices optioncal: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps, “auto” 4 | DEVICE: [0,1,2] # Index of gpus eg. [0] or [0,1,2,3] 5 | # DEVICE: [0] # Index of gpus eg. 
[0] or [0,1,2,3] 6 | 7 | # Training configuration 8 | TRAIN: 9 | #--------------------------------- 10 | STAGE: temos # stage "vae" or "diffusion", "vae_diffusion" 11 | #--------------------------------- 12 | DATASETS: ['motionx'] # Training datasets 13 | NUM_WORKERS: 11 # Number of workers 14 | BATCH_SIZE: 128 # Size of batches 15 | START_EPOCH: 0 # Start epochMMOTIONENCODER 16 | END_EPOCH: 4000 # End epoch 17 | RESUME: '' # Resume training from this path 18 | PRETRAINED_VAE: '' 19 | PRETRAINED_MLD: '' 20 | OPTIM: 21 | TYPE: AdamW # Optimizer type 22 | LR: 1e-4 # Learning rate 23 | 24 | # Evaluating Configuration 25 | EVAL: 26 | DATASETS: ['motionx'] # Evaluating datasets 27 | BATCH_SIZE: 32 # Evaluating Batch size 28 | SPLIT: test 29 | 30 | # Test Configuration 31 | TEST: 32 | PRETRAINED_CHECKPOINTS_VAE: '' 33 | SAVE_PREDICTIONS: False 34 | CHECKPOINTS: './experiments/temos/MotionX-TMR-v1/checkpoints/epoch=299.ckpt' # Pretrained model path 35 | DATASETS: ['motionx'] # training datasets 36 | SPLIT: test 37 | BATCH_SIZE: 32 # training Batch size 38 | MEAN: False 39 | NUM_SAMPLES: 1 40 | FACT: 1 41 | # REPLICATION_TIM 42 | 43 | # Datasets Configuration 44 | DATASET: 45 | JOINT_TYPE: 'motionx' # join type 46 | VERSION: '' 47 | MOTION_TYPE: 'vector_623' 48 | TEXT_SOURCE: 'only_text_token' 49 | METRIC: 50 | TYPE: ['TemosMetric', 'TM2TMetrics'] 51 | # Losses Configuration 52 | LOSS: 53 | TYPE: temos # Losses type 54 | USE_INFONCE: True 55 | USE_INFONCE_FILTER: True 56 | LAMBDA_LATENT: 1.0e-5 # Lambda for latent Losses 57 | LAMBDA_KL: 1.0e-5 # Lambda for kl Losses 58 | LAMBDA_REC: 1.0 # Lambda for reconstruction Losses 59 | LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses 60 | LAMBDA_CROSS: 1.0 # Lambda for reconstruction Losses 61 | LAMBDA_CYCLE: 0.0 # Lambda for cycle Losses 62 | LAMBDA_PRIOR: 0.0 63 | LAMBDA_INFONCE: 0.1 # Lambda for infonce 64 | INFONCE_TEMP: 0.1 65 | DIST_SYNC_ON_STEP: False # Sync Losses on step when distributed trained 66 | USE_RECLIPLOSS: False 67 | SYNC: False 68 | TRAIN_TMR: False 69 | 70 | # Model Configuration 71 | model: 72 | vae: true # whether vae model 73 | model_type: temos # model type 74 | condition: 'text' 75 | target: modules_temos 76 | ##### 77 | latent_dim: 256 # latent dimension 78 | ff_size: 1024 # 79 | num_layers: 4 # number of layers 80 | num_head: 6 # number of head layers 81 | dropout: 0.1 # dropout rate 82 | activation: gelu # activation type 83 | eval_text_encode_way: glove_6B_GRU 84 | eval_text_source: caption 85 | 86 | # Logger configuration 87 | LOGGER: 88 | SAVE_CHECKPOINT_EPOCH: 100 89 | LOG_EVERY_STEPS: 1 90 | VAL_EVERY_STEPS: 100 91 | TENSORBOARD: True 92 | WANDB: 93 | PROJECT: null 94 | OFFLINE: False 95 | RESUME_ID: null 96 | -------------------------------------------------------------------------------- /OpenTMA/configs/configs_temos/UniMocap-TMR.yaml: -------------------------------------------------------------------------------- 1 | NAME: UniMocap-TMR-v1 # Experiment name 2 | DEBUG: False # Debug mode 3 | ACCELERATOR: 'gpu' # Devices optioncal: “cpu”, “gpu”, “tpu”, “ipu”, “hpu”, “mps, “auto” 4 | DEVICE: [0, 1, 2, 3] # Index of gpus eg. [0] or [0,1,2,3] 5 | # DEVICE: [0] # Index of gpus eg. 
[0] or [0,1,2,3] 6 | 7 | # Training configuration 8 | TRAIN: 9 | #--------------------------------- 10 | STAGE: temos # stage "vae" or "diffusion", "vae_diffusion" 11 | #--------------------------------- 12 | DATASETS: ['unimocap'] # Training datasets 13 | NUM_WORKERS: 11 # Number of workers 14 | BATCH_SIZE: 128 # Size of batches 15 | START_EPOCH: 0 # Start epochMMOTIONENCODER 16 | END_EPOCH: 4000 # End epoch 17 | RESUME: '' # Resume training from this path 18 | PRETRAINED_VAE: '' 19 | PRETRAINED_MLD: '' 20 | OPTIM: 21 | TYPE: AdamW # Optimizer type 22 | LR: 1e-4 # Learning rate 23 | 24 | # Evaluating Configuration 25 | EVAL: 26 | DATASETS: ['unimocap'] # Evaluating datasets 27 | BATCH_SIZE: 32 # Evaluating Batch size 28 | SPLIT: test 29 | 30 | # Test Configuration 31 | TEST: 32 | PRETRAINED_CHECKPOINTS_VAE: '' 33 | SAVE_PREDICTIONS: False 34 | CHECKPOINTS: './experiments/temos/UniMocap-TMR/checkpoints/epoch=299.ckpt' # Pretrained model path 35 | DATASETS: ['unimocap'] # training datasets 36 | SPLIT: test 37 | BATCH_SIZE: 32 # training Batch size 38 | MEAN: False 39 | NUM_SAMPLES: 1 40 | FACT: 1 41 | # REPLICATION_TIM 42 | 43 | # Datasets Configuration 44 | DATASET: 45 | JOINT_TYPE: 'humanml3d' # join type 46 | VERSION: '' 47 | MOTION_TYPE: 'vector_263' 48 | TEXT_SOURCE: 'only_text_token' 49 | METRIC: 50 | TYPE: ['TemosMetric', 'TM2TMetrics'] 51 | # Losses Configuration 52 | LOSS: 53 | TYPE: temos # Losses type 54 | USE_INFONCE: True 55 | USE_INFONCE_FILTER: True 56 | LAMBDA_LATENT: 1.0e-5 # Lambda for latent Losses 57 | LAMBDA_KL: 1.0e-5 # Lambda for kl Losses 58 | LAMBDA_REC: 1.0 # Lambda for reconstruction Losses 59 | LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses 60 | LAMBDA_CROSS: 1.0 # Lambda for reconstruction Losses 61 | LAMBDA_CYCLE: 0.0 # Lambda for cycle Losses 62 | LAMBDA_PRIOR: 0.0 63 | LAMBDA_INFONCE: 0.1 # Lambda for infonce 64 | INFONCE_TEMP: 0.1 65 | DIST_SYNC_ON_STEP: False # Sync Losses on step when distributed trained 66 | USE_RECLIPLOSS: False 67 | SYNC: False 68 | TRAIN_TMR: False 69 | 70 | # Model Configuration 71 | model: 72 | vae: true # whether vae model 73 | model_type: temos # model type 74 | condition: 'text' 75 | target: modules_temos 76 | ##### 77 | latent_dim: 256 # latent dimension 78 | ff_size: 1024 # 79 | num_layers: 6 # number of layers 80 | num_head: 6 # number of head layers 81 | dropout: 0.1 # dropout rate 82 | activation: gelu # activation type 83 | eval_text_encode_way: given_glove 84 | eval_text_source: only_text_token 85 | 86 | # Logger configuration 87 | LOGGER: 88 | SAVE_CHECKPOINT_EPOCH: 100 89 | LOG_EVERY_STEPS: 1 90 | VAL_EVERY_STEPS: 100 91 | TENSORBOARD: True 92 | WANDB: 93 | PROJECT: null 94 | OFFLINE: False 95 | RESUME_ID: null 96 | -------------------------------------------------------------------------------- /OpenTMA/configs/configs_temos/infonce.yaml: -------------------------------------------------------------------------------- 1 | _target_: temos.model.losses.InfoNCE 2 | -------------------------------------------------------------------------------- /OpenTMA/configs/modules_temos/motiondecoder.yaml: -------------------------------------------------------------------------------- 1 | motiondecoder: 2 | name: actor_decoder 3 | target: tma.models.architectures.temos.motiondecoder.actor.ActorAgnosticDecoder 4 | params: 5 | latent_dim: ${model.latent_dim} 6 | ff_size: ${model.ff_size} 7 | num_layers: ${model.num_layers} 8 | num_head: ${model.num_head} 9 | droupout: ${model.dropout} 10 | activation: ${model.activation} 11 
| nfeats: ${DATASET.NFEATS} -------------------------------------------------------------------------------- /OpenTMA/configs/modules_temos/motionencoder.yaml: -------------------------------------------------------------------------------- 1 | motionencoder: 2 | name: actor_encoder 3 | target: tma.models.architectures.temos.motionencoder.actor.ActorAgnosticEncoder 4 | params: 5 | latent_dim: ${model.latent_dim} 6 | vae: ${model.vae} 7 | ff_size: ${model.ff_size} 8 | num_layers: ${model.num_layers} 9 | num_head: ${model.num_head} 10 | droupout: ${model.dropout} 11 | activation: ${model.activation} 12 | nfeats: ${DATASET.NFEATS} -------------------------------------------------------------------------------- /OpenTMA/configs/modules_temos/text_encoder.yaml: -------------------------------------------------------------------------------- 1 | textencoder: 2 | name: distilbert_actor 3 | target: tma.models.architectures.temos.textencoder.distillbert_actor.DistilbertActorAgnosticEncoder 4 | params: 5 | latent_dim: ${model.latent_dim} 6 | vae: ${model.vae} 7 | ff_size: ${model.ff_size} 8 | num_layers: ${model.num_layers} 9 | num_head: ${model.num_head} 10 | droupout: ${model.dropout} 11 | activation: ${model.activation} 12 | finetune: false 13 | modelpath: ${model.bert_path} -------------------------------------------------------------------------------- /OpenTMA/requirements.txt: -------------------------------------------------------------------------------- 1 | einops==0.8.0 2 | human_body_prior==0.8.5.0 3 | hydra-core==1.3.2 4 | ipdb==0.13.13 5 | joblib==1.2.0 6 | matplotlib==3.7.1 7 | numpy==1.23.0 8 | omegaconf==2.3.0 9 | opencv_python==4.8.0.76 10 | Pillow==10.3.0 11 | psutil==5.9.5 12 | pytorch_lightning==1.9.0 13 | rich==13.7.1 14 | scikit_learn==1.2.2 15 | scipy==1.13.0 16 | sentence_transformers==2.2.2 17 | shortuuid==1.0.13 18 | smplx==0.1.28 19 | spacy==3.6.0 20 | tensorboardX==2.6.2.2 21 | torch==2.1.2 22 | torchmetrics==0.7.0 23 | torchtext==0.16.2 24 | tqdm==4.65.0 25 | transformers==4.30.2 26 | -------------------------------------------------------------------------------- /OpenTMA/retrieval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | path1="./experiments/temos/H3D-TMR-v1/embeddings/val/epoch_99/" 4 | path2="./experiments/temos/H3D-TMR-v1/embeddings/val/epoch_599/" 5 | path3="./experiments/temos/H3D-TMR-v1/embeddings/val/epoch_999/" 6 | 7 | 8 | for protocal in A B D 9 | do 10 | echo "**protocal" $protocal"**" 11 | for retrieval_type in T2M M2T 12 | do 13 | echo $retrieval_type 14 | python retrieval.py --retrieval_type $retrieval_type --protocal $protocal --expdirs $path1 $path2 $path3 15 | done 16 | done 17 | -------------------------------------------------------------------------------- /OpenTMA/tma/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/callback/__init__.py: -------------------------------------------------------------------------------- 1 | from .progress import ProgressLogger 2 | -------------------------------------------------------------------------------- /OpenTMA/tma/callback/progress.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pytorch_lightning import LightningModule, 
Trainer 3 | from pytorch_lightning.callbacks import Callback 4 | import psutil 5 | 6 | logger = logging.getLogger() 7 | 8 | 9 | class ProgressLogger(Callback): 10 | """ 11 | A custom callback class for PyTorch Lightning that logs progress information during training. 12 | """ 13 | 14 | def __init__(self, metric_monitor: dict, precision: int = 3): 15 | # Metric to monitor 16 | self.metric_monitor = metric_monitor 17 | self.precision = precision 18 | 19 | def on_train_start( 20 | self, trainer: Trainer, pl_module: LightningModule, **kwargs 21 | ) -> None: 22 | # Log a message when training starts 23 | logger.info("Training started") 24 | 25 | def on_train_end( 26 | self, trainer: Trainer, pl_module: LightningModule, **kwargs 27 | ) -> None: 28 | # Log a message when training ends 29 | logger.info("Training done") 30 | 31 | def on_validation_epoch_end( 32 | self, trainer: Trainer, pl_module: LightningModule, **kwargs 33 | ) -> None: 34 | # Log a message when a validation epoch ends 35 | if trainer.sanity_checking: 36 | logger.info("Sanity checking ok.") 37 | 38 | def on_train_epoch_end( 39 | self, trainer: Trainer, pl_module: LightningModule, padding=False, **kwargs 40 | ) -> None: 41 | # Log a message when a training epoch ends 42 | # Format for logging metrics 43 | metric_format = f"{{:.{self.precision}e}}" 44 | # Start the log line with the epoch number 45 | line = f"Epoch {trainer.current_epoch}" 46 | if padding: 47 | line = f"{line:>{len('Epoch xxxx')}}" # Right padding 48 | metrics_str = [] 49 | 50 | losses_dict = trainer.callback_metrics 51 | for metric_name, dico_name in self.metric_monitor.items(): 52 | # If the metric is in the dictionary, format it and add it to the log line 53 | if dico_name in losses_dict: 54 | metric = losses_dict[dico_name].item() 55 | metric = metric_format.format(metric) 56 | metric = f"{metric_name} {metric}" 57 | metrics_str.append(metric) 58 | 59 | # If there are no metrics, return 60 | if len(metrics_str) == 0: 61 | return 62 | 63 | # Add the current memory usage to the log line 64 | memory = f"Memory {psutil.virtual_memory().percent}%" 65 | line = line + ": " + " ".join(metrics_str) + " " + memory 66 | logger.info(line) 67 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/HumanML3D.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from tma.data.humanml.scripts.motion_process import ( 5 | process_file, 6 | recover_from_ric, 7 | recover_from_root_rot6d, 8 | ) 9 | 10 | from .base import BASEDataModule 11 | from .humanml.data.dataset import Text2MotionDatasetV2, TextOnlyDataset 12 | from .humanml.common.skeleton import Skeleton 13 | 14 | 15 | class HumanML3DDataModule(BASEDataModule): 16 | 17 | def __init__( 18 | self, cfg, batch_size, num_workers, collate_fn=None, phase="train", **kwargs 19 | ): 20 | super().__init__( 21 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 22 | ) 23 | self.save_hyperparameters(logger=False) 24 | self.name = "humanml3d" 25 | self.njoints = 22 26 | self.hparams["njoints"] = 22 27 | if phase == "text_only": 28 | self.Dataset = TextOnlyDataset 29 | else: 30 | self.Dataset = Text2MotionDatasetV2 31 | self.cfg = cfg 32 | sample_overrides = {"split": "val", "tiny": True, "progress_bar": False} 33 | 34 | self._sample_set = self.get_sample_set(overrides=sample_overrides) 35 | # Get additional info of the dataset 36 | # import pdb; pdb.set_trace() 37 | self.nfeats = 
self._sample_set.nfeats 38 | # self.transforms = self._sample_set.transforms 39 | 40 | def feats2joints(self, features, skel=None, motion_type="vector_263"): 41 | # mean = torch.tensor(self.hparams.mean).to(features) 42 | # std = torch.tensor(self.hparams.std).to(features) 43 | # features = features * std + mean 44 | # return recover_from_ric(features, self.njoints) 45 | if motion_type in [ 46 | "vector_263", 47 | "root_position", 48 | "root_position_vel", 49 | "root_position_rot6d", 50 | ]: 51 | mean = torch.tensor(self.hparams.mean).to(features) 52 | std = torch.tensor(self.hparams.std).to(features) 53 | features = features * std + mean 54 | return recover_from_ric( 55 | features, self.njoints 56 | ) # torch.Size([32, 92, 22, 3]) 57 | elif motion_type in ["root_rot6d"]: 58 | mean = torch.tensor(self.hparams.mean).to(features) 59 | std = torch.tensor(self.hparams.std).to(features) 60 | features = features * std + mean 61 | 62 | # skeleton = Skeleton(n_raw_offsets, kinematic_chain, ) 63 | return recover_from_root_rot6d(features, self.njoints, skel) 64 | elif motion_type == "smplx_212": 65 | assert smplx_model is not None 66 | mean = torch.tensor(self.hparams.mean).to(features) 67 | std = torch.tensor(self.hparams.std).to(features) 68 | features = features * (std + 1e-7) + mean 69 | bs = features.shape[0] 70 | features = features.reshape(-1, 212) 71 | output = smplx_model.smplx_model( 72 | pose_body=features[:, 3:66], 73 | pose_hand=features[:, 66:156], 74 | root_orient=features[:, :3], 75 | ).Jtr 76 | return output.reshape(bs, -1, 55, 3) # torch.Size([32, 96, 55, 3]) 77 | else: 78 | raise NotImplementedError 79 | 80 | def joints2feats(self, features): 81 | features = process_file(features, self.njoints)[0] 82 | # mean = torch.tensor(self.hparams.mean).to(features) 83 | # std = torch.tensor(self.hparams.std).to(features) 84 | # features = (features - mean) / std 85 | return features 86 | 87 | def renorm4t2m(self, features): 88 | # renorm to t2m norms for using t2m evaluators 89 | ori_mean = torch.tensor(self.hparams.mean).to(features) 90 | ori_std = torch.tensor(self.hparams.std).to(features) 91 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 92 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 93 | features = features * ori_std + ori_mean 94 | features = (features - eval_mean) / eval_std 95 | return features 96 | 97 | def mm_mode(self, mm_on=True): 98 | # random select samples for mm 99 | if mm_on: 100 | self.is_mm = True 101 | self.name_list = self.test_dataset.name_list 102 | self.mm_list = np.random.choice( 103 | self.name_list, self.cfg.TEST.MM_NUM_SAMPLES, replace=False 104 | ) 105 | self.test_dataset.name_list = self.mm_list 106 | else: 107 | self.is_mm = False 108 | self.test_dataset.name_list = self.name_list 109 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/Humanact12.py: -------------------------------------------------------------------------------- 1 | from .base import BASEDataModule 2 | from .a2m import HumanAct12Poses 3 | import numpy as np 4 | 5 | 6 | class Humanact12DataModule(BASEDataModule): 7 | 8 | def __init__( 9 | self, cfg, batch_size, num_workers, collate_fn=None, phase="train", **kwargs 10 | ): 11 | super().__init__( 12 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 13 | ) 14 | self.save_hyperparameters(logger=False) 15 | self.name = "HumanAct12" 16 | self.Dataset = HumanAct12Poses 17 | self.cfg = cfg 18 | sample_overrides = { 19 | "num_seq_max": 2, 20 | 
"split": "test", 21 | "tiny": True, 22 | "progress_bar": False, 23 | } 24 | # self._sample_set = self.get_sample_set(overrides=sample_overrides) 25 | # Get additional info of the dataset 26 | self.nfeats = 150 27 | self.njoints = 25 28 | self.nclasses = 12 29 | # self.transforms = self._sample_set.transforms 30 | 31 | # def mm_mode(self, mm_on=True): 32 | # # random select samples for mm 33 | # if mm_on: 34 | # self.is_mm = True 35 | # if self.split == 'train': 36 | # self.name_list = self.test_dataset._train[index] 37 | # else: 38 | # self.name_list = self.test_dataset._test[index] 39 | # self.name_list = self.test_dataset.name_list 40 | # self.mm_list = np.random.choice(self.name_list, 41 | # self.cfg.TEST.MM_NUM_SAMPLES, 42 | # replace=False) 43 | # self.test_dataset.name_list = self.mm_list 44 | # else: 45 | # self.is_mm = False 46 | # self.test_dataset.name_list = self.name_list 47 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/Kit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from tma.data.humanml.scripts.motion_process import recover_from_ric 5 | 6 | from .base import BASEDataModule 7 | from .humanml.data.dataset import Text2MotionDatasetV2, TextOnlyDataset 8 | from .utils import all_collate 9 | 10 | 11 | class KitDataModule(BASEDataModule): 12 | 13 | def __init__( 14 | self, 15 | cfg, 16 | phase="train", 17 | collate_fn=all_collate, 18 | batch_size: int = 32, 19 | num_workers: int = 16, 20 | **kwargs 21 | ): 22 | super().__init__( 23 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 24 | ) 25 | self.save_hyperparameters(logger=False) 26 | self.name = "kit" 27 | self.njoints = 21 28 | if phase == "text_only": 29 | self.Dataset = TextOnlyDataset 30 | else: 31 | self.Dataset = Text2MotionDatasetV2 32 | self.cfg = cfg 33 | 34 | sample_overrides = {"split": "val", "tiny": True, "progress_bar": False} 35 | self._sample_set = self.get_sample_set(overrides=sample_overrides) 36 | 37 | # Get additional info of the dataset 38 | self.nfeats = self._sample_set.nfeats 39 | # self.transforms = self._sample_set.transforms 40 | 41 | def feats2joints(self, features): 42 | mean = torch.tensor(self.hparams.mean).to(features) 43 | std = torch.tensor(self.hparams.std).to(features) 44 | features = features * std + mean 45 | return recover_from_ric(features, self.njoints) 46 | 47 | def renorm4t2m(self, features): 48 | # renorm to t2m norms for using t2m evaluators 49 | ori_mean = torch.tensor(self.hparams.mean).to(features) 50 | ori_std = torch.tensor(self.hparams.std).to(features) 51 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 52 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 53 | features = features * ori_std + ori_mean 54 | features = (features - eval_mean) / eval_std 55 | return features 56 | 57 | def mm_mode(self, mm_on=True): 58 | # random select samples for mm 59 | if mm_on: 60 | self.is_mm = True 61 | self.name_list = self.test_dataset.name_list 62 | self.mm_list = np.random.choice( 63 | self.name_list, self.cfg.TEST.MM_NUM_SAMPLES, replace=False 64 | ) 65 | self.test_dataset.name_list = self.mm_list 66 | else: 67 | self.is_mm = False 68 | self.test_dataset.name_list = self.name_list 69 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/MotionX.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 
import torch 3 | 4 | from tma.data.humanml.scripts.motion_process import process_file, recover_from_ric 5 | 6 | from .base import BASEDataModule 7 | from .humanml.data.dataset import ( 8 | Text2MotionDatasetMotionX, 9 | Text2MotionDatasetMotionX_text_all, 10 | ) 11 | 12 | 13 | class Motion_XDataModule(BASEDataModule): 14 | 15 | def __init__( 16 | self, cfg, batch_size, num_workers, collate_fn=None, phase="train", **kwargs 17 | ): 18 | super().__init__( 19 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 20 | ) 21 | self.save_hyperparameters(logger=False) 22 | self.name = "motionx" 23 | if cfg.DATASET.JOINT_TYPE == "humanml3d": 24 | self.njoints = 22 25 | elif cfg.DATASET.JOINT_TYPE == "motionx": 26 | self.njoints = 52 27 | else: 28 | raise NotImplemented 29 | 30 | if phase == "text_only": 31 | self.Dataset = TextOnlyDataset 32 | else: 33 | if cfg.model.condition in [ 34 | "text_all", 35 | "text_face", 36 | "text_body", 37 | "text_hand", 38 | "text_face_body", 39 | "text_seperate", 40 | "only_pose_concat", 41 | "only_pose_fusion", 42 | ]: 43 | self.Dataset = Text2MotionDatasetMotionX_text_all 44 | else: 45 | self.Dataset = Text2MotionDatasetMotionX 46 | 47 | self.cfg = cfg 48 | sample_overrides = {"split": "val", "tiny": True, "progress_bar": False} 49 | self._sample_set = self.get_sample_set(overrides=sample_overrides) 50 | 51 | # Get additional info of the dataset 52 | self.nfeats = self._sample_set.nfeats 53 | # self.transforms = self._sample_set.transforms 54 | 55 | def feats2joints(self, features, motion_type, smplx_model=None): 56 | # import pdb; pdb.set_trace() 57 | if motion_type in ["vector_263", "vector_623"]: 58 | mean = torch.tensor(self.hparams.mean).to(features) 59 | std = torch.tensor(self.hparams.std).to(features) 60 | features = features * std + mean 61 | 62 | return recover_from_ric( 63 | features, self.njoints 64 | ) # torch.Size([32, 92, 22, 3]) 65 | elif motion_type == "smplx_212": 66 | assert smplx_model is not None 67 | mean = torch.tensor(self.hparams.mean).to(features) 68 | std = torch.tensor(self.hparams.std).to(features) 69 | features = features * (std + 1e-7) + mean 70 | bs = features.shape[0] 71 | features = features.reshape(-1, 212) 72 | output = smplx_model.smplx_model( 73 | pose_body=features[:, 3:66], 74 | pose_hand=features[:, 66:156], 75 | root_orient=features[:, :3], 76 | ).Jtr 77 | return output.reshape(bs, -1, 55, 3) # torch.Size([32, 96, 55, 3]) 78 | else: 79 | raise NotImplementedError 80 | 81 | def joints2feats(self, features): 82 | features = process_file(features, self.njoints)[0] 83 | # mean = torch.tensor(self.hparams.mean).to(features) 84 | # std = torch.tensor(self.hparams.std).to(features) 85 | # features = (features - mean) / std 86 | return features 87 | 88 | def renorm4t2m(self, features): 89 | # renorm to t2m norms for using t2m evaluators 90 | ori_mean = torch.tensor(self.hparams.mean).to(features) 91 | ori_std = torch.tensor(self.hparams.std).to(features) 92 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 93 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 94 | features = features * (ori_std + 1e-7) + ori_mean 95 | features = (features - eval_mean) / (eval_std + 1e-7) 96 | return features 97 | 98 | def renormt2m_back(self, features): 99 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 100 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 101 | features = features * (eval_std + 1e-7) + eval_mean 102 | return features 103 | 104 | def mm_mode(self, mm_on=True): 
105 | # random select samples for mm 106 | if mm_on: 107 | self.is_mm = True 108 | self.name_list = self.test_dataset.name_list 109 | self.mm_list = np.random.choice( 110 | self.name_list, self.cfg.TEST.MM_NUM_SAMPLES, replace=False 111 | ) 112 | self.test_dataset.name_list = self.mm_list 113 | else: 114 | self.is_mm = False 115 | self.test_dataset.name_list = self.name_list 116 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/Uestc.py: -------------------------------------------------------------------------------- 1 | from .base import BASEDataModule 2 | from .a2m import UESTC 3 | import os 4 | import rich.progress 5 | import pickle as pkl 6 | 7 | 8 | class UestcDataModule(BASEDataModule): 9 | 10 | def __init__( 11 | self, 12 | cfg, 13 | batch_size, 14 | num_workers, 15 | collate_fn=None, 16 | method_name="vibe", 17 | phase="train", 18 | **kwargs 19 | ): 20 | super().__init__( 21 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 22 | ) 23 | self.save_hyperparameters(logger=False) 24 | self.name = "Uestc" 25 | 26 | self.Dataset = UESTC 27 | self.cfg = cfg 28 | 29 | # self._sample_set = self.get_sample_set(overrides=sample_overrides) 30 | # Get additional info of the dataset 31 | self.nfeats = 150 32 | self.njoints = 25 33 | self.nclasses = 40 34 | # self.transforms = self._sample_set.transforms 35 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/UniMocap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from tma.data.humanml.scripts.motion_process import ( 5 | process_file, 6 | recover_from_ric, 7 | recover_from_root_rot6d, 8 | ) 9 | 10 | from .base import BASEDataModule 11 | from .humanml.data.dataset import UniMocapDataset 12 | from .humanml.common.skeleton import Skeleton 13 | 14 | 15 | class UniMocapDataModule(BASEDataModule): 16 | 17 | def __init__( 18 | self, cfg, batch_size, num_workers, collate_fn=None, phase="train", **kwargs 19 | ): 20 | super().__init__( 21 | batch_size=batch_size, num_workers=num_workers, collate_fn=collate_fn 22 | ) 23 | 24 | self.save_hyperparameters(logger=False) 25 | self.name = "unimocap" 26 | self.njoints = 22 27 | self.hparams["njoints"] = 22 28 | if phase == "text_only": 29 | self.Dataset = None 30 | else: 31 | self.Dataset = UniMocapDataset 32 | self.cfg = cfg 33 | sample_overrides = {"split": "val", "tiny": True, "progress_bar": False} 34 | 35 | self._sample_set = self.get_sample_set(overrides=sample_overrides) 36 | 37 | # Get additional info of the dataset 38 | self.nfeats = self._sample_set.nfeats 39 | # self.transforms = self._sample_set.transforms 40 | 41 | def feats2joints(self, features, skel=None, motion_type="vector_263"): 42 | if motion_type in [ 43 | "vector_263", 44 | "root_position", 45 | "root_position_vel", 46 | "root_position_rot6d", 47 | ]: 48 | mean = torch.tensor(self.hparams.mean).to(features) 49 | std = torch.tensor(self.hparams.std).to(features) 50 | features = features * std + mean 51 | return recover_from_ric( 52 | features, self.njoints 53 | ) # torch.Size([32, 92, 22, 3]) 54 | elif motion_type in ["root_rot6d"]: 55 | mean = torch.tensor(self.hparams.mean).to(features) 56 | std = torch.tensor(self.hparams.std).to(features) 57 | features = features * std + mean 58 | 59 | # skeleton = Skeleton(n_raw_offsets, kinematic_chain, ) 60 | return recover_from_root_rot6d(features, self.njoints, skel) 61 | elif motion_type == 
"smplx_212": 62 | assert smplx_model is not None 63 | mean = torch.tensor(self.hparams.mean).to(features) 64 | std = torch.tensor(self.hparams.std).to(features) 65 | features = features * (std + 1e-7) + mean 66 | bs = features.shape[0] 67 | features = features.reshape(-1, 212) 68 | output = smplx_model.smplx_model( 69 | pose_body=features[:, 3:66], 70 | pose_hand=features[:, 66:156], 71 | root_orient=features[:, :3], 72 | ).Jtr 73 | return output.reshape(bs, -1, 55, 3) # torch.Size([32, 96, 55, 3]) 74 | else: 75 | raise NotImplementedError 76 | 77 | def joints2feats(self, features): 78 | features = process_file(features, self.njoints)[0] 79 | # mean = torch.tensor(self.hparams.mean).to(features) 80 | # std = torch.tensor(self.hparams.std).to(features) 81 | # features = (features - mean) / std 82 | return features 83 | 84 | def renorm4t2m(self, features): 85 | # renorm to t2m norms for using t2m evaluators 86 | ori_mean = torch.tensor(self.hparams.mean).to(features) 87 | ori_std = torch.tensor(self.hparams.std).to(features) 88 | eval_mean = torch.tensor(self.hparams.mean_eval).to(features) 89 | eval_std = torch.tensor(self.hparams.std_eval).to(features) 90 | features = features * ori_std + ori_mean 91 | features = (features - eval_mean) / eval_std 92 | return features 93 | 94 | def mm_mode(self, mm_on=True): 95 | # random select samples for mm 96 | if mm_on: 97 | self.is_mm = True 98 | self.name_list = self.test_dataset.name_list 99 | self.mm_list = np.random.choice( 100 | self.name_list, self.cfg.TEST.MM_NUM_SAMPLES, replace=False 101 | ) 102 | self.test_dataset.name_list = self.mm_list 103 | else: 104 | self.is_mm = False 105 | self.test_dataset.name_list = self.name_list 106 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/data/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/__init__.py: -------------------------------------------------------------------------------- 1 | from .humanact12poses import HumanAct12Poses 2 | from .uestc import UESTC 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/humanact12poses.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle as pkl 3 | 4 | import numpy as np 5 | 6 | from .dataset import Dataset 7 | from .utils import rotation_conversions as geometry 8 | import rich.progress 9 | 10 | 11 | class HumanAct12Poses(Dataset): 12 | dataname = "humanact12" 13 | 14 | def __init__(self, datapath="data/HumanAct12Poses", **kargs): 15 | self.datapath = datapath 16 | 17 | super().__init__(**kargs) 18 | 19 | pkldatafilepath = os.path.join(datapath, "humanact12poses.pkl") 20 | with rich.progress.open(pkldatafilepath, "rb", description="loading humanact12 pkl") as f: 21 | data = pkl.load(f) 22 | 23 | self._pose = [x for x in data["poses"]] 24 | self._num_frames_in_video = [p.shape[0] for p in self._pose] 25 | self._joints = [x for x in data["joints3D"]] 26 | 27 | self._actions = [x for x in data["y"]] 28 | 29 | total_num_actions = 12 30 | self.num_classes = total_num_actions 31 | 32 | self._train = list(range(len(self._pose))) 33 | 34 | keep_actions = np.arange(0, total_num_actions) 35 | 36 | self._action_to_label = {x: i for i, x 
in enumerate(keep_actions)} 37 | self._label_to_action = {i: x for i, x in enumerate(keep_actions)} 38 | 39 | self._action_classes = humanact12_coarse_action_enumerator 40 | 41 | def _load_joints3D(self, ind, frame_ix): 42 | return self._joints[ind][frame_ix] 43 | 44 | def _load_rotvec(self, ind, frame_ix): 45 | pose = self._pose[ind][frame_ix].reshape(-1, 24, 3) 46 | return pose 47 | 48 | 49 | humanact12_coarse_action_enumerator = { 50 | 0: "warm_up", 51 | 1: "walk", 52 | 2: "run", 53 | 3: "jump", 54 | 4: "drink", 55 | 5: "lift_dumbbell", 56 | 6: "sit", 57 | 7: "eat", 58 | 8: "turn steering wheel", 59 | 9: "phone", 60 | 10: "boxing", 61 | 11: "throw", 62 | } 63 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/tools.py: -------------------------------------------------------------------------------- 1 | import os 2 | import string 3 | 4 | 5 | def parse_info_name(path): 6 | name = os.path.splitext(os.path.split(path)[-1])[0] 7 | info = {} 8 | current_letter = None 9 | for letter in name: 10 | if letter in string.ascii_letters: 11 | info[letter] = [] 12 | current_letter = letter 13 | else: 14 | info[current_letter].append(letter) 15 | for key in info.keys(): 16 | info[key] = "".join(info[key]) 17 | return info 18 | 19 | 20 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/data/a2m/utils/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/utils/misc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def to_numpy(tensor): 5 | if torch.is_tensor(tensor): 6 | return tensor.cpu().numpy() 7 | elif type(tensor).__module__ != 'numpy': 8 | raise ValueError("Cannot convert {} to numpy array".format( 9 | type(tensor))) 10 | return tensor 11 | 12 | 13 | def to_torch(ndarray): 14 | if type(ndarray).__module__ == 'numpy': 15 | return torch.from_numpy(ndarray) 16 | elif not torch.is_tensor(ndarray): 17 | raise ValueError("Cannot convert {} to torch tensor".format( 18 | type(ndarray))) 19 | return ndarray 20 | 21 | 22 | def cleanexit(): 23 | import sys 24 | import os 25 | try: 26 | sys.exit(0) 27 | except SystemExit: 28 | os._exit(0) 29 | 30 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/a2m/utils/tensors.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def lengths_to_mask(lengths): 5 | max_len = max(lengths) 6 | mask = torch.arange(max_len, device=lengths.device).expand(len(lengths), max_len) < lengths.unsqueeze(1) 7 | return mask 8 | 9 | 10 | def collate_tensors(batch): 11 | dims = batch[0].dim() 12 | max_size = [max([b.size(i) for b in batch]) for i in range(dims)] 13 | size = (len(batch),) + tuple(max_size) 14 | canvas = batch[0].new_zeros(size=size) 15 | for i, b in enumerate(batch): 16 | sub_tensor = canvas[i] 17 | for d in range(dims): 18 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 19 | sub_tensor.add_(b) 20 | return canvas 21 | 22 | 23 | def collate(batch): 24 | databatch = [b[0] for b in batch] 25 | labelbatch = [b[1] for b in batch] 26 | lenbatch = [len(b[0][0][0]) for b in batch] 27 | 28 | databatchTensor = 
collate_tensors(databatch) 29 | labelbatchTensor = torch.as_tensor(labelbatch) 30 | lenbatchTensor = torch.as_tensor(lenbatch) 31 | 32 | maskbatchTensor = lengths_to_mask(lenbatchTensor) 33 | batch = {"x": databatchTensor, "y": labelbatchTensor, 34 | "mask": maskbatchTensor, "lengths": lenbatchTensor} 35 | return batch 36 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/base.py: -------------------------------------------------------------------------------- 1 | from os.path import join as pjoin 2 | import numpy as np 3 | import pytorch_lightning as pl 4 | from torch.utils.data import DataLoader 5 | 6 | 7 | class BASEDataModule(pl.LightningDataModule): 8 | 9 | def __init__(self, collate_fn, batch_size: int, num_workers: int): 10 | super().__init__() 11 | 12 | # self.dataloader_options = { 13 | # "batch_size": batch_size, "num_workers": num_workers,"collate_fn": collate_datastruct_and_text} 14 | self.dataloader_options = { 15 | "batch_size": batch_size, 16 | "num_workers": num_workers, 17 | "collate_fn": collate_fn, 18 | } 19 | 20 | # self.collate_fn = collate_fn 21 | self.persistent_workers = True 22 | self.is_mm = False 23 | # need to be overloaded: 24 | # - self.Dataset 25 | # - self._sample_set => load only a small subset 26 | # There is an helper bellow (get_sample_set) 27 | # - self.nfeats 28 | # - self.transforms 29 | 30 | def get_sample_set(self, overrides={}): 31 | sample_params = self.hparams.copy() 32 | sample_params.update(overrides) 33 | # import pdb; pdb.set_trace() 34 | 35 | split_file = pjoin( 36 | eval(f"self.cfg.DATASET.{self.name.upper()}.SPLIT_ROOT"), 37 | self.cfg.DATASET.VERSION, 38 | self.cfg.EVAL.SPLIT + ".txt", 39 | ) 40 | # import pdb; pdb.set_trace() 41 | return self.Dataset(split_file=split_file, **sample_params) 42 | 43 | def __getattr__(self, item): 44 | # train_dataset/val_dataset etc cached like properties 45 | if item.endswith("_dataset") and not item.startswith("_"): 46 | subset = item[: -len("_dataset")] 47 | item_c = "_" + item 48 | if item_c not in self.__dict__: 49 | # todo: config name not consistent 50 | subset = subset.upper() if subset != "val" else "EVAL" 51 | split = eval(f"self.cfg.{subset}.SPLIT") 52 | split_file = pjoin( 53 | eval(f"self.cfg.DATASET.{self.name.upper()}.SPLIT_ROOT"), 54 | self.cfg.DATASET.VERSION, 55 | eval(f"self.cfg.{subset}.SPLIT") + ".txt", 56 | ) 57 | self.__dict__[item_c] = self.Dataset( 58 | split_file=split_file, split=split, **self.hparams 59 | ) 60 | return getattr(self, item_c) 61 | classname = self.__class__.__name__ 62 | raise AttributeError(f"'{classname}' object has no attribute '{item}'") 63 | 64 | def setup(self, stage=None): 65 | self.stage = stage 66 | # Use the getter the first time to load the data 67 | if stage in (None, "fit"): 68 | _ = self.train_dataset 69 | _ = self.val_dataset 70 | if stage in (None, "test"): 71 | _ = self.test_dataset 72 | 73 | def train_dataloader(self): 74 | return DataLoader( 75 | self.train_dataset, 76 | shuffle=True, 77 | persistent_workers=True, 78 | **self.dataloader_options, 79 | ) 80 | 81 | def predict_dataloader(self): 82 | dataloader_options = self.dataloader_options.copy() 83 | dataloader_options["batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE 84 | dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS 85 | dataloader_options["shuffle"] = False 86 | return DataLoader( 87 | self.test_dataset, 88 | persistent_workers=True, 89 | **dataloader_options, 90 | ) 91 | 92 | def val_dataloader(self): 93 | # 
overrides batch_size and num_workers 94 | dataloader_options = self.dataloader_options.copy() 95 | dataloader_options["batch_size"] = self.cfg.EVAL.BATCH_SIZE 96 | dataloader_options["num_workers"] = self.cfg.EVAL.NUM_WORKERS 97 | dataloader_options["shuffle"] = False 98 | 99 | return DataLoader( 100 | self.val_dataset, 101 | persistent_workers=True, 102 | **dataloader_options, 103 | ) 104 | 105 | def test_dataloader(self): 106 | # overrides batch_size and num_workers 107 | dataloader_options = self.dataloader_options.copy() 108 | dataloader_options["batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE 109 | dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS 110 | # dataloader_options["drop_last"] = True 111 | dataloader_options["shuffle"] = False 112 | return DataLoader( 113 | self.test_dataset, 114 | persistent_workers=True, 115 | **dataloader_options, 116 | ) 117 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/README.md: -------------------------------------------------------------------------------- 1 | This code is based on https://github.com/EricGuo5513/text-to-motion.git -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/data/humanml/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/data/humanml/data/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/data/humanml/utils/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/utils/get_opt.py: -------------------------------------------------------------------------------- 1 | import os 2 | from argparse import Namespace 3 | import re 4 | from os.path import join as pjoin 5 | from .word_vectorizer import POS_enumerator 6 | 7 | 8 | def is_float(numStr): 9 | flag = False 10 | numStr = str(numStr).strip().lstrip("-").lstrip("+") 11 | try: 12 | reg = re.compile(r"^[-+]?[0-9]+\.[0-9]+$") 13 | res = reg.match(str(numStr)) 14 | if res: 15 | flag = True 16 | except Exception as ex: 17 | print("is_float() - error: " + str(ex)) 18 | return flag 19 | 20 | 21 | def is_number(numStr): 22 | flag = False 23 | numStr = str(numStr).strip().lstrip("-").lstrip("+") 24 | if str(numStr).isdigit(): 25 | flag = True 26 | return flag 27 | 28 | 29 | def get_opt(opt_path, device): 30 | opt = Namespace() 31 | opt_dict = vars(opt) 32 | 33 | skip = ( 34 | "-------------- End ----------------", 35 | "------------ Options -------------", 36 | "\n", 37 | ) 38 | print("Reading", opt_path) 39 | with open(opt_path) as f: 40 | for line in f: 41 | if line.strip() not in skip: 42 | # print(line.strip()) 43 | key, value = line.strip().split(": ") 44 | if value in ("True", "False"): 45 | opt_dict[key] = bool(value) 46 | elif 
is_float(value): 47 | opt_dict[key] = float(value) 48 | elif is_number(value): 49 | opt_dict[key] = int(value) 50 | else: 51 | opt_dict[key] = str(value) 52 | 53 | # print(opt) 54 | opt_dict["which_epoch"] = "latest" 55 | opt.save_root = pjoin(opt.checkpoints_dir, opt.dataset_name, opt.name) 56 | opt.model_dir = pjoin(opt.save_root, "model") 57 | opt.meta_dir = pjoin(opt.save_root, "meta") 58 | 59 | if opt.dataset_name == "t2m": 60 | opt.data_root = "./dataset/HumanML3D" 61 | opt.motion_dir = pjoin(opt.data_root, "new_joint_vecs") 62 | opt.text_dir = pjoin(opt.data_root, "texts") 63 | opt.joints_num = 22 64 | opt.dim_pose = 263 65 | opt.max_motion_length = 196 66 | elif opt.dataset_name == "kit": 67 | opt.data_root = "./dataset/KIT-ML" 68 | opt.motion_dir = pjoin(opt.data_root, "new_joint_vecs") 69 | opt.text_dir = pjoin(opt.data_root, "texts") 70 | opt.joints_num = 21 71 | opt.dim_pose = 251 72 | opt.max_motion_length = 196 73 | else: 74 | raise KeyError("Dataset not recognized") 75 | 76 | opt.dim_word = 300 77 | opt.num_classes = 200 // opt.unit_length 78 | opt.dim_pos_ohot = len(POS_enumerator) 79 | opt.is_train = False 80 | opt.is_continue = False 81 | opt.device = device 82 | 83 | return opt 84 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import linalg 3 | 4 | 5 | # (X - X_train)*(X - X_train) = -2X*X_train + X*X + X_train*X_train 6 | def euclidean_distance_matrix(matrix1, matrix2): 7 | """ 8 | Params: 9 | -- matrix1: N1 x D 10 | -- matrix2: N2 x D 11 | Returns: 12 | -- dist: N1 x N2 13 | dist[i, j] == distance(matrix1[i], matrix2[j]) 14 | """ 15 | assert matrix1.shape[1] == matrix2.shape[1] 16 | d1 = -2 * np.dot(matrix1, matrix2.T) # shape (num_test, num_train) 17 | d2 = np.sum(np.square(matrix1), axis=1, keepdims=True) # shape (num_test, 1) 18 | d3 = np.sum(np.square(matrix2), axis=1) # shape (num_train, ) 19 | dists = np.sqrt(d1 + d2 + d3) # broadcasting 20 | return dists 21 | 22 | def calculate_top_k(mat, top_k): 23 | size = mat.shape[0] 24 | gt_mat = np.expand_dims(np.arange(size), 1).repeat(size, 1) 25 | bool_mat = (mat == gt_mat) 26 | correct_vec = False 27 | top_k_list = [] 28 | for i in range(top_k): 29 | # print(correct_vec, bool_mat[:, i]) 30 | correct_vec = (correct_vec | bool_mat[:, i]) 31 | # print(correct_vec) 32 | top_k_list.append(correct_vec[:, None]) 33 | top_k_mat = np.concatenate(top_k_list, axis=1) 34 | return top_k_mat 35 | 36 | 37 | def calculate_R_precision(embedding1, embedding2, top_k, sum_all=False): 38 | dist_mat = euclidean_distance_matrix(embedding1, embedding2) 39 | argmax = np.argsort(dist_mat, axis=1) 40 | top_k_mat = calculate_top_k(argmax, top_k) 41 | if sum_all: 42 | return top_k_mat.sum(axis=0) 43 | else: 44 | return top_k_mat 45 | 46 | 47 | def calculate_matching_score(embedding1, embedding2, sum_all=False): 48 | assert len(embedding1.shape) == 2 49 | assert embedding1.shape[0] == embedding2.shape[0] 50 | assert embedding1.shape[1] == embedding2.shape[1] 51 | 52 | dist = linalg.norm(embedding1 - embedding2, axis=1) 53 | if sum_all: 54 | return dist.sum(axis=0) 55 | else: 56 | return dist 57 | 58 | 59 | 60 | def calculate_activation_statistics(activations): 61 | """ 62 | Params: 63 | -- activation: num_samples x dim_feat 64 | Returns: 65 | -- mu: dim_feat 66 | -- sigma: dim_feat x dim_feat 67 | """ 68 | mu = np.mean(activations, axis=0) 69 | cov = 
np.cov(activations, rowvar=False) 70 | return mu, cov 71 | 72 | 73 | def calculate_diversity(activation, diversity_times): 74 | assert len(activation.shape) == 2 75 | assert activation.shape[0] > diversity_times 76 | num_samples = activation.shape[0] 77 | 78 | first_indices = np.random.choice(num_samples, diversity_times, replace=False) 79 | second_indices = np.random.choice(num_samples, diversity_times, replace=False) 80 | dist = linalg.norm(activation[first_indices] - activation[second_indices], axis=1) 81 | return dist.mean() 82 | 83 | 84 | def calculate_multimodality(activation, multimodality_times): 85 | assert len(activation.shape) == 3 86 | assert activation.shape[1] > multimodality_times 87 | num_per_sent = activation.shape[1] 88 | 89 | first_dices = np.random.choice(num_per_sent, multimodality_times, replace=False) 90 | second_dices = np.random.choice(num_per_sent, multimodality_times, replace=False) 91 | dist = linalg.norm(activation[:, first_dices] - activation[:, second_dices], axis=2) 92 | return dist.mean() 93 | 94 | 95 | def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6): 96 | """Numpy implementation of the Frechet Distance. 97 | The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1) 98 | and X_2 ~ N(mu_2, C_2) is 99 | d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)). 100 | Stable version by Dougal J. Sutherland. 101 | Params: 102 | -- mu1 : Numpy array containing the activations of a layer of the 103 | inception net (like returned by the function 'get_predictions') 104 | for generated samples. 105 | -- mu2 : The sample mean over activations, precalculated on an 106 | representative dataset set. 107 | -- sigma1: The covariance matrix over activations for generated samples. 108 | -- sigma2: The covariance matrix over activations, precalculated on an 109 | representative dataset set. 110 | Returns: 111 | -- : The Frechet Distance. 
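    Example (illustrative only; assumes two activation matrices of shape
    num_samples x dim_feat produced elsewhere):
        mu_gen, sigma_gen = calculate_activation_statistics(gen_activations)
        mu_gt, sigma_gt = calculate_activation_statistics(gt_activations)
        fid = calculate_frechet_distance(mu_gen, sigma_gen, mu_gt, sigma_gt)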
112 | """ 113 | 114 | mu1 = np.atleast_1d(mu1) 115 | mu2 = np.atleast_1d(mu2) 116 | 117 | sigma1 = np.atleast_2d(sigma1) 118 | sigma2 = np.atleast_2d(sigma2) 119 | 120 | assert mu1.shape == mu2.shape, \ 121 | 'Training and test mean vectors have different lengths' 122 | assert sigma1.shape == sigma2.shape, \ 123 | 'Training and test covariances have different dimensions' 124 | 125 | diff = mu1 - mu2 126 | 127 | # Product might be almost singular 128 | covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False) 129 | if not np.isfinite(covmean).all(): 130 | msg = ('fid calculation produces singular product; ' 131 | 'adding %s to diagonal of cov estimates') % eps 132 | print(msg) 133 | offset = np.eye(sigma1.shape[0]) * eps 134 | covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset)) 135 | 136 | # Numerical error might give slight imaginary component 137 | if np.iscomplexobj(covmean): 138 | if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3): 139 | m = np.max(np.abs(covmean.imag)) 140 | raise ValueError('Imaginary component {}'.format(m)) 141 | covmean = covmean.real 142 | 143 | tr_covmean = np.trace(covmean) 144 | 145 | return (diff.dot(diff) + np.trace(sigma1) + 146 | np.trace(sigma2) - 2 * tr_covmean) -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/utils/paramUtil.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Define a kinematic tree for the skeletal struture 4 | kit_kinematic_chain = [ 5 | [0, 11, 12, 13, 14, 15], 6 | [0, 16, 17, 18, 19, 20], 7 | [0, 1, 2, 3, 4], 8 | [3, 5, 6, 7], 9 | [3, 8, 9, 10], 10 | ] 11 | 12 | kit_raw_offsets = np.array( 13 | [ 14 | [0, 0, 0], 15 | [0, 1, 0], 16 | [0, 1, 0], 17 | [0, 1, 0], 18 | [0, 1, 0], 19 | [1, 0, 0], 20 | [0, -1, 0], 21 | [0, -1, 0], 22 | [-1, 0, 0], 23 | [0, -1, 0], 24 | [0, -1, 0], 25 | [1, 0, 0], 26 | [0, -1, 0], 27 | [0, -1, 0], 28 | [0, 0, 1], 29 | [0, 0, 1], 30 | [-1, 0, 0], 31 | [0, -1, 0], 32 | [0, -1, 0], 33 | [0, 0, 1], 34 | [0, 0, 1], 35 | ] 36 | ) 37 | 38 | t2m_raw_offsets = np.array( 39 | [ 40 | [0, 0, 0], 41 | [1, 0, 0], 42 | [-1, 0, 0], 43 | [0, 1, 0], 44 | [0, -1, 0], 45 | [0, -1, 0], 46 | [0, 1, 0], 47 | [0, -1, 0], 48 | [0, -1, 0], 49 | [0, 1, 0], 50 | [0, 0, 1], 51 | [0, 0, 1], 52 | [0, 1, 0], 53 | [1, 0, 0], 54 | [-1, 0, 0], 55 | [0, 0, 1], 56 | [0, -1, 0], 57 | [0, -1, 0], 58 | [0, -1, 0], 59 | [0, -1, 0], 60 | [0, -1, 0], 61 | [0, -1, 0], 62 | ] 63 | ) 64 | 65 | t2m_kinematic_chain = [ 66 | [0, 2, 5, 8, 11], 67 | [0, 1, 4, 7, 10], 68 | [0, 3, 6, 9, 12, 15], 69 | [9, 14, 17, 19, 21], 70 | [9, 13, 16, 18, 20], 71 | ] 72 | t2m_left_hand_chain = [ 73 | [20, 22, 23, 24], 74 | [20, 34, 35, 36], 75 | [20, 25, 26, 27], 76 | [20, 31, 32, 33], 77 | [20, 28, 29, 30], 78 | ] 79 | t2m_right_hand_chain = [ 80 | [21, 43, 44, 45], 81 | [21, 46, 47, 48], 82 | [21, 40, 41, 42], 83 | [21, 37, 38, 39], 84 | [21, 49, 50, 51], 85 | ] 86 | 87 | 88 | kit_tgt_skel_id = "03950" 89 | 90 | t2m_tgt_skel_id = "000021" 91 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/humanml/utils/plot_script.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | # import cv2 4 | from textwrap import wrap 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import mpl_toolkits.mplot3d.axes3d as p3 9 | import numpy as np 10 | from matplotlib.animation import FFMpegFileWriter, FuncAnimation 11 | from 
mpl_toolkits.mplot3d import Axes3D 12 | from mpl_toolkits.mplot3d.art3d import Poly3DCollection 13 | 14 | import tma.data.humanml.utils.paramUtil as paramUtil 15 | 16 | skeleton = paramUtil.t2m_kinematic_chain 17 | 18 | 19 | def list_cut_average(ll, intervals): 20 | if intervals == 1: 21 | return ll 22 | 23 | bins = math.ceil(len(ll) * 1.0 / intervals) 24 | ll_new = [] 25 | for i in range(bins): 26 | l_low = intervals * i 27 | l_high = l_low + intervals 28 | l_high = l_high if l_high < len(ll) else len(ll) 29 | ll_new.append(np.mean(ll[l_low:l_high])) 30 | return ll_new 31 | 32 | 33 | def plot_3d_motion( 34 | save_path, joints, title, figsize=(3, 3), fps=120, radius=3, kinematic_tree=skeleton 35 | ): 36 | matplotlib.use("Agg") 37 | 38 | # title_sp = title.split(' ') 39 | # if len(title_sp) > 20: 40 | # title = '\n'.join([' '.join(title_sp[:10]), ' '.join(title_sp[10:20]), ' '.join(title_sp[20:])]) 41 | # elif len(title_sp) > 10: 42 | # title = '\n'.join([' '.join(title_sp[:10]), ' '.join(title_sp[10:])]) 43 | title = "\n".join(wrap(title, 20)) 44 | 45 | def init(): 46 | ax.set_xlim3d([-radius / 2, radius / 2]) 47 | ax.set_ylim3d([0, radius]) 48 | ax.set_zlim3d([-radius / 3.0, radius * 2 / 3.0]) 49 | # print(title) 50 | fig.suptitle(title, fontsize=10) 51 | ax.grid(b=False) 52 | 53 | def plot_xzPlane(minx, maxx, miny, minz, maxz): 54 | # Plot a plane XZ 55 | verts = [ 56 | [minx, miny, minz], 57 | [minx, miny, maxz], 58 | [maxx, miny, maxz], 59 | [maxx, miny, minz], 60 | ] 61 | xz_plane = Poly3DCollection([verts]) 62 | xz_plane.set_facecolor((0.5, 0.5, 0.5, 0.5)) 63 | ax.add_collection3d(xz_plane) 64 | 65 | # return ax 66 | 67 | # (seq_len, joints_num, 3) 68 | data = joints.copy().reshape(len(joints), -1, 3) 69 | fig = plt.figure(figsize=figsize) 70 | plt.tight_layout() 71 | ax = p3.Axes3D(fig) 72 | init() 73 | MINS = data.min(axis=0).min(axis=0) 74 | MAXS = data.max(axis=0).max(axis=0) 75 | # colors = ['red', 'blue', 'black', 'red', 'blue', 76 | # 'darkblue', 'darkblue', 'darkblue', 'darkblue', 'darkblue', 77 | # 'darkred', 'darkred', 'darkred', 'darkred', 'darkred'] 78 | colors = [ 79 | "#DD5A37", 80 | "#D69E00", 81 | "#B75A39", 82 | "#DD5A37", 83 | "#D69E00", 84 | "#FF6D00", 85 | "#FF6D00", 86 | "#FF6D00", 87 | "#FF6D00", 88 | "#FF6D00", 89 | "#DDB50E", 90 | "#DDB50E", 91 | "#DDB50E", 92 | "#DDB50E", 93 | "#DDB50E", 94 | ] 95 | 96 | frame_number = data.shape[0] 97 | # print(dataset.shape) 98 | 99 | height_offset = MINS[1] 100 | data[:, :, 1] -= height_offset 101 | trajec = data[:, 0, [0, 2]] 102 | 103 | data[..., 0] -= data[:, 0:1, 0] 104 | data[..., 2] -= data[:, 0:1, 2] 105 | 106 | def update(index): 107 | # ax.lines = [] 108 | # ax.collections = [] 109 | ax.view_init(elev=120, azim=-90) 110 | ax.dist = 7.5 111 | # ax = 112 | plot_xzPlane( 113 | MINS[0] - trajec[index, 0], 114 | MAXS[0] - trajec[index, 0], 115 | 0, 116 | MINS[2] - trajec[index, 1], 117 | MAXS[2] - trajec[index, 1], 118 | ) 119 | 120 | for i, (chain, color) in enumerate(zip(kinematic_tree, colors)): 121 | if i < 5: 122 | linewidth = 4.0 123 | else: 124 | linewidth = 2.0 125 | ax.plot3D( 126 | data[index, chain, 0], 127 | data[index, chain, 1], 128 | data[index, chain, 2], 129 | linewidth=linewidth, 130 | color=color, 131 | ) 132 | 133 | plt.axis("off") 134 | ax.set_xticklabels([]) 135 | ax.set_yticklabels([]) 136 | ax.set_zticklabels([]) 137 | 138 | ani = FuncAnimation( 139 | fig, update, frames=frame_number, interval=1000 / fps, repeat=False 140 | ) 141 | 142 | ani.save(save_path, fps=fps) 143 | plt.close() 144 | 
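A minimal usage sketch for plot_3d_motion (assumptions: dummy joints in the 22-joint HumanML3D layout expected by the default t2m kinematic chain, and a matplotlib/ffmpeg setup old enough to accept the p3.Axes3D(fig) and ax.grid(b=False) calls used above):

import numpy as np
from tma.data.humanml.utils.plot_script import plot_3d_motion

joints = np.zeros((40, 22, 3), dtype=np.float32)       # (seq_len, joints_num, 3)
joints[:, :, 1] = np.linspace(0.0, 1.0, 40)[:, None]   # trivial upward drift so frames differ
plot_3d_motion("demo.mp4", joints, title="a person raises both arms", fps=20)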
-------------------------------------------------------------------------------- /OpenTMA/tma/data/sampling/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import FrameSampler 2 | from .framerate import subsample, upsample 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/sampling/base.py: -------------------------------------------------------------------------------- 1 | from .frames import get_frameix_from_data_index 2 | 3 | 4 | class FrameSampler: 5 | def __init__( 6 | self, 7 | sampling="conseq", 8 | sampling_step=1, 9 | request_frames=None, 10 | threshold_reject=0.75, 11 | max_len=1000, 12 | min_len=10, 13 | ): 14 | self.sampling = sampling 15 | 16 | self.sampling_step = sampling_step 17 | self.request_frames = request_frames 18 | self.threshold_reject = threshold_reject 19 | self.max_len = max_len 20 | self.min_len = min_len 21 | 22 | def __call__(self, num_frames): 23 | 24 | return get_frameix_from_data_index( 25 | num_frames, self.request_frames, self.sampling, self.sampling_step 26 | ) 27 | 28 | def accept(self, duration): 29 | # Outputs have original lengths 30 | # Check if it is too long 31 | if self.request_frames is None: 32 | if duration > self.max_len: 33 | return False 34 | elif duration < self.min_len: 35 | return False 36 | else: 37 | # Reject sample if the length is 38 | # too little relative to 39 | # the request frames 40 | min_number = self.threshold_reject * self.request_frames 41 | if duration < min_number: 42 | return False 43 | return True 44 | 45 | def get(self, key, default=None): 46 | return getattr(self, key, default) 47 | 48 | def __getitem__(self, key): 49 | return getattr(self, key) 50 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/sampling/framerate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # TODO: use a real subsampler.. 5 | def subsample(num_frames, last_framerate, new_framerate): 6 | step = int(last_framerate / new_framerate) 7 | assert step >= 1 8 | frames = np.arange(0, num_frames, step) 9 | return frames 10 | 11 | 12 | # TODO: use a real upsampler.. 
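# The naive upsampler below does piecewise-linear interpolation: with
# step = new_framerate / last_framerate, it alpha-blends motion[:-1] and
# motion[1:] with weights (1 - alpha) and alpha, emits `step` blended frames
# per consecutive pair of frames, and re-appends the final frame, so the
# output has (len(motion) - 1) * step + 1 frames.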
13 | def upsample(motion, last_framerate, new_framerate): 14 | step = int(new_framerate / last_framerate) 15 | assert step >= 1 16 | 17 | # Alpha blending => interpolation 18 | alpha = np.linspace(0, 1, step + 1) 19 | last = np.einsum("l,...->l...", 1 - alpha, motion[:-1]) 20 | new = np.einsum("l,...->l...", alpha, motion[1:]) 21 | 22 | chuncks = (last + new)[:-1] 23 | output = np.concatenate(chuncks.swapaxes(1, 0)) 24 | # Don't forget the last one 25 | output = np.concatenate((output, motion[[-1]])) 26 | return output 27 | 28 | 29 | if __name__ == "__main__": 30 | motion = np.arange(105) 31 | submotion = motion[subsample(len(motion), 100.0, 12.5)] 32 | newmotion = upsample(submotion, 12.5, 100) 33 | 34 | print(newmotion) 35 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/sampling/frames.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import numpy as np 4 | from numpy import ndarray as Array 5 | import random 6 | 7 | 8 | def get_frameix_from_data_index( 9 | num_frames: int, 10 | request_frames: Optional[int], 11 | sampling: str = "conseq", 12 | sampling_step: int = 1, 13 | ) -> Array: 14 | nframes = num_frames 15 | 16 | if request_frames is None: 17 | frame_ix = np.arange(nframes) 18 | else: 19 | # sampling goal: input: ----------- 11 nframes 20 | # o--o--o--o- 4 ninputs 21 | # 22 | # step number is computed like that: [(11-1)/(4-1)] = 3 23 | # [---][---][---][- 24 | # So step = 3, and we take 0 to step*ninputs+1 with steps 25 | # [o--][o--][o--][o-] 26 | # then we can randomly shift the vector 27 | # -[o--][o--][o--]o 28 | # If there are too much frames required 29 | if request_frames > nframes: 30 | fair = False # True 31 | if fair: 32 | # distills redundancy everywhere 33 | choices = np.random.choice(range(nframes), request_frames, replace=True) 34 | frame_ix = sorted(choices) 35 | else: 36 | # adding the last frame until done 37 | ntoadd = max(0, request_frames - nframes) 38 | lastframe = nframes - 1 39 | padding = lastframe * np.ones(ntoadd, dtype=int) 40 | frame_ix = np.concatenate((np.arange(0, nframes), padding)) 41 | 42 | elif sampling in ["conseq", "random_conseq"]: 43 | step_max = (nframes - 1) // (request_frames - 1) 44 | if sampling == "conseq": 45 | if ( 46 | sampling_step == -1 47 | or sampling_step * (request_frames - 1) >= nframes 48 | ): 49 | step = step_max 50 | else: 51 | step = sampling_step 52 | elif sampling == "random_conseq": 53 | step = random.randint(1, step_max) 54 | 55 | lastone = step * (request_frames - 1) 56 | shift_max = nframes - lastone - 1 57 | shift = random.randint(0, max(0, shift_max - 1)) 58 | frame_ix = shift + np.arange(0, lastone + 1, step) 59 | 60 | elif sampling == "random": 61 | choices = np.random.choice(range(nframes), request_frames, replace=False) 62 | frame_ix = sorted(choices) 63 | 64 | else: 65 | raise ValueError("Sampling not recognized.") 66 | 67 | return frame_ix 68 | -------------------------------------------------------------------------------- /OpenTMA/tma/data/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def lengths_to_mask(lengths): 5 | max_len = max(lengths) 6 | mask = torch.arange(max_len, device=lengths.device).expand( 7 | len(lengths), max_len 8 | ) < lengths.unsqueeze(1) 9 | return mask 10 | 11 | 12 | # padding to max length in one batch 13 | def collate_tensors(batch): 14 | dims = batch[0].dim() 15 | max_size = 
[max([b.size(i) for b in batch]) for i in range(dims)] 16 | size = (len(batch),) + tuple(max_size) 17 | canvas = batch[0].new_zeros(size=size) 18 | for i, b in enumerate(batch): 19 | sub_tensor = canvas[i] 20 | for d in range(dims): 21 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 22 | sub_tensor.add_(b) 23 | return canvas 24 | 25 | 26 | def all_collate(batch): 27 | notnone_batches = [b for b in batch if b is not None] 28 | databatch = [b["motion"] for b in notnone_batches] 29 | # labelbatch = [b['target'] for b in notnone_batches] 30 | if "lengths" in notnone_batches[0]: 31 | lenbatch = [b["lengths"] for b in notnone_batches] 32 | else: 33 | lenbatch = [len(b["inp"][0][0]) for b in notnone_batches] 34 | 35 | databatchTensor = collate_tensors(databatch) 36 | # labelbatchTensor = torch.as_tensor(labelbatch) 37 | lenbatchTensor = torch.as_tensor(lenbatch) 38 | maskbatchTensor = ( 39 | lengths_to_mask(lenbatchTensor, databatchTensor.shape[-1]) 40 | .unsqueeze(1) 41 | .unsqueeze(1) 42 | ) # unqueeze for broadcasting 43 | 44 | motion = databatchTensor 45 | cond = {"y": {"mask": maskbatchTensor, "lengths": lenbatchTensor}} 46 | 47 | if "text" in notnone_batches[0]: 48 | textbatch = [b["text"] for b in notnone_batches] 49 | cond["y"].update({"text": textbatch}) 50 | 51 | # collate action textual names 52 | if "action_text" in notnone_batches[0]: 53 | action_text = [b["action_text"] for b in notnone_batches] 54 | cond["y"].update({"action_text": action_text}) 55 | 56 | return motion, cond 57 | 58 | 59 | # an adapter to our collate func 60 | def tma_collate(batch): 61 | notnone_batches = [b for b in batch if b is not None] 62 | notnone_batches.sort(key=lambda x: x[3], reverse=True) 63 | # batch.sort(key=lambda x: x[3], reverse=True) 64 | adapted_batch = { 65 | "motion": collate_tensors( 66 | [torch.tensor(b[4]).float() for b in notnone_batches] 67 | ), 68 | "text": [b[2] for b in notnone_batches], 69 | "length": [b[5] for b in notnone_batches], 70 | "word_embs": collate_tensors( 71 | [torch.tensor(b[0]).float() for b in notnone_batches] 72 | ), 73 | "pos_ohot": collate_tensors( 74 | [torch.tensor(b[1]).float() for b in notnone_batches] 75 | ), 76 | "text_len": collate_tensors([torch.tensor(b[3]) for b in notnone_batches]), 77 | "tokens": [b[6] for b in notnone_batches], 78 | "retrieval_name": [b[7] for b in notnone_batches], 79 | } 80 | return adapted_batch 81 | 82 | 83 | def tma_collate_text_all(batch): 84 | # import pdb; pdb.set_trace() 85 | notnone_batches = [b for b in batch if b is not None] 86 | notnone_batches.sort(key=lambda x: x[3], reverse=True) 87 | # batch.sort(key=lambda x: x[3], reverse=True) 88 | adapted_batch = { 89 | "motion": collate_tensors( 90 | [torch.tensor(b[4]).float() for b in notnone_batches] 91 | ), 92 | "text": [b[2] for b in notnone_batches], 93 | "length": [b[5] for b in notnone_batches], 94 | "word_embs": collate_tensors( 95 | [torch.tensor(b[0]).float() for b in notnone_batches] 96 | ), 97 | "pos_ohot": collate_tensors( 98 | [torch.tensor(b[1]).float() for b in notnone_batches] 99 | ), 100 | "text_len": collate_tensors( 101 | [torch.tensor(b[3]).float() for b in notnone_batches] 102 | ), 103 | "tokens": [b[6] for b in notnone_batches], 104 | "body_text": [b[7] for b in notnone_batches], 105 | "hand_text": [b[8] for b in notnone_batches], 106 | "face_text": [b[9] for b in notnone_batches], 107 | } 108 | return adapted_batch 109 | 110 | 111 | def a2m_collate(batch): 112 | 113 | databatch = [b[0] for b in batch] 114 | labelbatch = [b[1] for b in batch] 115 | 
lenbatch = [len(b[0][0][0]) for b in batch] 116 | labeltextbatch = [b[3] for b in batch] 117 | 118 | databatchTensor = collate_tensors(databatch) 119 | labelbatchTensor = torch.as_tensor(labelbatch).unsqueeze(1) 120 | lenbatchTensor = torch.as_tensor(lenbatch) 121 | 122 | maskbatchTensor = lengths_to_mask(lenbatchTensor) 123 | adapted_batch = { 124 | "motion": databatchTensor.permute(0, 3, 2, 1).flatten(start_dim=2), 125 | "action": labelbatchTensor, 126 | "action_text": labeltextbatch, 127 | "mask": maskbatchTensor, 128 | "length": lenbatchTensor, 129 | } 130 | return adapted_batch 131 | -------------------------------------------------------------------------------- /OpenTMA/tma/launch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/launch/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/launch/prepare.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | from pathlib import Path 4 | 5 | import hydra 6 | from mld.tools.runid import generate_id 7 | from omegaconf import OmegaConf 8 | 9 | 10 | # Local paths 11 | def code_path(path=""): 12 | code_dir = hydra.utils.get_original_cwd() 13 | code_dir = Path(code_dir) 14 | return str(code_dir / path) 15 | 16 | 17 | def working_path(path): 18 | return str(Path(os.getcwd()) / path) 19 | 20 | 21 | # fix the id for this run 22 | ID = generate_id() 23 | 24 | 25 | def generate_id(): 26 | return ID 27 | 28 | 29 | def get_last_checkpoint(path, ckpt_name="last.ckpt"): 30 | output_dir = Path(hydra.utils.to_absolute_path(path)) 31 | last_ckpt_path = output_dir / "checkpoints" / ckpt_name 32 | return str(last_ckpt_path) 33 | 34 | 35 | def get_kitname(load_amass_data: bool, load_with_rot: bool): 36 | if not load_amass_data: 37 | return "kit-mmm-xyz" 38 | if load_amass_data and not load_with_rot: 39 | return "kit-amass-xyz" 40 | if load_amass_data and load_with_rot: 41 | return "kit-amass-rot" 42 | 43 | 44 | OmegaConf.register_new_resolver("code_path", code_path) 45 | OmegaConf.register_new_resolver("working_path", working_path) 46 | OmegaConf.register_new_resolver("generate_id", generate_id) 47 | OmegaConf.register_new_resolver("absolute_path", hydra.utils.to_absolute_path) 48 | OmegaConf.register_new_resolver("get_last_checkpoint", get_last_checkpoint) 49 | OmegaConf.register_new_resolver("get_kitname", get_kitname) 50 | 51 | 52 | # Remove warnings 53 | warnings.filterwarnings( 54 | "ignore", ".*Trying to infer the `batch_size` from an ambiguous collection.*" 55 | ) 56 | 57 | warnings.filterwarnings( 58 | "ignore", ".*does not have many workers which may be a bottleneck*" 59 | ) 60 | 61 | warnings.filterwarnings( 62 | "ignore", ".*Our suggested max number of worker in current system is*" 63 | ) 64 | 65 | 66 | # os.environ["HYDRA_FULL_ERROR"] = "1" 67 | os.environ["NUMEXPR_MAX_THREADS"] = "24" 68 | -------------------------------------------------------------------------------- /OpenTMA/tma/launch/tools.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from omegaconf import DictConfig, OmegaConf 3 | import hydra 4 | import os 5 | 6 | 7 | def resolve_cfg_path(cfg: DictConfig): 8 | working_dir = os.getcwd() 9 | cfg.working_dir = working_dir 10 | 
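prepare.py above registers several OmegaConf resolvers (code_path, working_path, generate_id, get_last_checkpoint, get_kitname) so that Hydra configs can call them through ${name:arg} interpolation. A minimal sketch of the mechanism, using a stand-in resolver rather than importing the repo module (importing it inside a Hydra run is what registers the real ones):

import os
from pathlib import Path

from omegaconf import OmegaConf

# Same registration pattern as in prepare.py, with a stand-in resolver.
OmegaConf.register_new_resolver("working_path", lambda p: str(Path(os.getcwd()) / p))

cfg = OmegaConf.create({"log_dir": "${working_path:logs}"})
print(cfg.log_dir)  # resolved lazily on access, e.g. /current/working/dir/logs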
-------------------------------------------------------------------------------- /OpenTMA/tma/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/architectures/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/fc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Encoder_FC(nn.Module): 7 | def __init__( 8 | self, 9 | modeltype, 10 | njoints, 11 | nfeats, 12 | num_frames, 13 | num_classes, 14 | translation, 15 | pose_rep, 16 | glob, 17 | glob_rot, 18 | latent_dim=256, 19 | **kargs 20 | ): 21 | super().__init__() 22 | 23 | self.modeltype = modeltype 24 | self.njoints = njoints 25 | self.nfeats = nfeats 26 | self.num_frames = num_frames 27 | self.num_classes = num_classes 28 | self.translation = translation 29 | self.pose_rep = pose_rep 30 | self.glob = glob 31 | self.glob_rot = glob_rot 32 | 33 | self.latent_dim = latent_dim 34 | 35 | self.activation = nn.GELU() 36 | 37 | self.input_dim = self.njoints * self.nfeats * self.num_frames + self.num_classes 38 | 39 | self.fully_connected = nn.Sequential( 40 | nn.Linear(self.input_dim, 512), nn.GELU(), nn.Linear(512, 256), nn.GELU() 41 | ) 42 | if self.modeltype == "cvae": 43 | self.mu = nn.Linear(256, self.latent_dim) 44 | self.var = nn.Linear(256, self.latent_dim) 45 | else: 46 | self.final = nn.Linear(256, self.latent_dim) 47 | 48 | def forward(self, batch): 49 | x, y = batch["x"], batch["y"] 50 | bs, njoints, feats, nframes = x.size() 51 | if (njoints * feats * nframes) != self.njoints * self.nfeats * self.num_frames: 52 | raise ValueError("This model is not adapted with this input") 53 | 54 | if len(y.shape) == 1: # can give on hot encoded as input 55 | y = F.one_hot(y, self.num_classes) 56 | y = y.to(dtype=x.dtype) 57 | x = x.reshape(bs, njoints * feats * nframes) 58 | x = torch.cat((x, y), 1) 59 | 60 | x = self.fully_connected(x) 61 | 62 | if self.modeltype == "cvae": 63 | return {"mu": self.mu(x), "logvar": self.var(x)} 64 | else: 65 | return {"z": self.final(x)} 66 | 67 | 68 | class Decoder_FC(nn.Module): 69 | def __init__( 70 | self, 71 | modeltype, 72 | njoints, 73 | nfeats, 74 | num_frames, 75 | num_classes, 76 | translation, 77 | pose_rep, 78 | glob, 79 | glob_rot, 80 | latent_dim=256, 81 | **kargs 82 | ): 83 | super().__init__() 84 | 85 | self.modeltype = modeltype 86 | self.njoints = njoints 87 | self.nfeats = nfeats 88 | self.num_frames = num_frames 89 | self.num_classes = num_classes 90 | self.translation = translation 91 | self.pose_rep = pose_rep 92 | self.glob = glob 93 | self.glob_rot = glob_rot 94 | 95 | self.latent_dim = latent_dim 96 | 97 | self.input_dim = self.latent_dim + self.num_classes 98 | self.output_dim = self.njoints * self.nfeats * self.num_frames 99 | 100 | self.fully_connected = nn.Sequential( 101 | nn.Linear(self.input_dim, 256), 102 | nn.GELU(), 103 | nn.Linear(256, 512), 104 | nn.GELU(), 105 | 
nn.Linear(512, self.output_dim), 106 | nn.GELU(), 107 | ) 108 | 109 | def forward(self, batch): 110 | z, y = batch["z"], batch["y"] 111 | # z: [batch_size, latent_dim] 112 | # y: [batch_size] 113 | if len(y.shape) == 1: # can give on hot encoded as input 114 | y = F.one_hot(y, self.num_classes) 115 | y = y.to(dtype=z.dtype) # y: [batch_size, num_classes] 116 | # z: [batch_size, latent_dim+num_classes] 117 | z = torch.cat((z, y), dim=1) 118 | 119 | z = self.fully_connected(z) 120 | 121 | bs, _ = z.size() 122 | 123 | z = z.reshape(bs, self.njoints, self.nfeats, self.num_frames) 124 | batch["output"] = z 125 | return batch 126 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/humanact12_gru.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # adapted from action2motion to take inputs of different lengths 6 | class MotionDiscriminator(nn.Module): 7 | 8 | def __init__( 9 | self, input_size, hidden_size, hidden_layer, output_size=12, use_noise=None 10 | ): 11 | super().__init__() 12 | 13 | self.input_size = input_size 14 | self.hidden_size = hidden_size 15 | self.hidden_layer = hidden_layer 16 | self.use_noise = use_noise 17 | 18 | self.recurrent = nn.GRU(input_size, hidden_size, hidden_layer) 19 | self.linear1 = nn.Linear(hidden_size, 30) 20 | self.linear2 = nn.Linear(30, output_size) 21 | 22 | def forward(self, motion_sequence, lengths=None, hidden_unit=None): 23 | # dim (motion_length, num_samples, hidden_size) 24 | bs, njoints, nfeats, num_frames = motion_sequence.shape 25 | motion_sequence = motion_sequence.reshape(bs, njoints * nfeats, num_frames) 26 | motion_sequence = motion_sequence.permute(2, 0, 1) 27 | if hidden_unit is None: 28 | hidden_unit = self.initHidden( 29 | motion_sequence.size(1), self.hidden_layer 30 | ).to(motion_sequence.device) 31 | gru_o, _ = self.recurrent(motion_sequence.float(), hidden_unit) 32 | 33 | # select the last valid, instead of: gru_o[-1, :, :] 34 | out = gru_o[ 35 | tuple( 36 | torch.stack( 37 | (lengths - 1, torch.arange(bs, device=motion_sequence.device)) 38 | ) 39 | ) 40 | ] 41 | 42 | # dim (num_samples, 30) 43 | lin1 = self.linear1(out) 44 | lin1 = torch.tanh(lin1) 45 | # dim (num_samples, output_size) 46 | lin2 = self.linear2(lin1) 47 | return lin2 48 | 49 | def initHidden(self, num_samples, layer): 50 | return torch.randn(layer, num_samples, self.hidden_size, requires_grad=False) 51 | 52 | 53 | class MotionDiscriminatorForFID(MotionDiscriminator): 54 | 55 | def forward(self, motion_sequence, lengths=None, hidden_unit=None): 56 | # dim (motion_length, num_samples, hidden_size) 57 | bs, njoints, nfeats, num_frames = motion_sequence.shape 58 | motion_sequence = motion_sequence.reshape(bs, njoints * nfeats, num_frames) 59 | motion_sequence = motion_sequence.permute(2, 0, 1) 60 | if hidden_unit is None: 61 | # motion_sequence = motion_sequence.permute(1, 0, 2) 62 | hidden_unit = self.initHidden( 63 | motion_sequence.size(1), self.hidden_layer 64 | ).to(motion_sequence.device) 65 | gru_o, _ = self.recurrent(motion_sequence.float(), hidden_unit) 66 | 67 | # select the last valid, instead of: gru_o[-1, :, :] 68 | out = gru_o[ 69 | tuple( 70 | torch.stack( 71 | (lengths - 1, torch.arange(bs, device=motion_sequence.device)) 72 | ) 73 | ) 74 | ] 75 | 76 | # dim (num_samples, 30) 77 | lin1 = self.linear1(out) 78 | lin1 = torch.tanh(lin1) 79 | return lin1 80 | 
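A minimal sketch of running the discriminator above on a padded batch (the sizes are illustrative, not values from the repo's configs; input_size must equal njoints * nfeats):

import torch
from tma.models.architectures.humanact12_gru import MotionDiscriminator

disc = MotionDiscriminator(input_size=72, hidden_size=128, hidden_layer=2, output_size=12)

x = torch.randn(4, 24, 3, 60)              # (bs, njoints, nfeats, num_frames), zero-padded
lengths = torch.tensor([60, 45, 30, 20])   # number of valid frames per sample
logits = disc(x, lengths)                  # (4, 12): one logit per HumanAct12 action class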
-------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/t2m_motionenc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.utils.rnn import pack_padded_sequence 4 | 5 | 6 | class MovementConvEncoder(nn.Module): 7 | def __init__(self, input_size, hidden_size, output_size): 8 | super(MovementConvEncoder, self).__init__() 9 | self.main = nn.Sequential( 10 | nn.Conv1d(input_size, hidden_size, 4, 2, 1), 11 | nn.Dropout(0.2, inplace=True), 12 | nn.LeakyReLU(0.2, inplace=True), 13 | nn.Conv1d(hidden_size, output_size, 4, 2, 1), 14 | nn.Dropout(0.2, inplace=True), 15 | nn.LeakyReLU(0.2, inplace=True), 16 | ) 17 | self.out_net = nn.Linear(output_size, output_size) 18 | 19 | def forward(self, inputs): 20 | inputs = inputs.permute(0, 2, 1) 21 | outputs = self.main(inputs).permute(0, 2, 1) 22 | return self.out_net(outputs) 23 | 24 | 25 | class MotionEncoderBiGRUCo(nn.Module): 26 | def __init__(self, input_size, hidden_size, output_size): 27 | super(MotionEncoderBiGRUCo, self).__init__() 28 | 29 | self.input_emb = nn.Linear(input_size, hidden_size) 30 | self.gru = nn.GRU( 31 | hidden_size, hidden_size, batch_first=True, bidirectional=True 32 | ) 33 | self.output_net = nn.Sequential( 34 | nn.Linear(hidden_size * 2, hidden_size), 35 | nn.LayerNorm(hidden_size), 36 | nn.LeakyReLU(0.2, inplace=True), 37 | nn.Linear(hidden_size, output_size), 38 | ) 39 | self.hidden_size = hidden_size 40 | self.hidden = nn.Parameter( 41 | torch.randn((2, 1, self.hidden_size), requires_grad=True) 42 | ) 43 | 44 | # input(batch_size, seq_len, dim) 45 | def forward(self, inputs, m_lens): 46 | num_samples = inputs.shape[0] 47 | 48 | input_embs = self.input_emb(inputs) 49 | hidden = self.hidden.repeat(1, num_samples, 1) 50 | 51 | cap_lens = m_lens.data.tolist() 52 | emb = pack_padded_sequence(input_embs, cap_lens, batch_first=True) 53 | 54 | gru_seq, gru_last = self.gru(emb, hidden) 55 | 56 | gru_last = torch.cat([gru_last[0], gru_last[1]], dim=-1) 57 | 58 | return self.output_net(gru_last) 59 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/t2m_textenc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.utils.rnn import pack_padded_sequence 4 | 5 | 6 | class TextEncoderBiGRUCo(nn.Module): 7 | def __init__(self, word_size, pos_size, hidden_size, output_size): 8 | super(TextEncoderBiGRUCo, self).__init__() 9 | 10 | self.pos_emb = nn.Linear(pos_size, word_size) 11 | self.input_emb = nn.Linear(word_size, hidden_size) 12 | self.gru = nn.GRU( 13 | hidden_size, hidden_size, batch_first=True, bidirectional=True 14 | ) 15 | self.output_net = nn.Sequential( 16 | nn.Linear(hidden_size * 2, hidden_size), 17 | nn.LayerNorm(hidden_size), 18 | nn.LeakyReLU(0.2, inplace=True), 19 | nn.Linear(hidden_size, output_size), 20 | ) 21 | 22 | self.hidden_size = hidden_size 23 | self.hidden = nn.Parameter( 24 | torch.randn((2, 1, self.hidden_size), requires_grad=True) 25 | ) 26 | 27 | def forward(self, word_embs, pos_onehot, cap_lens): 28 | num_samples = word_embs.shape[0] 29 | 30 | pos_embs = self.pos_emb(pos_onehot) 31 | inputs = word_embs + pos_embs 32 | input_embs = self.input_emb(inputs) 33 | hidden = self.hidden.repeat(1, num_samples, 1) 34 | 35 | cap_lens = cap_lens.data.tolist() 36 | emb = pack_padded_sequence(input_embs, cap_lens, 
batch_first=True) 37 | 38 | gru_seq, gru_last = self.gru(emb, hidden) 39 | 40 | gru_last = torch.cat([gru_last[0], gru_last[1]], dim=-1) 41 | 42 | return self.output_net(gru_last) 43 | 44 | 45 | class TextEncoderBiGRUCoV2(nn.Module): 46 | def __init__(self, word_size, pos_size, hidden_size, output_size, dataset=None): 47 | super(TextEncoderBiGRUCoV2, self).__init__() 48 | if dataset == "unimocap": 49 | self.pos_emb = nn.Linear(pos_size, word_size) 50 | self.input_emb = nn.Linear(word_size, hidden_size) 51 | self.gru = nn.GRU( 52 | hidden_size, hidden_size, batch_first=True, bidirectional=True 53 | ) 54 | self.output_net = nn.Sequential( 55 | nn.Linear(hidden_size * 2, hidden_size), 56 | nn.LayerNorm(hidden_size), 57 | nn.LeakyReLU(0.2, inplace=True), 58 | nn.Linear(hidden_size, output_size), 59 | ) 60 | 61 | self.hidden_size = hidden_size 62 | self.hidden = nn.Parameter( 63 | torch.randn((2, 1, self.hidden_size), requires_grad=True) 64 | ) 65 | 66 | def forward(self, word_embs, cap_lens): 67 | num_samples = word_embs.shape[0] 68 | 69 | inputs = word_embs 70 | input_embs = self.input_emb(inputs) 71 | hidden = self.hidden.repeat(1, num_samples, 1) 72 | 73 | cap_lens = cap_lens.data.tolist() 74 | emb = pack_padded_sequence(input_embs, cap_lens, batch_first=True) 75 | 76 | gru_seq, gru_last = self.gru(emb, hidden) 77 | 78 | gru_last = torch.cat([gru_last[0], gru_last[1]], dim=-1) 79 | 80 | return self.output_net(gru_last) 81 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/architectures/temos/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/motiondecoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/architectures/temos/motiondecoder/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/motiondecoder/actor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import pytorch_lightning as pl 5 | 6 | from typing import List, Optional 7 | from torch import nn, Tensor 8 | 9 | from tma.models.operator import PositionalEncoding 10 | from tma.utils.temos_utils import lengths_to_mask 11 | 12 | 13 | class ActorAgnosticDecoder(pl.LightningModule): 14 | """ 15 | This class is a decoder module for actor-agnostic features. It uses a transformer-based architecture for decoding. 16 | 17 | Args: 18 | nfeats (int): The number of features in the input. 19 | latent_dim (int, optional): The dimensionality of the latent space. Defaults to 256. 20 | ff_size (int, optional): The dimensionality of the feedforward network model. Defaults to 1024. 21 | num_layers (int, optional): The number of sub-encoder-layers in the transformer model. Defaults to 4. 22 | num_heads (int, optional): The number of heads in the multiheadattention models. Defaults to 4. 23 | dropout (float, optional): The dropout value. Defaults to 0.1. 24 | activation (str, optional): The activation function of intermediate layer, relu or gelu. 
Defaults to "gelu". 25 | """ 26 | 27 | def __init__( 28 | self, 29 | nfeats: int, 30 | latent_dim: int = 256, 31 | ff_size: int = 1024, 32 | num_layers: int = 4, 33 | num_heads: int = 4, 34 | dropout: float = 0.1, 35 | activation: str = "gelu", 36 | **kwargs 37 | ) -> None: 38 | 39 | super().__init__() 40 | self.save_hyperparameters(logger=False) 41 | 42 | output_feats = nfeats 43 | 44 | self.sequence_pos_encoding = PositionalEncoding(latent_dim, dropout) 45 | 46 | # Transformer decoder 47 | seq_trans_decoder_layer = nn.TransformerDecoderLayer( 48 | d_model=latent_dim, 49 | nhead=num_heads, 50 | dim_feedforward=ff_size, 51 | dropout=dropout, 52 | activation=activation, 53 | ) 54 | 55 | self.seqTransDecoder = nn.TransformerDecoder( 56 | seq_trans_decoder_layer, num_layers=num_layers 57 | ) 58 | 59 | # Final linear layer 60 | self.final_layer = nn.Linear(latent_dim, output_feats) 61 | 62 | def forward(self, z: Tensor, lengths: List[int]): 63 | """ 64 | Forward pass for the decoder. 65 | 66 | Args: 67 | z (Tensor): The input tensor. 68 | lengths (List[int]): The lengths of the sequences. 69 | 70 | Returns: 71 | Tensor: The output features. 72 | """ 73 | 74 | # Create a mask based on the lengths 75 | mask = lengths_to_mask(lengths, z.device) 76 | latent_dim = z.shape[1] 77 | bs, nframes = mask.shape 78 | nfeats = self.hparams.nfeats 79 | 80 | z = z[None] # sequence of 1 element for the memory 81 | 82 | # Construct time queries 83 | time_queries = torch.zeros(nframes, bs, latent_dim, device=z.device) 84 | time_queries = self.sequence_pos_encoding(time_queries) 85 | 86 | # Pass through the transformer decoder 87 | # with the latent vector for memory 88 | output = self.seqTransDecoder( 89 | tgt=time_queries, memory=z, tgt_key_padding_mask=~mask 90 | ) 91 | 92 | output = self.final_layer(output) 93 | # zero for padded area 94 | output[~mask.T] = 0 95 | # Pytorch Transformer: [Sequence, Batch size, ...] 96 | feats = output.permute(1, 0, 2) 97 | return feats 98 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/motiondecoder/gru.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import pytorch_lightning as pl 5 | 6 | from typing import List, Optional 7 | from torch import nn, Tensor 8 | 9 | from tma.models.operator import PositionalEncoding 10 | from tma.utils.temos_utils import lengths_to_mask 11 | 12 | 13 | class GRUDecoder(pl.LightningModule): 14 | """ 15 | This class is a decoder module for features using a GRU-based architecture. 16 | 17 | Args: 18 | nfeats (int): The number of features in the input. 19 | latent_dim (int, optional): The dimensionality of the latent space. Defaults to 256. 20 | num_layers (int, optional): The number of layers in the GRU model. Defaults to 4. 21 | """ 22 | 23 | def __init__( 24 | self, nfeats: int, latent_dim: int = 256, num_layers: int = 4, **kwargs 25 | ) -> None: 26 | 27 | super().__init__() 28 | self.save_hyperparameters(logger=False) 29 | 30 | output_feats = nfeats 31 | 32 | # Embedding layer to transform the input 33 | self.emb_layer = nn.Linear(latent_dim + 1, latent_dim) 34 | 35 | # GRU layer 36 | self.gru = nn.GRU(latent_dim, latent_dim, num_layers=num_layers) 37 | 38 | # Final linear layer 39 | self.final_layer = nn.Linear(latent_dim, output_feats) 40 | 41 | def forward(self, z: Tensor, lengths: List[int]): 42 | """ 43 | Forward pass for the decoder. 
44 | 45 | Args: 46 | z (Tensor): The input tensor. 47 | lengths (List[int]): The lengths of the sequences. 48 | 49 | Returns: 50 | Tensor: The output features. 51 | """ 52 | 53 | # Create a mask based on the lengths 54 | mask = lengths_to_mask(lengths, z.device) 55 | latent_dim = z.shape[1] 56 | bs, nframes = mask.shape 57 | nfeats = self.hparams.nfeats 58 | 59 | lengths = torch.tensor(lengths, device=z.device) 60 | 61 | # Repeat the input 62 | z = z[None].repeat((nframes, 1, 1)) 63 | 64 | # Add time information to the input 65 | time = mask * 1 / (lengths[..., None] - 1) 66 | time = (time[:, None] * torch.arange(time.shape[1], device=z.device))[:, 0] 67 | time = time.T[..., None] 68 | z = torch.cat((z, time), 2) 69 | 70 | # emb to latent space again 71 | z = self.emb_layer(z) 72 | 73 | # pass to gru 74 | z = self.gru(z)[0] 75 | output = self.final_layer(z) 76 | 77 | # zero for padded area 78 | output[~mask.T] = 0 79 | 80 | # Pytorch GRU: [Sequence, Batch size, ...] 81 | feats = output.permute(1, 0, 2) 82 | 83 | return feats 84 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/motionencoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/architectures/temos/motionencoder/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/motionencoder/gru.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import pytorch_lightning as pl 5 | 6 | from typing import List, Optional, Union 7 | from torch import nn, Tensor 8 | from torch.distributions.distribution import Distribution 9 | 10 | from tma.models.operator import PositionalEncoding 11 | from tma.utils.temos_utils import lengths_to_mask 12 | 13 | 14 | class GRUEncoder(pl.LightningModule): 15 | """ 16 | This class is a GRU encoder for encoding input features. 17 | 18 | Attributes: 19 | - skel_embedding: a linear layer for embedding the input features. 20 | - gru: a GRU layer for encoding the embedded features. 21 | - mu: a linear layer for generating the mean of the latent distribution (only if VAE is used). 22 | - logvar: a linear layer for generating the log variance of the latent distribution (only if VAE is used). 23 | - final: a linear layer for generating the final output (only if VAE is not used). 24 | 25 | Methods: 26 | - __init__: initializes the GRUEncoder object with the given parameters. 27 | - forward: encodes the input features and returns the encoded output. 28 | """ 29 | 30 | def __init__( 31 | self, 32 | nfeats: int, 33 | vae: bool, 34 | latent_dim: int = 256, 35 | num_layers: int = 4, 36 | **kwargs 37 | ): 38 | """ 39 | Initializes the GRUEncoder object with the given parameters. 40 | 41 | Inputs: 42 | - nfeats: the number of input features. 43 | - vae: a flag indicating whether to use a Variational Autoencoder (VAE). 44 | - latent_dim: the dimension of the latent space. 45 | - num_layers: the number of layers in the GRU. 
46 | 47 | Outputs: None 48 | """ 49 | super().__init__() 50 | self.save_hyperparameters(logger=False) 51 | input_feats = nfeats 52 | 53 | # Embed the input features 54 | self.skel_embedding = nn.Linear(input_feats, latent_dim) 55 | 56 | # Initialize the GRU layer 57 | self.gru = nn.GRU(latent_dim, latent_dim, num_layers=num_layers) 58 | 59 | # Action agnostic: only one set of params 60 | if vae: 61 | self.mu = nn.Linear(latent_dim, latent_dim) 62 | self.logvar = nn.Linear(latent_dim, latent_dim) 63 | else: 64 | self.final = nn.Linear(latent_dim, latent_dim) 65 | 66 | def forward(self, features: Tensor, lengths: Optional[List[int]] = None): 67 | """ 68 | Encodes the input features and returns the encoded output. 69 | 70 | Inputs: 71 | - features: a tensor of input features. 72 | - lengths: a list of lengths of the input features. 73 | 74 | Outputs: the encoded output. 75 | """ 76 | if lengths is None: 77 | lengths = [len(feature) for feature in features] 78 | 79 | device = features.device 80 | 81 | bs, nframes, nfeats = features.shape 82 | mask = lengths_to_mask(lengths, device) 83 | 84 | x = features 85 | # Embed each human poses into latent vectors 86 | x = self.skel_embedding(x) 87 | 88 | # Switch sequence and batch_size because the input of 89 | # Pytorch Transformer is [Sequence, Batch size, ...] 90 | x = x.permute(1, 0, 2) # now it is [nframes, bs, latent_dim] 91 | 92 | # Get all the output of the gru 93 | x = self.gru(x)[0] 94 | 95 | # Put back the batch dimention first 96 | x = x.permute(1, 0, 2) # now it is [nframes, bs, latent_dim] 97 | 98 | # Extract the last valid input 99 | x = x[ 100 | tuple( 101 | torch.stack( 102 | ( 103 | torch.arange(bs, device=x.device), 104 | torch.tensor(lengths, device=x.device) - 1, 105 | ) 106 | ) 107 | ) 108 | ] 109 | 110 | if self.hparams.vae: 111 | mu = self.mu(x) 112 | logvar = self.logvar(x) 113 | std = logvar.exp().pow(0.5) 114 | # https://github.com/kampta/pytorch-distributions/blob/master/gaussian_vae.py 115 | return torch.distributions.Normal(mu, std) 116 | else: 117 | return self.final(x) 118 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/textencoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/architectures/temos/textencoder/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/textencoder/distillbert.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | import pytorch_lightning as pl 3 | 4 | import torch.nn as nn 5 | import os 6 | 7 | import torch 8 | from torch import Tensor 9 | from torch.distributions.distribution import Distribution 10 | from transformers import AutoTokenizer, AutoModel 11 | from transformers import logging 12 | 13 | 14 | class DistilbertEncoderBase(pl.LightningModule): 15 | """ 16 | This class is a base encoder for DistilBERT models. 17 | 18 | Attributes: 19 | - tokenizer: the tokenizer for the pre-trained DistilBERT model. 20 | - text_model: the pre-trained DistilBERT model. 21 | - text_encoded_dim: the dimension of the hidden state in the DistilBERT model. 22 | 23 | Methods: 24 | - __init__: initializes the DistilbertEncoderBase object with the given parameters. 25 | - train: sets the training mode for the model. 
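    - get_last_hidden_state: tokenizes a list of texts and returns the last hidden state of the DistilBERT model (optionally together with the attention mask).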
26 | """ 27 | 28 | def __init__(self, modelpath: str, finetune: bool = False): 29 | """ 30 | Initializes the DistilbertEncoderBase object with the given parameters. 31 | 32 | Inputs: 33 | - modelpath: the path to the pre-trained DistilBERT model. 34 | - finetune: a flag indicating whether to fine-tune the DistilBERT model. 35 | 36 | Outputs: None 37 | """ 38 | super().__init__() 39 | logging.set_verbosity_error() 40 | 41 | # Tokenizer 42 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 43 | self.tokenizer = AutoTokenizer.from_pretrained(modelpath) 44 | 45 | # Text model 46 | self.text_model = AutoModel.from_pretrained(modelpath) 47 | 48 | # Don't train the model 49 | if not finetune: 50 | self.text_model.training = False 51 | for p in self.text_model.parameters(): 52 | p.requires_grad = False 53 | 54 | # Then configure the model 55 | self.text_encoded_dim = self.text_model.config.hidden_size 56 | 57 | def train(self, mode: bool = True): 58 | """ 59 | Sets the training mode for the model. 60 | 61 | Inputs: 62 | - mode: a flag indicating whether to set the model to training mode. 63 | 64 | Outputs: None 65 | """ 66 | self.training = mode 67 | for module in self.children(): 68 | # Don't put the model in 69 | if module == self.text_model and not self.hparams.finetune: 70 | continue 71 | module.train(mode) 72 | return self 73 | 74 | def get_last_hidden_state(self, texts: List[str], return_mask: bool = False): 75 | """ 76 | Sets the training mode for the model. 77 | 78 | Inputs: 79 | - mode: a flag indicating whether to set the model to training mode. 80 | 81 | Outputs: None 82 | """ 83 | # Tokenize the texts and convert them to tensors 84 | encoded_inputs = self.tokenizer(texts, return_tensors="pt", padding=True) 85 | 86 | # Pass the encoded inputs to the DistilBERT model 87 | output = self.text_model(**encoded_inputs.to(self.text_model.device)) 88 | 89 | # If not returning the attention mask, return the last hidden state 90 | if not return_mask: 91 | return output.last_hidden_state 92 | 93 | # If returning the attention mask, return the last hidden state and the attention mask 94 | return output.last_hidden_state, encoded_inputs.attention_mask.to(dtype=bool) 95 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/temos/textencoder/distillbert_actor.py: -------------------------------------------------------------------------------- 1 | from .distillbert import DistilbertEncoderBase 2 | import torch 3 | 4 | from typing import List, Union 5 | from torch import nn, Tensor 6 | from torch.distributions.distribution import Distribution 7 | 8 | from tma.models.operator import PositionalEncoding 9 | from tma.utils.temos_utils import lengths_to_mask 10 | 11 | 12 | class DistilbertActorAgnosticEncoder(DistilbertEncoderBase): 13 | def __init__( 14 | self, 15 | modelpath: str, 16 | finetune: bool = False, 17 | vae: bool = True, 18 | latent_dim: int = 256, 19 | ff_size: int = 1024, 20 | num_layers: int = 4, 21 | num_heads: int = 4, 22 | dropout: float = 0.1, 23 | activation: str = "gelu", 24 | **kwargs 25 | ): 26 | """ 27 | Initializes the DistilbertActorAgnosticEncoder object with the given parameters. 28 | 29 | Inputs: 30 | - modelpath: the path to the pre-trained DistilBERT model. 31 | - finetune: a flag indicating whether to fine-tune the DistilBERT model. 32 | - vae: a flag indicating whether to use a VAE model. 33 | - latent_dim: the dimension of the latent space. 
34 | - ff_size: the size of the feedforward network in the transformer encoder. 35 | - num_layers: the number of layers in the transformer encoder. 36 | - num_heads: the number of attention heads in the transformer encoder. 37 | - dropout: the dropout rate. 38 | - activation: the activation function to use in the transformer encoder. 39 | 40 | Outputs: None 41 | """ 42 | super().__init__(modelpath=modelpath, finetune=finetune) 43 | self.save_hyperparameters(logger=False) 44 | 45 | encoded_dim = self.text_encoded_dim 46 | 47 | # Projection of the text-outputs into the latent space 48 | self.projection = nn.Sequential(nn.ReLU(), nn.Linear(encoded_dim, latent_dim)) 49 | 50 | # TransformerVAE adapted from ACTOR 51 | # Action agnostic: only one set of params 52 | if vae: 53 | self.mu_token = nn.Parameter(torch.randn(latent_dim)) 54 | self.logvar_token = nn.Parameter(torch.randn(latent_dim)) 55 | else: 56 | self.emb_token = nn.Parameter(torch.randn(latent_dim)) 57 | 58 | self.sequence_pos_encoding = PositionalEncoding(latent_dim, dropout) 59 | 60 | seq_trans_encoder_layer = nn.TransformerEncoderLayer( 61 | d_model=latent_dim, 62 | nhead=num_heads, 63 | dim_feedforward=ff_size, 64 | dropout=dropout, 65 | activation=activation, 66 | ) 67 | 68 | self.seqTransEncoder = nn.TransformerEncoder( 69 | seq_trans_encoder_layer, num_layers=num_layers 70 | ) 71 | 72 | def forward(self, texts: List[str]): 73 | text_encoded, mask = self.get_last_hidden_state(texts, return_mask=True) 74 | 75 | x = self.projection(text_encoded) 76 | bs, nframes, _ = x.shape 77 | # bs, nframes, totjoints, nfeats = x.shape 78 | # Switch sequence and batch_size because the input of 79 | # Pytorch Transformer is [Sequence, Batch size, ...] 80 | x = x.permute(1, 0, 2) # now it is [nframes, bs, latent_dim] 81 | 82 | if self.hparams.vae: 83 | mu_token = torch.tile(self.mu_token, (bs,)).reshape(bs, -1) 84 | logvar_token = torch.tile(self.logvar_token, (bs,)).reshape(bs, -1) 85 | 86 | # adding the distribution tokens for all sequences 87 | xseq = torch.cat((mu_token[None], logvar_token[None], x), 0) 88 | 89 | # create a bigger mask, to allow attend to mu and logvar 90 | token_mask = torch.ones((bs, 2), dtype=bool, device=x.device) 91 | aug_mask = torch.cat((token_mask, mask), 1) 92 | else: 93 | emb_token = torch.tile(self.emb_token, (bs,)).reshape(bs, -1) 94 | 95 | # adding the embedding token for all sequences 96 | xseq = torch.cat((emb_token[None], x), 0) 97 | 98 | # create a bigger mask, to allow attend to emb 99 | token_mask = torch.ones((bs, 1), dtype=bool, device=x.device) 100 | aug_mask = torch.cat((token_mask, mask), 1) 101 | 102 | # add positional encoding 103 | xseq = self.sequence_pos_encoding(xseq) 104 | final = self.seqTransEncoder(xseq, src_key_padding_mask=~aug_mask) 105 | 106 | if self.hparams.vae: 107 | mu, logvar = final[0], final[1] 108 | std = logvar.exp().pow(0.5) 109 | # https://github.com/kampta/pytorch-distributions/blob/master/gaussian_vae.py 110 | try: 111 | dist = torch.distributions.Normal(mu, std) 112 | except ValueError: 113 | import ipdb 114 | 115 | ipdb.set_trace() # noqa 116 | pass 117 | return dist 118 | else: 119 | return final[0] 120 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/architectures/vposert_vae.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import 
torch.nn.functional as F 8 | from torch import Tensor, nn 9 | from torch.distributions.distribution import Distribution 10 | 11 | from tma.models.architectures.tools.embeddings import TimestepEmbedding, Timesteps 12 | from tma.models.operator import PositionalEncoding 13 | from tma.models.operator.cross_attention import ( 14 | SkipTransformerEncoder, 15 | SkipTransformerDecoder, 16 | TransformerDecoder, 17 | TransformerDecoderLayer, 18 | TransformerEncoder, 19 | TransformerEncoderLayer, 20 | ) 21 | from tma.models.operator.position_encoding import build_position_encoding 22 | from tma.utils.temos_utils import lengths_to_mask 23 | 24 | """ 25 | vae 26 | skip connection encoder 27 | skip connection decoder 28 | mem for each decoder layer 29 | """ 30 | 31 | 32 | class VPosert(nn.Module): 33 | 34 | def __init__(self, cfg, **kwargs) -> None: 35 | 36 | super(VPosert, self).__init__() 37 | 38 | num_neurons = 512 39 | self.latentD = 256 40 | 41 | # self.num_joints = 21 42 | n_features = 196 * 263 43 | 44 | self.encoder_net = nn.Sequential( 45 | BatchFlatten(), 46 | nn.BatchNorm1d(n_features), 47 | nn.Linear(n_features, num_neurons), 48 | nn.LeakyReLU(), 49 | nn.BatchNorm1d(num_neurons), 50 | nn.Dropout(0.1), 51 | nn.Linear(num_neurons, num_neurons), 52 | nn.Linear(num_neurons, num_neurons), 53 | NormalDistDecoder(num_neurons, self.latentD), 54 | ) 55 | 56 | self.decoder_net = nn.Sequential( 57 | nn.Linear(self.latentD, num_neurons), 58 | nn.LeakyReLU(), 59 | nn.Dropout(0.1), 60 | nn.Linear(num_neurons, num_neurons), 61 | nn.LeakyReLU(), 62 | nn.Linear(num_neurons, n_features), 63 | ContinousRotReprDecoder(), 64 | ) 65 | 66 | def forward(self, features: Tensor, lengths: Optional[List[int]] = None): 67 | q_z = self.encode(features) 68 | feats_rst = self.decode(q_z) 69 | return feats_rst, q_z 70 | 71 | def encode(self, pose_body, lengths: Optional[List[int]] = None): 72 | """ 73 | :param Pin: Nx(numjoints*3) 74 | :param rep_type: 'matrot'/'aa' for matrix rotations or axis-angle 75 | :return: 76 | """ 77 | q_z = self.encoder_net(pose_body) 78 | q_z_sample = q_z.rsample() 79 | return q_z_sample.unsqueeze(0), q_z 80 | 81 | def decode(self, Zin, lengths: Optional[List[int]] = None): 82 | bs = Zin.shape[0] 83 | Zin = Zin[0] 84 | 85 | prec = self.decoder_net(Zin) 86 | 87 | return prec 88 | 89 | 90 | class BatchFlatten(nn.Module): 91 | 92 | def __init__(self): 93 | super(BatchFlatten, self).__init__() 94 | self._name = "batch_flatten" 95 | 96 | def forward(self, x): 97 | return x.view(x.shape[0], -1) 98 | 99 | 100 | class ContinousRotReprDecoder(nn.Module): 101 | 102 | def __init__(self): 103 | super(ContinousRotReprDecoder, self).__init__() 104 | 105 | def forward(self, module_input): 106 | reshaped_input = module_input.view(-1, 196, 263) 107 | return reshaped_input 108 | 109 | 110 | class NormalDistDecoder(nn.Module): 111 | 112 | def __init__(self, num_feat_in, latentD): 113 | super(NormalDistDecoder, self).__init__() 114 | 115 | self.mu = nn.Linear(num_feat_in, latentD) 116 | self.logvar = nn.Linear(num_feat_in, latentD) 117 | 118 | def forward(self, Xout): 119 | return torch.distributions.normal.Normal( 120 | self.mu(Xout), F.softplus(self.logvar(Xout)) 121 | ) 122 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/body_skeleton/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/body_skeleton/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/body_skeleton/paramUtil.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Define a kinematic tree for the skeletal struture 4 | kit_kinematic_chain = [ 5 | [0, 11, 12, 13, 14, 15], 6 | [0, 16, 17, 18, 19, 20], 7 | [0, 1, 2, 3, 4], 8 | [3, 5, 6, 7], 9 | [3, 8, 9, 10], 10 | ] 11 | 12 | kit_raw_offsets = np.array( 13 | [ 14 | [0, 0, 0], 15 | [0, 1, 0], 16 | [0, 1, 0], 17 | [0, 1, 0], 18 | [0, 1, 0], 19 | [1, 0, 0], 20 | [0, -1, 0], 21 | [0, -1, 0], 22 | [-1, 0, 0], 23 | [0, -1, 0], 24 | [0, -1, 0], 25 | [1, 0, 0], 26 | [0, -1, 0], 27 | [0, -1, 0], 28 | [0, 0, 1], 29 | [0, 0, 1], 30 | [-1, 0, 0], 31 | [0, -1, 0], 32 | [0, -1, 0], 33 | [0, 0, 1], 34 | [0, 0, 1], 35 | ] 36 | ) 37 | 38 | t2m_raw_offsets = np.array( 39 | [ 40 | [0, 0, 0], 41 | [1, 0, 0], 42 | [-1, 0, 0], 43 | [0, 1, 0], 44 | [0, -1, 0], 45 | [0, -1, 0], 46 | [0, 1, 0], 47 | [0, -1, 0], 48 | [0, -1, 0], 49 | [0, 1, 0], 50 | [0, 0, 1], 51 | [0, 0, 1], 52 | [0, 1, 0], 53 | [1, 0, 0], 54 | [-1, 0, 0], 55 | [0, 0, 1], 56 | [0, -1, 0], 57 | [0, -1, 0], 58 | [0, -1, 0], 59 | [0, -1, 0], 60 | [0, -1, 0], 61 | [0, -1, 0], 62 | ] 63 | ) 64 | 65 | t2m_kinematic_chain = [ 66 | [0, 2, 5, 8, 11], 67 | [0, 1, 4, 7, 10], 68 | [0, 3, 6, 9, 12, 15], 69 | [9, 14, 17, 19, 21], 70 | [9, 13, 16, 18, 20], 71 | ] 72 | t2m_left_hand_chain = [ 73 | [20, 22, 23, 24], 74 | [20, 34, 35, 36], 75 | [20, 25, 26, 27], 76 | [20, 31, 32, 33], 77 | [20, 28, 29, 30], 78 | ] 79 | t2m_right_hand_chain = [ 80 | [21, 43, 44, 45], 81 | [21, 46, 47, 48], 82 | [21, 40, 41, 42], 83 | [21, 37, 38, 39], 84 | [21, 49, 50, 51], 85 | ] 86 | 87 | 88 | kit_tgt_skel_id = "03950" 89 | 90 | t2m_tgt_skel_id = "000021" 91 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/get_model.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | 4 | def get_model(cfg, datamodule, phase="train"): 5 | """ 6 | Inputs: 7 | cfg (Config): The configuration object containing model details. 8 | datamodule (DataModule): The data module object for data loading and processing. 9 | phase (str): The phase of model training. Default is "train". 10 | 11 | This function returns the model based on the model type specified in the configuration. If the model type is not supported, it raises a ValueError. 12 | 13 | Returns: 14 | Model (object): The model object. 15 | """ 16 | modeltype = cfg.model.model_type 17 | if modeltype in ["mld", "temos"]: 18 | return get_module(cfg, datamodule) 19 | else: 20 | raise ValueError(f"Invalid model type {modeltype}.") 21 | 22 | 23 | def get_module(cfg, datamodule): 24 | """ 25 | Inputs: 26 | cfg (Config): The configuration object containing model details. 27 | datamodule (DataModule): The data module object for data loading and processing. 28 | 29 | This function imports the model module based on the model type specified in the configuration, gets the model class from the module, and returns an instance of the model class. 30 | 31 | Returns: 32 | Model (object): The model object. 
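    Example (hypothetical configuration, shown only to illustrate the dynamic import):
    with cfg.model.model_type == "temos", the import below resolves to the module
    "tma.models.modeltype.temos", the class name becomes "TEMOS", and the call returns
    TEMOS(cfg=cfg, datamodule=datamodule):

        cfg.model.model_type = "temos"
        model = get_model(cfg, datamodule)  # an instance of tma.models.modeltype.temos.TEMOS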
33 | """ 34 | modeltype = cfg.model.model_type 35 | model_module = importlib.import_module( 36 | f".modeltype.{cfg.model.model_type}", package="tma.models") 37 | Model = model_module.__getattribute__(f"{modeltype.upper()}") 38 | return Model(cfg=cfg, datamodule=datamodule) 39 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from tma.models.losses.temos import TemosLosses 2 | from tma.models.losses.tmost import TmostLosses 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/losses/actor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchmetrics import Metric 4 | 5 | 6 | class ACTORLosses(Metric): 7 | """ 8 | Loss 9 | Modify loss 10 | 11 | """ 12 | 13 | def __init__(self, vae, mode, cfg): 14 | super().__init__(dist_sync_on_step=cfg.LOSS.DIST_SYNC_ON_STEP) 15 | 16 | # Save parameters 17 | self.vae = vae 18 | self.mode = mode 19 | 20 | losses = [] 21 | losses.append("recons_feature") 22 | losses.append("recons_verts") 23 | losses.append("recons_joints") 24 | losses.append("recons_limb") 25 | 26 | # latent loss 27 | losses.append("latent_st2sm") 28 | 29 | # KL loss 30 | losses.append("kl_motion") 31 | losses.append("total") 32 | 33 | for loss in losses: 34 | self.register_buffer(loss, torch.tensor(0.0)) 35 | self.register_buffer("count", torch.tensor(0)) 36 | self.losses = losses 37 | 38 | self._losses_func = {} 39 | self._params = {} 40 | for loss in losses: 41 | if loss != "total": 42 | if loss.split("_")[0] == "kl": 43 | self._losses_func[loss] = KLLoss() 44 | self._params[loss] = cfg.LOSS.LAMBDA_KL 45 | elif loss.split("_")[0] == "recons": 46 | self._losses_func[loss] = torch.nn.SmoothL1Loss(reduction="mean") 47 | self._params[loss] = cfg.LOSS.LAMBDA_REC 48 | elif loss.split("_")[0] == "cross": 49 | self._losses_func[loss] = torch.nn.SmoothL1Loss(reduction="mean") 50 | self._params[loss] = cfg.LOSS.LAMBDA_CROSS 51 | elif loss.split("_")[0] == "latent": 52 | self._losses_func[loss] = torch.nn.SmoothL1Loss(reduction="mean") 53 | self._params[loss] = cfg.LOSS.LAMBDA_LATENT 54 | elif loss.split("_")[0] == "cycle": 55 | self._losses_func[loss] = torch.nn.SmoothL1Loss(reduction="mean") 56 | self._params[loss] = cfg.LOSS.LAMBDA_CYCLE 57 | else: 58 | ValueError("This loss is not recognized.") 59 | 60 | def update(self, rs_set, dist_ref): 61 | total: float = 0.0 62 | # Compute the losses 63 | # loss1 - reconstruction loss 64 | total += self._update_loss("recons_feature", rs_set["m_rst"], rs_set["m_ref"]) 65 | # total += self._update_loss("recons_verts", rs_set['verts_rs'], rs_set['verts_ref']) 66 | # total += self._update_loss("recons_joints", rs_set['joints_rs'], rs_set['joints_ref']) 67 | # total += self._update_loss("recons_limb", rs_set['rs_base'], rs_set['m1']) 68 | 69 | # loss - text motion latent loss 70 | total += self._update_loss("kl_motion", rs_set["dist_m"], dist_ref) 71 | 72 | self.total += total.detach() 73 | self.count += 1 74 | 75 | return total 76 | 77 | def compute(self, split): 78 | count = getattr(self, "count") 79 | return {loss: getattr(self, loss) / count for loss in self.losses} 80 | 81 | def _update_loss(self, loss: str, outputs, inputs): 82 | # Update the loss 83 | val = self._losses_func[loss](outputs, inputs) 84 | getattr(self, loss).__iadd__(val.detach()) 85 | # Return a 
weighted sum 86 | weighted_loss = self._params[loss] * val 87 | return weighted_loss 88 | 89 | def loss2logname(self, loss: str, split: str): 90 | if loss == "total": 91 | log_name = f"{loss}/{split}" 92 | else: 93 | loss_type, name = loss.split("_") 94 | log_name = f"{loss_type}/{name}/{split}" 95 | return log_name 96 | 97 | 98 | class KLLoss: 99 | def __init__(self): 100 | pass 101 | 102 | def __call__(self, q, p): 103 | div = torch.distributions.kl_divergence(q, p) 104 | return div.mean() 105 | 106 | def __repr__(self): 107 | return "KLLoss()" 108 | 109 | 110 | class KLLossMulti: 111 | def __init__(self): 112 | self.klloss = KLLoss() 113 | 114 | def __call__(self, qlist, plist): 115 | return sum([self.klloss(q, p) for q, p in zip(qlist, plist)]) 116 | 117 | def __repr__(self): 118 | return "KLLossMulti()" 119 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/losses/infonce.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | 5 | 6 | class InfoNCE: 7 | """ 8 | This class implements the InfoNCE loss function. 9 | 10 | Attributes: 11 | - t: a temperature parameter for the softmax function in the loss calculation. 12 | 13 | Methods: 14 | - __call__: computes the InfoNCE loss given the motion and text features, and an optional distance matrix. 15 | """ 16 | 17 | def __init__(self, t): 18 | """ 19 | Initializes the InfoNCE object with a given temperature parameter. 20 | 21 | Inputs: 22 | - t: a temperature parameter for the softmax function in the loss calculation. 23 | """ 24 | self.t = t 25 | 26 | def __call__(self, f, dist): 27 | """ 28 | Computes the InfoNCE loss given the motion and text features, and an optional distance matrix. 29 | 30 | Inputs: 31 | - f: a tuple containing the motion and text features. Each feature is a 2D tensor of shape (N, d). 32 | - dist: an optional distance matrix. If provided, it is used to mask the logits. 33 | 34 | Outputs: 35 | - loss_m: the InfoNCE loss computed using the motion features. 36 | - loss_t: the InfoNCE loss computed using the text features. 
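    Note (added for clarity): the implementation below averages the two symmetric terms and
    returns a single scalar, loss = (loss_m + loss_t) / 2. With normalized embeddings and
    temperature t, loss_m is the cross-entropy of each motion's similarity row against its
    matching text index, and loss_t is the same for the transposed text-to-motion direction.

    Example (illustrative sketch; the temperature 0.1, batch size, and embedding size are
    assumed values):

        loss_fn = InfoNCE(t=0.1)
        f_motion = torch.randn(32, 256)  # (N, d) motion embeddings
        f_text = torch.randn(32, 256)    # (N, d) text embeddings
        loss = loss_fn((f_motion, f_text), dist=None)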
37 | """ 38 | t = self.t 39 | f_motion, f_text = f[0], f[1] 40 | 41 | N, d = f_motion.shape[0], f_motion.shape[1] 42 | 43 | # Normalize the motion and text features 44 | Emb_motion = F.normalize(f_motion, dim=1) 45 | Emb_text = F.normalize(f_text, dim=1) 46 | 47 | # Compute the logits as the dot product of the normalized features 48 | t = torch.tensor(t).to(f_motion.device) 49 | logits = torch.mm(Emb_motion, Emb_text.T) 50 | 51 | # If a distance matrix is provided, use it to mask the logits 52 | if dist is not None: 53 | text_logits = dist.detach() 54 | mask = torch.where( 55 | torch.logical_and(text_logits > 0.85, text_logits < 1.0 - 1e-100), 56 | torch.tensor(float("-inf")).to(f_motion.device), 57 | torch.tensor(1.0e100).to(f_motion.device), 58 | ) 59 | mask.diagonal().fill_(float("inf")) 60 | logits = torch.min(mask, logits) 61 | 62 | N = f_motion.shape[0] 63 | 64 | # Compute the labels as the indices of the features 65 | labels = torch.arange(N).to(f_motion.device) 66 | 67 | # Compute the InfoNCE loss for the motion and text features 68 | loss_m = F.cross_entropy(logits / t, labels) 69 | loss_t = F.cross_entropy(logits.T / t, labels) 70 | 71 | loss = (loss_m + loss_t) / 2 72 | 73 | return loss 74 | 75 | def __repr__(self): 76 | return "InfoNCE()" 77 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/losses/kl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class KLLoss: 5 | def __init__(self): 6 | pass 7 | 8 | def __call__(self, q, p): 9 | div = torch.distributions.kl_divergence(q, p) 10 | return div.mean() 11 | 12 | def __repr__(self): 13 | return "KLLoss()" 14 | 15 | 16 | class KLLossMulti: 17 | def __init__(self): 18 | self.klloss = KLLoss() 19 | 20 | def __call__(self, qlist, plist): 21 | return sum([self.klloss(q, p) for q, p in zip(qlist, plist)]) 22 | 23 | def __repr__(self): 24 | return "KLLossMulti()" 25 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .compute import ComputeMetrics 2 | from .tm2t import TM2TMetrics 3 | from .mm import MMMetrics 4 | from .uncond import UncondMetrics 5 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/metrics/mm.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torchmetrics import Metric 6 | from torchmetrics.functional import pairwise_euclidean_distance 7 | 8 | from .utils import * 9 | 10 | 11 | class MMMetrics(Metric): 12 | full_state_update = True 13 | 14 | def __init__(self, mm_num_times=10, dist_sync_on_step=True, **kwargs): 15 | super().__init__(dist_sync_on_step=dist_sync_on_step) 16 | 17 | self.name = "MultiModality scores" 18 | 19 | self.mm_num_times = mm_num_times 20 | 21 | self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum") 22 | self.add_state("count_seq", default=torch.tensor(0), dist_reduce_fx="sum") 23 | 24 | self.metrics = ["MultiModality"] 25 | self.add_state("MultiModality", default=torch.tensor(0.0), dist_reduce_fx="sum") 26 | 27 | # chached batches 28 | self.add_state("mm_motion_embeddings", default=[], dist_reduce_fx=None) 29 | 30 | def compute(self, sanity_flag): 31 | count = self.count.item() 32 | count_seq = self.count_seq.item() 33 | 34 | # init metrics 
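        # (added note) `metrics` below is seeded from the registered metric states; the actual
        # MultiModality value is then recomputed from the generations cached in `update()`.
        # `calculate_multimodality_np` is expected to sample `mm_num_times` pairs of motions
        # generated for the same text and average their embedding distances, so larger values
        # indicate more diverse generations per prompt.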
35 | metrics = {metric: getattr(self, metric) for metric in self.metrics} 36 | 37 | # if in sanity check stage then jump 38 | if sanity_flag: 39 | return metrics 40 | 41 | # cat all embeddings 42 | all_mm_motions = torch.cat(self.mm_motion_embeddings, axis=0).cpu().numpy() 43 | metrics["MultiModality"] = calculate_multimodality_np( 44 | all_mm_motions, self.mm_num_times 45 | ) 46 | 47 | return {**metrics} 48 | 49 | def update( 50 | self, 51 | mm_motion_embeddings: Tensor, 52 | lengths: List[int], 53 | ): 54 | self.count += sum(lengths) 55 | self.count_seq += len(lengths) 56 | 57 | # store all mm motion embeddings 58 | self.mm_motion_embeddings.append(mm_motion_embeddings) 59 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/metrics/uncond.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torchmetrics import Metric 6 | from torchmetrics.functional import pairwise_euclidean_distance 7 | 8 | from .utils import * 9 | 10 | 11 | class UncondMetrics(Metric): 12 | full_state_update = True 13 | 14 | def __init__( 15 | self, top_k=3, R_size=32, diversity_times=300, dist_sync_on_step=True, **kwargs 16 | ): 17 | super().__init__(dist_sync_on_step=dist_sync_on_step) 18 | 19 | self.name = "fid, kid, and diversity scores" 20 | 21 | self.top_k = top_k 22 | self.R_size = R_size 23 | self.diversity_times = 300 24 | 25 | self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum") 26 | self.add_state("count_seq", default=torch.tensor(0), dist_reduce_fx="sum") 27 | 28 | self.metrics = [] 29 | 30 | # KID 31 | self.add_state("KID_mean", default=torch.tensor(0.0), dist_reduce_fx="mean") 32 | self.add_state("KID_std", default=torch.tensor(0.0), dist_reduce_fx="mean") 33 | self.metrics.extend(["KID_mean", "KID_std"]) 34 | # Fid 35 | self.add_state("FID", default=torch.tensor(0.0), dist_reduce_fx="mean") 36 | self.metrics.append("FID") 37 | 38 | # Diversity 39 | self.add_state("Diversity", default=torch.tensor(0.0), dist_reduce_fx="sum") 40 | self.add_state("gt_Diversity", default=torch.tensor(0.0), dist_reduce_fx="sum") 41 | self.metrics.extend(["Diversity", "gt_Diversity"]) 42 | 43 | # chached batches 44 | self.add_state("recmotion_embeddings", default=[], dist_reduce_fx=None) 45 | self.add_state("gtmotion_embeddings", default=[], dist_reduce_fx=None) 46 | 47 | def compute(self, sanity_flag): 48 | count = self.count.item() 49 | count_seq = self.count_seq.item() 50 | 51 | # init metrics 52 | metrics = {metric: getattr(self, metric) for metric in self.metrics} 53 | 54 | # if in sanity check stage then jump 55 | if sanity_flag: 56 | return metrics 57 | 58 | # cat all embeddings 59 | all_gtmotions = torch.cat(self.gtmotion_embeddings, axis=0).cpu() 60 | all_genmotions = torch.cat(self.recmotion_embeddings, axis=0).cpu() 61 | 62 | # Compute kid 63 | 64 | KID_mean, KID_std = calculate_kid(all_gtmotions, all_genmotions) 65 | metrics["KID_mean"] = KID_mean 66 | metrics["KID_std"] = KID_std 67 | 68 | # tensor -> numpy for FID 69 | all_genmotions = all_genmotions.numpy() 70 | all_gtmotions = all_gtmotions.numpy() 71 | 72 | # Compute fid 73 | mu, cov = calculate_activation_statistics_np(all_genmotions) 74 | 75 | # gt_mu, gt_cov = calculate_activation_statistics_np(all_gtmotions) 76 | gt_mu, gt_cov = calculate_activation_statistics_np(all_gtmotions) 77 | metrics["FID"] = calculate_frechet_distance_np(gt_mu, gt_cov, mu, cov) 78 | 79 | # Compute 
diversity 80 | assert count_seq > self.diversity_times 81 | print(all_genmotions.shape) 82 | print(all_gtmotions.shape) 83 | metrics["Diversity"] = calculate_diversity_np( 84 | all_genmotions, self.diversity_times 85 | ) 86 | metrics["gt_Diversity"] = calculate_diversity_np( 87 | all_gtmotions, self.diversity_times 88 | ) 89 | 90 | return {**metrics} 91 | 92 | def update( 93 | self, 94 | gtmotion_embeddings: Tensor, 95 | lengths: List[int], 96 | recmotion_embeddings=None, 97 | ): 98 | self.count += sum(lengths) 99 | self.count_seq += len(lengths) 100 | 101 | # [bs, nlatent*ndim] <= [bs, nlatent, ndim] 102 | if recmotion_embeddings is not None: 103 | recmotion_embeddings = torch.flatten( 104 | recmotion_embeddings, start_dim=1 105 | ).detach() 106 | 107 | # store all texts and motions 108 | self.recmotion_embeddings.append(recmotion_embeddings) 109 | gtmotion_embeddings = torch.flatten(gtmotion_embeddings, start_dim=1).detach() 110 | 111 | self.gtmotion_embeddings.append(gtmotion_embeddings) 112 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/modeltype/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/modeltype/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/modeltype/smplx_layer.py: -------------------------------------------------------------------------------- 1 | from human_body_prior.body_model.body_model import BodyModel 2 | from pytorch_lightning import LightningModule 3 | import numpy as np 4 | import torch 5 | import time 6 | from torch import nn 7 | 8 | 9 | class smplx_layer(LightningModule): 10 | def __init__(self): 11 | super(smplx_layer, self).__init__() 12 | self.smplx_model = BodyModel( 13 | bm_fname="/comp_robot/lushunlin/HumanML3D-1/body_models/smplx/neutral/model.npz", 14 | num_betas=10, 15 | model_type="smplx", 16 | ) 17 | 18 | 19 | if __name__ == "__main__": 20 | pose = ( 21 | torch.tensor( 22 | np.load( 23 | "/comp_robot/lushunlin/visualization/visualization/test_case/motionx_humanml_smplx_322.npy" 24 | ) 25 | ) 26 | .float() 27 | .cuda() 28 | ) 29 | smplx = smplx_layer().cuda() 30 | output = smplx.smplx_model( 31 | pose_body=pose[:, 3:66], 32 | pose_hand=pose[:, 66:156], 33 | root_orient=pose[:, :3], 34 | pose_jaw=pose[:, 156:159], 35 | ).Jtr 36 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/operator/__init__.py: -------------------------------------------------------------------------------- 1 | from .adain import AdaptiveInstanceNorm1d 2 | from .blocks import ConvBlock, LinearBlock 3 | from .position_encoding_layer import PositionalEncoding 4 | 5 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/operator/adain.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class AdaptiveInstanceNorm1d(nn.Module): 6 | """ 7 | This class is a subclass of nn.Module. 8 | It implements the Adaptive Instance Normalization (AdaIN) layer. 9 | """ 10 | def __init__(self, num_features, eps=1e-5, momentum=0.1): 11 | """ 12 | Inputs: 13 | num_features (int): The number of features in the input. 14 | eps (float): A small number added to the denominator for numerical stability. 
Default is 1e-5. 15 | momentum (float): The momentum factor. Default is 0.1. 16 | 17 | This function is the constructor of the AdaptiveInstanceNorm1d class. It initializes the class variables and registers the running mean and variance buffers. 18 | """ 19 | super(AdaptiveInstanceNorm1d, self).__init__() 20 | self.num_features = num_features 21 | self.eps = eps 22 | self.momentum = momentum 23 | self.weight = None 24 | self.bias = None 25 | self.register_buffer('running_mean', torch.zeros(num_features)) 26 | self.register_buffer('running_var', torch.ones(num_features)) 27 | 28 | def forward(self, x, direct_weighting=False, no_std=False): 29 | """ 30 | Inputs: 31 | x (Tensor): The input tensor. 32 | direct_weighting (bool): If True, apply direct weighting. Default is False. 33 | no_std (bool): If True, do not apply standard deviation. Default is False. 34 | 35 | This function applies the AdaIN operation to the input tensor and returns the output tensor. 36 | 37 | Returns: 38 | Tensor: The output tensor. 39 | """ 40 | assert self.weight is not None and \ 41 | self.bias is not None, "Please assign AdaIN weight first" 42 | 43 | # (bs, nfeats, nframe) <= (nframe, bs, nfeats) 44 | x = x.permute(1,2,0) 45 | 46 | b, c = x.size(0), x.size(1) # batch size & channels 47 | running_mean = self.running_mean.repeat(b) 48 | running_var = self.running_var.repeat(b) 49 | if direct_weighting: 50 | x_reshaped = x.contiguous().view(b * c) 51 | if no_std: 52 | out = x_reshaped + self.bias 53 | else: 54 | out = x_reshaped.mul(self.weight) + self.bias 55 | out = out.view(b, c, *x.size()[2:]) 56 | else: 57 | x_reshaped = x.contiguous().view(1, b * c, *x.size()[2:]) 58 | out = F.batch_norm( 59 | x_reshaped, running_mean, running_var, self.weight, self.bias, 60 | True, self.momentum, self.eps) 61 | out = out.view(b, c, *x.size()[2:]) 62 | 63 | # (nframe, bs, nfeats) <= (bs, nfeats, nframe) 64 | out = out.permute(2,0,1) 65 | return out 66 | 67 | def __repr__(self): 68 | return self.__class__.__name__ + '(' + str(self.num_features) + ')' 69 | 70 | def assign_adain_params(adain_params, model): 71 | """ 72 | Inputs: 73 | adain_params (Tensor): The AdaIN parameters. 74 | model (nn.Module): The model. 75 | 76 | This function assigns the AdaIN parameters to the AdaIN layers in the model. 77 | 78 | Returns: 79 | None 80 | """ 81 | # assign the adain_params to the AdaIN layers in model 82 | for m in model.modules(): 83 | if m.__class__.__name__ == "AdaptiveInstanceNorm1d": 84 | mean = adain_params[: , : m.num_features] 85 | std = adain_params[: , m.num_features: 2 * m.num_features] 86 | m.bias = mean.contiguous().view(-1) 87 | m.weight = std.contiguous().view(-1) 88 | if adain_params.size(1) > 2 * m.num_features: 89 | adain_params = adain_params[: , 2 * m.num_features:] 90 | 91 | 92 | def get_num_adain_params(model): 93 | """ 94 | Inputs: 95 | model (nn.Module): The model. 96 | 97 | This function returns the number of AdaIN parameters needed by the model. 98 | 99 | Returns: 100 | int: The number of AdaIN parameters needed by the model.
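    Example (illustrative usage sketch; `style_mlp`, `decoder`, `style_code`, and `x` are
    hypothetical objects, not defined in this file):

        n = get_num_adain_params(decoder)           # total AdaIN features to predict
        adain_params = style_mlp(style_code)        # shape (batch, n)
        assign_adain_params(adain_params, decoder)  # writes mean/std into each AdaIN layer
        out = decoder(x)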
101 | """ 102 | # return the number of AdaIN parameters needed by the model 103 | num_adain_params = 0 104 | for m in model.modules(): 105 | if m.__class__.__name__ == "AdaptiveInstanceNorm1d": 106 | num_adain_params += 2 * m.num_features 107 | return num_adain_params 108 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/operator/position_encoding_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class PositionalEncoding(nn.Module): 7 | 8 | def __init__(self, d_model, dropout=0.1, max_len=5000, batch_first=False): 9 | super().__init__() 10 | self.batch_first = batch_first 11 | 12 | self.dropout = nn.Dropout(p=dropout) 13 | 14 | pe = torch.zeros(max_len, d_model) 15 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 16 | div_term = torch.exp(torch.arange( 17 | 0, d_model, 2).float() * (-np.log(10000.0) / d_model)) 18 | pe[:, 0::2] = torch.sin(position * div_term) 19 | pe[:, 1::2] = torch.cos(position * div_term) 20 | pe = pe.unsqueeze(0).transpose(0, 1) 21 | 22 | self.register_buffer("pe", pe) 23 | 24 | def forward(self, x): 25 | # not used in the final model 26 | if self.batch_first: 27 | x = x + self.pe.permute(1, 0, 2)[:, : x.shape[1], :] 28 | else: 29 | x = x + self.pe[: x.shape[0], :] 30 | return self.dropout(x) 31 | -------------------------------------------------------------------------------- /OpenTMA/tma/models/operator/self_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/operator/self_attention.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/models/tools/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/models/tools/tools.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | def remove_padding(tensors, lengths): 4 | """ 5 | Inputs: 6 | tensors (list): A list of tensors from which padding is to be removed. 7 | lengths (list): A list of integers representing the actual lengths of the tensors. 8 | 9 | This function removes padding from the tensors based on the actual lengths. 10 | It returns a list of tensors with padding removed. 11 | 12 | Returns: 13 | list: A list of tensors with padding removed. 14 | """ 15 | return [tensor[:tensor_length] for tensor, tensor_length in zip(tensors, lengths)] 16 | 17 | class AutoParams(nn.Module): 18 | """ 19 | This class is a subclass of nn.Module. 20 | It is used to automatically set the parameters of a model. 21 | It has two types of parameters: needed parameters and optional parameters. 22 | Needed parameters must be provided when an instance of the class is created, 23 | otherwise a ValueError is raised. 24 | 25 | Optional parameters can be provided when an instance of the class is created, 26 | otherwise they are set to their default values. 
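    Example (illustrative sketch; the subclass and parameter names are invented for the docs):

        class ToyEncoder(AutoParams):
            needed_params = ["latent_dim"]
            optional_params = {"dropout": 0.1}

        enc = ToyEncoder(latent_dim=256)
        # enc.latent_dim == 256, enc.dropout == 0.1 (default applied)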
27 | """ 28 | def __init__(self, **kargs): 29 | try: 30 | for param in self.needed_params: 31 | if param in kargs: 32 | setattr(self, param, kargs[param]) 33 | else: 34 | raise ValueError(f"{param} is needed.") 35 | except : 36 | pass 37 | 38 | try: 39 | for param, default in self.optional_params.items(): 40 | if param in kargs and kargs[param] is not None: 41 | setattr(self, param, kargs[param]) 42 | else: 43 | setattr(self, param, default) 44 | except : 45 | pass 46 | super().__init__() 47 | 48 | 49 | # taken from joeynmt repo 50 | def freeze_params(module: nn.Module) -> None: 51 | """ 52 | Freeze the parameters of this module, 53 | i.e. do not update them during training 54 | 55 | :param module: freeze parameters of this module 56 | """ 57 | for _, p in module.named_parameters(): 58 | p.requires_grad = False 59 | -------------------------------------------------------------------------------- /OpenTMA/tma/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/tools/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/tools/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import tqdm 3 | 4 | 5 | class LevelsFilter(logging.Filter): 6 | """ 7 | This class is a filter for logging levels. It allows only the logs of specified levels to pass. 8 | """ 9 | 10 | def __init__(self, levels): 11 | self.levels = [getattr(logging, level) for level in levels] 12 | 13 | def filter(self, record): 14 | return record.levelno in self.levels 15 | 16 | 17 | class StreamToLogger(object): 18 | """ 19 | Fake file-like stream object that redirects writes to a logger instance. 20 | """ 21 | 22 | def __init__(self, logger, level): 23 | self.logger = logger 24 | self.level = level 25 | self.linebuf = '' 26 | 27 | def write(self, buf): 28 | for line in buf.rstrip().splitlines(): 29 | self.logger.log(self.level, line.rstrip()) 30 | 31 | def flush(self): 32 | pass 33 | 34 | 35 | class TqdmLoggingHandler(logging.Handler): 36 | """ 37 | This class is a logging handler that writes to a tqdm progress bar. 38 | """ 39 | 40 | def __init__(self, level=logging.NOTSET): 41 | super().__init__(level) 42 | 43 | def emit(self, record): 44 | try: 45 | msg = self.format(record) 46 | tqdm.tqdm.write(msg) 47 | self.flush() 48 | except Exception: 49 | self.handleError(record) 50 | -------------------------------------------------------------------------------- /OpenTMA/tma/tools/runid.py: -------------------------------------------------------------------------------- 1 | """ 2 | runid util. 
3 | Taken from wandb.sdk.lib.runid 4 | """ 5 | 6 | import shortuuid # type: ignore 7 | 8 | 9 | def generate_id() -> str: 10 | # ~3t run ids (36**8) 11 | run_gen = shortuuid.ShortUUID(alphabet=list("0123456789abcdefghijklmnopqrstuvwxyz")) 12 | return run_gen.random(8) -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Transform 2 | from .smpl import SMPLTransform 3 | # from .xyz import XYZTransform 4 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, fields 2 | 3 | 4 | class Transform: 5 | 6 | def collate(self, lst_datastruct): 7 | from tma.datasets.utils import collate_tensor_with_padding 8 | example = lst_datastruct[0] 9 | 10 | def collate_or_none(key): 11 | if example[key] is None: 12 | return None 13 | key_lst = [x[key] for x in lst_datastruct] 14 | return collate_tensor_with_padding(key_lst) 15 | 16 | kwargs = {key: collate_or_none(key) for key in example.datakeys} 17 | 18 | return self.Datastruct(**kwargs) 19 | 20 | 21 | # Inspired from SMPLX library 22 | # need to define "datakeys" and transforms 23 | @dataclass 24 | class Datastruct: 25 | 26 | def __getitem__(self, key): 27 | return getattr(self, key) 28 | 29 | def __setitem__(self, key, value): 30 | self.__dict__[key] = value 31 | 32 | def get(self, key, default=None): 33 | return getattr(self, key, default) 34 | 35 | def __iter__(self): 36 | return self.keys() 37 | 38 | def keys(self): 39 | keys = [t.name for t in fields(self)] 40 | return iter(keys) 41 | 42 | def values(self): 43 | values = [getattr(self, t.name) for t in fields(self)] 44 | return iter(values) 45 | 46 | def items(self): 47 | data = [(t.name, getattr(self, t.name)) for t in fields(self)] 48 | return iter(data) 49 | 50 | def to(self, *args, **kwargs): 51 | for key in self.datakeys: 52 | if self[key] is not None: 53 | self[key] = self[key].to(*args, **kwargs) 54 | return self 55 | 56 | @property 57 | def device(self): 58 | return self[self.datakeys[0]].device 59 | 60 | def detach(self): 61 | 62 | def detach_or_none(tensor): 63 | if tensor is not None: 64 | return tensor.detach() 65 | return None 66 | 67 | kwargs = {key: detach_or_none(self[key]) for key in self.datakeys} 68 | return self.transforms.Datastruct(**kwargs) 69 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/feats2smpl.py: -------------------------------------------------------------------------------- 1 | from os.path import join as pjoin 2 | 3 | import numpy as np 4 | import torch 5 | 6 | import tma.data.humanml.utils.paramUtil as paramUtil 7 | from tma.data.humanml.data.dataset import Text2MotionDatasetV2 8 | from tma.data.humanml.scripts.motion_process import recover_from_ric 9 | from tma.data.humanml.utils.plot_script import plot_3d_motion 10 | 11 | # Define the skeleton structure using the kinematic chain from paramUtil 12 | skeleton = paramUtil.t2m_kinematic_chain 13 | 14 | 15 | def main(): 16 | # Define paths and parameters 17 | data_root = '../datasets/humanml3d' 18 | feastures_path = 'in.npy' 19 | animation_save_path = 'in.mp4' 20 | 21 | fps = 20 22 | # Load the mean and standard deviation of the dataset 23 | mean = np.load(pjoin(data_root, 'Mean.npy')) 24 | std = np.load(pjoin(data_root, 
'Std.npy')) 25 | 26 | # Load the motion features and normalize them using the mean and standard deviation 27 | motion = np.load(feastures_path) 28 | motion = motion * std + mean 29 | motion_rec = recover_from_ric(torch.tensor(motion), 22).cpu().numpy() 30 | 31 | # Scale the recovered motion 32 | motion_rec = motion_rec * 1.3 33 | # Plot and save the 3D motion 34 | plot_3d_motion(animation_save_path, motion_rec, title='input', fps=fps) 35 | 36 | 37 | # Run the main function if the script is run as the main program 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/identity.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from torch import Tensor 3 | 4 | from .base import Datastruct, dataclass, Transform 5 | 6 | 7 | class IdentityTransform(Transform): 8 | def __init__(self, **kwargs): 9 | return 10 | 11 | def Datastruct(self, **kwargs): 12 | return IdentityDatastruct(**kwargs) 13 | 14 | def __repr__(self): 15 | return "IdentityTransform()" 16 | 17 | 18 | @dataclass 19 | class IdentityDatastruct(Datastruct): 20 | transforms: IdentityTransform 21 | 22 | features: Optional[Tensor] = None 23 | 24 | def __post_init__(self): 25 | self.datakeys = ["features"] 26 | 27 | def __len__(self): 28 | return len(self.rfeats) 29 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/joints2jfeats/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Joints2Jfeats 2 | from .rifke import Rifke 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/joints2jfeats/base.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from torch import Tensor, nn 5 | from pathlib import Path 6 | 7 | 8 | class Joints2Jfeats(nn.Module): 9 | def __init__(self, path: Optional[str] = None, 10 | normalization: bool = False, 11 | eps: float = 1e-12, 12 | **kwargs) -> None: 13 | if normalization and path is None: 14 | raise TypeError( 15 | "You should provide a path if normalization is on.") 16 | 17 | super().__init__() 18 | self.normalization = normalization 19 | self.eps = eps 20 | 21 | if normalization: 22 | mean_path = Path(path) / "jfeats_mean.pt" 23 | std_path = Path(path) / "jfeats_std.pt" 24 | self.register_buffer('mean', torch.load(mean_path)) 25 | self.register_buffer('std', torch.load(std_path)) 26 | 27 | def normalize(self, features: Tensor) -> Tensor: 28 | if self.normalization: 29 | features = (features - self.mean)/(self.std + self.eps) 30 | return features 31 | 32 | def unnormalize(self, features: Tensor) -> Tensor: 33 | if self.normalization: 34 | features = features * self.std + self.mean 35 | return features 36 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/joints2jfeats/tools.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from tma.utils.joints import mmm_joints, humanml3d_joints, motionx_joints 5 | 6 | # Get the indexes of particular body part 7 | 8 | 9 | # .T is deprecated now for reversing a tensor 10 | def T(x): 11 | return x.permute(*torch.arange(x.ndim - 1, -1, -1)) 12 | 13 | 14 | def get_forward_direction(poses, 
jointstype="mmm"): 15 | if jointstype == "mmm" or jointstype == "mmmns": 16 | joints = mmm_joints 17 | elif jointstype == "humanml3d": 18 | joints = humanml3d_joints 19 | elif jointstype == "motionx": 20 | joints = motionx_joints 21 | else: 22 | raise TypeError('Only supports mmm, mmmns and humanl3d jointstype') 23 | # Shoulders 24 | LS, RS = joints.index("LS"), joints.index("RS") 25 | # Hips 26 | LH, RH = joints.index("LH"), joints.index("RH") 27 | 28 | across = poses[..., RH, :] - poses[..., LH, :] + poses[..., RS, :] - poses[ 29 | ..., LS, :] 30 | forward = torch.stack((-across[..., 2], across[..., 0]), axis=-1) 31 | forward = torch.nn.functional.normalize(forward, dim=-1) 32 | return forward 33 | 34 | 35 | def get_floor(poses, jointstype="mmm"): 36 | if jointstype == "mmm" or jointstype == "mmmns": 37 | joints = mmm_joints 38 | elif jointstype == "humanml3d": 39 | joints = humanml3d_joints 40 | elif jointstype == "motionx": 41 | joints = motionx_joints 42 | else: 43 | raise TypeError('Only supports mmm, mmmns and humanl3d jointstype') 44 | ndim = len(poses.shape) 45 | # Feet 46 | LM, RM = joints.index("LMrot"), joints.index("RMrot") 47 | LF, RF = joints.index("LF"), joints.index("RF") 48 | foot_heights = poses[..., (LM, LF, RM, RF), 1].min(-1).values 49 | floor_height = softmin(foot_heights, softness=0.5, dim=-1) 50 | return T(floor_height[(ndim - 2) * [None]]) 51 | 52 | 53 | def softmax(x, softness=1.0, dim=None): 54 | maxi, mini = x.max(dim=dim).values, x.min(dim=dim).values 55 | return maxi + torch.log(softness + torch.exp(mini - maxi)) 56 | 57 | 58 | def softmin(x, softness=1.0, dim=0): 59 | return -softmax(-x, softness=softness, dim=dim) 60 | 61 | 62 | def gaussian_filter1d(_inputs, sigma, truncate=4.0): 63 | # Code adapted/mixed from scipy library into pytorch 64 | # https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/ndimage/filters.py#L211 65 | # and gaussian kernel 66 | # https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/ndimage/filters.py#L179 67 | # Correspond to mode="nearest" and order = 0 68 | # But works batched 69 | if len(_inputs.shape) == 2: 70 | inputs = _inputs[None] 71 | else: 72 | inputs = _inputs 73 | 74 | sd = float(sigma) 75 | radius = int(truncate * sd + 0.5) 76 | sigma2 = sigma * sigma 77 | x = torch.arange(-radius, 78 | radius + 1, 79 | device=inputs.device, 80 | dtype=inputs.dtype) 81 | phi_x = torch.exp(-0.5 / sigma2 * x**2) 82 | phi_x = phi_x / phi_x.sum() 83 | 84 | # Conv1d weights 85 | groups = inputs.shape[-1] 86 | weights = torch.tile(phi_x, (groups, 1, 1)) 87 | inputs = inputs.transpose(-1, -2) 88 | outputs = F.conv1d(inputs, weights, padding="same", 89 | groups=groups).transpose(-1, -2) 90 | 91 | return outputs.reshape(_inputs.shape) 92 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/joints2rots/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tma.utils.joints import mmm_joints, smplh2mmm_indexes 3 | 4 | # Map joints Name to SMPL joints idx 5 | JOINT_MAP = { 6 | 'MidHip': 0, 7 | 'LHip': 1, 8 | 'LKnee': 4, 9 | 'LAnkle': 7, 10 | 'LFoot': 10, 11 | 'RHip': 2, 12 | 'RKnee': 5, 13 | 'RAnkle': 8, 14 | 'RFoot': 11, 15 | 'LShoulder': 16, 16 | 'LElbow': 18, 17 | 'LWrist': 20, 18 | 'LHand': 22, 19 | 'RShoulder': 17, 20 | 'RElbow': 19, 21 | 'RWrist': 21, 22 | 'RHand': 23, 23 | 'spine1': 3, 24 | 'spine2': 6, 25 | 'spine3': 9, 26 | 'Neck': 12, 27 | 'Head': 15, 28 | 
'LCollar': 13, 29 | 'Rcollar': 14, 30 | 'Nose': 24, 31 | 'REye': 26, 32 | 'LEye': 26, 33 | 'REar': 27, 34 | 'LEar': 28, 35 | 'LHeel': 31, 36 | 'RHeel': 34, 37 | 'OP RShoulder': 17, 38 | 'OP LShoulder': 16, 39 | 'OP RHip': 2, 40 | 'OP LHip': 1, 41 | 'OP Neck': 12, 42 | } 43 | 44 | mmm2smpl_correspondence = { 45 | "root": "MidHip", 46 | "BP": "spine1", 47 | "BT": "spine3", 48 | "BLN": "Neck", 49 | "BUN": "Head", 50 | "LS": "LShoulder", 51 | "LE": "LElbow", 52 | "LW": "LWrist", 53 | "RS": "RShoulder", 54 | "RE": "RElbow", 55 | "RW": "RWrist", 56 | "LH": "LHip", 57 | "LK": "LKnee", 58 | "LA": "LAnkle", 59 | "LMrot": "LHeel", 60 | "LF": "LFoot", 61 | "RH": "RHip", 62 | "RK": "RKnee", 63 | "RA": "RAnkle", 64 | "RMrot": "RHeel", 65 | "RF": "RFoot" 66 | } 67 | 68 | full_smpl_idx = range(24) 69 | key_smpl_idx = [0, 1, 4, 7, 2, 5, 8, 17, 19, 21, 16, 18, 20] 70 | 71 | AMASS_JOINT_MAP = { 72 | 'MidHip': 0, 73 | 'LHip': 1, 74 | 'LKnee': 4, 75 | 'LAnkle': 7, 76 | 'LFoot': 10, 77 | 'RHip': 2, 78 | 'RKnee': 5, 79 | 'RAnkle': 8, 80 | 'RFoot': 11, 81 | 'LShoulder': 16, 82 | 'LElbow': 18, 83 | 'LWrist': 20, 84 | 'RShoulder': 17, 85 | 'RElbow': 19, 86 | 'RWrist': 21, 87 | 'spine1': 3, 88 | 'spine2': 6, 89 | 'spine3': 9, 90 | 'Neck': 12, 91 | 'Head': 15, 92 | 'LCollar': 13, 93 | 'Rcollar': 14, 94 | } 95 | amass_idx = range(22) 96 | amass_smpl_idx = range(22) 97 | 98 | # cal mmm in smpl index 99 | smpl2mmm_correspondence = { 100 | val: key 101 | for key, val in mmm2smpl_correspondence.items() 102 | } 103 | smpl2mmm_indexes = [JOINT_MAP[mmm2smpl_correspondence[x]] for x in mmm_joints] 104 | 105 | # cal mmm joints map 106 | MMM_JOINT_MAP = { 107 | val: JOINT_MAP[val] 108 | for key, val in mmm2smpl_correspondence.items() 109 | } 110 | 111 | # mmm_idx = range(21) 112 | # mmm_smpl_dix = smpl2mmm_indexes 113 | # mmm_smpl_dix = smplh2mmm_indexes 114 | # todo - configable 115 | SMPL_MODEL_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/" 116 | GMM_MODEL_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/" 117 | SMPL_MEAN_FILE = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/neutral_smpl_mean_params.h5" 118 | # for collsion 119 | Part_Seg_DIR = "/apdcephfs/share_1227775/shingxchen/AIMotion/TMOSTData/deps/smpl_models/smplx_parts_segm.pkl" 120 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rotation2xyz.py: -------------------------------------------------------------------------------- 1 | # This code is based on https://github.com/Mathux/ACTOR.git 2 | import torch 3 | import tma.utils.rotation_conversions as geometry 4 | 5 | from .smpl import SMPL, JOINTSTYPE_ROOT 6 | JOINTSTYPES = ["a2m", "a2mpl", "smpl", "vibe", "vertices"] 7 | 8 | 9 | class Rotation2xyz(torch.nn.Module): 10 | 11 | def __init__(self, smpl_path): 12 | super().__init__() 13 | self.smpl_model = SMPL(smpl_path).eval() 14 | 15 | def __call__(self, 16 | x, 17 | mask, 18 | pose_rep, 19 | translation, 20 | glob, 21 | jointstype, 22 | vertstrans, 23 | betas=None, 24 | beta=0, 25 | glob_rot=None, 26 | get_rotations_back=False, 27 | **kwargs): 28 | if pose_rep == "xyz": 29 | return x 30 | 31 | if mask is None: 32 | mask = torch.ones((x.shape[0], x.shape[-1]), 33 | dtype=bool, 34 | device=x.device) 35 | 36 | if not glob and glob_rot is None: 37 | raise TypeError( 38 | "You must specify global rotation if glob is False") 39 | 40 | if jointstype not in JOINTSTYPES: 41 | raise NotImplementedError("This jointstype is not 
implemented.") 42 | 43 | if translation: 44 | x_translations = x[:, -1, :3] 45 | x_rotations = x[:, :-1] 46 | else: 47 | x_rotations = x 48 | 49 | x_rotations = x_rotations.permute(0, 3, 1, 2) 50 | nsamples, time, njoints, feats = x_rotations.shape 51 | 52 | # Compute rotations (convert only masked sequences output) 53 | if pose_rep == "rotvec": 54 | rotations = geometry.axis_angle_to_matrix(x_rotations[mask]) 55 | elif pose_rep == "rotmat": 56 | rotations = x_rotations[mask].view(-1, njoints, 3, 3) 57 | elif pose_rep == "rotquat": 58 | rotations = geometry.quaternion_to_matrix(x_rotations[mask]) 59 | elif pose_rep == "rot6d": 60 | rotations = geometry.rotation_6d_to_matrix(x_rotations[mask]) 61 | else: 62 | raise NotImplementedError("No geometry for this one.") 63 | 64 | if not glob: 65 | global_orient = torch.tensor(glob_rot, device=x.device) 66 | global_orient = geometry.axis_angle_to_matrix(global_orient).view( 67 | 1, 1, 3, 3) 68 | global_orient = global_orient.repeat(len(rotations), 1, 1, 1) 69 | else: 70 | global_orient = rotations[:, 0] 71 | rotations = rotations[:, 1:] 72 | 73 | if betas is None: 74 | betas = torch.zeros( 75 | [rotations.shape[0], self.smpl_model.num_betas], 76 | dtype=rotations.dtype, 77 | device=rotations.device) 78 | betas[:, 1] = beta 79 | # import ipdb; ipdb.set_trace() 80 | out = self.smpl_model(body_pose=rotations, 81 | global_orient=global_orient, 82 | betas=betas) 83 | 84 | # get the desirable joints 85 | joints = out[jointstype] 86 | 87 | x_xyz = torch.empty(nsamples, 88 | time, 89 | joints.shape[1], 90 | 3, 91 | device=x.device, 92 | dtype=x.dtype) 93 | x_xyz[~mask] = 0 94 | x_xyz[mask] = joints 95 | 96 | x_xyz = x_xyz.permute(0, 2, 3, 1).contiguous() 97 | 98 | # the first translation root at the origin on the prediction 99 | if jointstype != "vertices": 100 | rootindex = JOINTSTYPE_ROOT[jointstype] 101 | x_xyz = x_xyz - x_xyz[:, [rootindex], :, :] 102 | 103 | if translation and vertstrans: 104 | # the first translation root at the origin 105 | x_translations = x_translations - x_translations[:, :, [0]] 106 | 107 | # add the translation to all the joints 108 | x_xyz = x_xyz + x_translations[:, None, :, :] 109 | 110 | if get_rotations_back: 111 | return x_xyz, rotations, global_orient 112 | else: 113 | return x_xyz 114 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rots2joints/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Rots2Joints 2 | from .smplh import SMPLH 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rots2joints/base.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from torch import Tensor, nn 5 | from pathlib import Path 6 | 7 | 8 | class Rots2Joints(nn.Module): 9 | def __init__(self, path: Optional[str] = None, 10 | normalization: bool = False, 11 | eps: float = 1e-12, 12 | **kwargs) -> None: 13 | if normalization and path is None: 14 | raise TypeError( 15 | "You should provide a path if normalization is on.") 16 | 17 | super().__init__() 18 | self.normalization = normalization 19 | self.eps = eps 20 | 21 | if normalization: 22 | mean_path = Path(path) / "mean.pt" 23 | std_path = Path(path) / "std.pt" 24 | self.register_buffer('mean', torch.load(mean_path)) 25 | self.register_buffer('std', torch.load(std_path)) 26 | 27 | def normalize(self, features: Tensor) -> 
Tensor: 28 | if self.normalization: 29 | features = (features - self.mean)/(self.std + self.eps) 30 | return features 31 | 32 | def unnormalize(self, features: Tensor) -> Tensor: 33 | if self.normalization: 34 | features = features * self.std + self.mean 35 | return features 36 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rots2rfeats/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Rots2Rfeats 2 | from .smplvelp import SMPLVelP 3 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rots2rfeats/base.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from torch import Tensor, nn 5 | from pathlib import Path 6 | 7 | 8 | class Rots2Rfeats(nn.Module): 9 | def __init__(self, path: Optional[str] = None, 10 | normalization: bool = False, 11 | eps: float = 1e-12, 12 | **kwargs) -> None: 13 | if normalization and path is None: 14 | raise TypeError( 15 | "You should provide a path if normalization is on.") 16 | 17 | super().__init__() 18 | self.normalization = normalization 19 | self.eps = eps 20 | 21 | if normalization: 22 | mean_path = Path(path) / "rfeats_mean.pt" 23 | std_path = Path(path) / "rfeats_std.pt" 24 | self.register_buffer('mean', torch.load(mean_path)) 25 | self.register_buffer('std', torch.load(std_path)) 26 | 27 | def normalize(self, features: Tensor) -> Tensor: 28 | if self.normalization: 29 | features = (features - self.mean)/(self.std + self.eps) 30 | return features 31 | 32 | def unnormalize(self, features: Tensor) -> Tensor: 33 | if self.normalization: 34 | features = features * self.std + self.mean 35 | return features 36 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/rots2rfeats/smplvelp.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from torch import Tensor 5 | from einops import rearrange 6 | 7 | from tma.utils.temos_utils import matrix_to, nfeats_of, to_matrix 8 | import tma.utils.geometry as geometry 9 | 10 | from .base import Rots2Rfeats 11 | 12 | 13 | class SMPLVelP(Rots2Rfeats): 14 | 15 | def __init__(self, 16 | path: Optional[str] = None, 17 | normalization: bool = False, 18 | pose_rep: str = "rot6d", 19 | canonicalize: bool = False, 20 | offset: bool = True, 21 | **kwargs) -> None: 22 | super().__init__(path=path, normalization=normalization) 23 | self.canonicalize = canonicalize 24 | self.pose_rep = pose_rep 25 | self.nfeats = nfeats_of(pose_rep) 26 | self.offset = offset 27 | 28 | def forward(self, data) -> Tensor: 29 | matrix_poses, trans = data.rots, data.trans 30 | # matrix_poses: [nframes, 22, 3, 3] 31 | 32 | # extract the root gravity axis 33 | # for smpl it is the last coordinate 34 | root_y = trans[..., 2] 35 | trajectory = trans[..., [0, 1]] 36 | 37 | # Comoute the difference of trajectory (for X and Y axis) 38 | vel_trajectory = torch.diff(trajectory, dim=-2) 39 | # 0 for the first one => keep the dimentionality 40 | vel_trajectory = torch.cat( 41 | (0 * vel_trajectory[..., [0], :], vel_trajectory), dim=-2) 42 | 43 | # first normalize the data 44 | if self.canonicalize: 45 | global_orient = matrix_poses[..., 0, :, :] 46 | # remove the rotation 47 | rot2d = geometry.matrix_to_axis_angle(global_orient[..., 0, :, :]) 48 | # Remove the fist 
rotation along the vertical axis 49 | # construct this by extracting only the vertical component of the rotation 50 | rot2d[..., :2] = 0 51 | 52 | if self.offset: 53 | # add a bit more rotation 54 | rot2d[..., 2] += torch.pi / 2 55 | 56 | rot2d = geometry.axis_angle_to_matrix(rot2d) 57 | 58 | # rotate all the rotations by the same amount 59 | global_orient = torch.einsum("...kj,...kl->...jl", rot2d, 60 | global_orient) 61 | 62 | matrix_poses = torch.cat( 63 | (global_orient[..., None, :, :], matrix_poses[..., 1:, :, :]), 64 | dim=-3) 65 | 66 | # Turn the trajectory as well 67 | vel_trajectory = torch.einsum("...kj,...lk->...lj", 68 | rot2d[..., :2, :2], vel_trajectory) 69 | 70 | poses = matrix_to(self.pose_rep, matrix_poses) 71 | features = torch.cat( 72 | (root_y[..., None], vel_trajectory, 73 | rearrange(poses, "... joints rot -> ... (joints rot)")), 74 | dim=-1) 75 | features = self.normalize(features) 76 | return features 77 | 78 | def extract(self, features): 79 | root_y = features[..., 0] 80 | vel_trajectory = features[..., 1:3] 81 | poses_features = features[..., 3:] 82 | poses = rearrange(poses_features, 83 | "... (joints rot) -> ... joints rot", 84 | rot=self.nfeats) 85 | return root_y, vel_trajectory, poses 86 | 87 | def inverse(self, features): 88 | features = self.unnormalize(features) 89 | root_y, vel_trajectory, poses = self.extract(features) 90 | 91 | # integrate the trajectory 92 | trajectory = torch.cumsum(vel_trajectory, dim=-2) 93 | # The first frame should be 0, but if it is inferred it is better to enforce it 94 | trajectory = trajectory - trajectory[..., [0], :] 95 | 96 | # Get back the translation 97 | trans = torch.cat([trajectory, root_y[..., None]], dim=-1) 98 | matrix_poses = to_matrix(self.pose_rep, poses) 99 | 100 | from tma.transforms.smpl import RotTransDatastruct 101 | return RotTransDatastruct(rots=matrix_poses, trans=trans) 102 | -------------------------------------------------------------------------------- /OpenTMA/tma/transforms/xyz.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from torch import Tensor 3 | 4 | from .base import Datastruct, dataclass, Transform 5 | from tma.datasets.utils import collate_tensor_with_padding 6 | 7 | from .joints2jfeats import Joints2Jfeats 8 | 9 | 10 | class XYZTransform(Transform): 11 | 12 | def __init__(self, joints2jfeats: Joints2Jfeats, **kwargs): 13 | self.joints2jfeats = joints2jfeats 14 | 15 | def Datastruct(self, **kwargs): 16 | return XYZDatastruct(_joints2jfeats=self.joints2jfeats, 17 | transforms=self, 18 | **kwargs) 19 | 20 | def __repr__(self): 21 | return "XYZTransform()" 22 | 23 | 24 | @dataclass 25 | class XYZDatastruct(Datastruct): 26 | transforms: XYZTransform 27 | _joints2jfeats: Joints2Jfeats 28 | 29 | features: Optional[Tensor] = None 30 | joints_: Optional[Tensor] = None 31 | jfeats_: Optional[Tensor] = None 32 | 33 | def __post_init__(self): 34 | self.datakeys = ["features", "joints_", "jfeats_"] 35 | # starting point 36 | if self.features is not None and self.jfeats_ is None: 37 | self.jfeats_ = self.features 38 | 39 | @property 40 | def joints(self): 41 | # Cached value 42 | if self.joints_ is not None: 43 | return self.joints_ 44 | 45 | # self.jfeats_ should be defined 46 | assert self.jfeats_ is not None 47 | 48 | self._joints2jfeats.to(self.jfeats.device) 49 | self.joints_ = self._joints2jfeats.inverse(self.jfeats) 50 | return self.joints_ 51 | 52 | @property 53 | def jfeats(self): 54 | # Cached value 55 | if self.jfeats_ is not
None: 56 | return self.jfeats_ 57 | 58 | # self.joints_ should be defined 59 | assert self.joints_ is not None 60 | 61 | self._joints2jfeats.to(self.joints.device) 62 | self.jfeats_ = self._joints2jfeats(self.joints) 63 | return self.jfeats_ 64 | 65 | def __len__(self): 66 | return len(self.jfeats) 67 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/OpenTMA/tma/utils/__init__.py -------------------------------------------------------------------------------- /OpenTMA/tma/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | 5 | # load example data 6 | def load_example_input(txt_path): 7 | """ 8 | Parameters: 9 | txt_path (str): The path to the text file. 10 | 11 | Returns: 12 | texts (list): The list of text strings. 13 | lens (list): The list of lengths of the text strings. 14 | """ 15 | 16 | file = open(txt_path, "r") 17 | Lines = file.readlines() 18 | count = 0 19 | texts, lens = [], [] 20 | 21 | # Strips the newline character 22 | for line in Lines: 23 | count += 1 24 | 25 | # Strip the newline character from the line and split it into length and text 26 | s = line.strip() 27 | s_l = s.split(" ")[0] 28 | s_t = s[(len(s_l) + 1):] 29 | 30 | # Append the length and text to the respective lists 31 | lens.append(int(s_l)) 32 | texts.append(s_t) 33 | print("Length-{}: {}".format(s_l, s_t)) 34 | return texts, lens 35 | 36 | 37 | # render batch 38 | def render_batch(npy_dir, execute_python="./scripts/visualize_motion.sh", mode="sequence"): 39 | """ 40 | Parameters: 41 | npy_dir (str): The directory containing the npy files. 42 | execute_python (str): The path to the Python script to execute. Default is "./scripts/visualize_motion.sh". 43 | mode (str): The mode for rendering. Default is "sequence". 44 | """ 45 | # Execute the Python script with the directory and mode as arguments 46 | os.system(f"{execute_python} {npy_dir} {mode}") 47 | 48 | 49 | # render 50 | def render(execute_python, npy_path, jointtype, cfg_path): 51 | """ 52 | Parameters: 53 | execute_python (str): The path to the Python script to execute. 54 | npy_path (str): The path to the npy file. 55 | jointtype (str): The type of joints for the skeleton. 56 | cfg_path (str): The path to the configuration file. 57 | 58 | Returns: 59 | fig_path (Path): The path to the rendered figure. 60 | """ 61 | 62 | export_scripts = "render.py" 63 | 64 | os.system( 65 | f"{execute_python} --background --python {export_scripts} -- --cfg={cfg_path} --npy={npy_path} --joint_type={jointtype}" 66 | ) 67 | 68 | # Define the path to the rendered figure and return it 69 | fig_path = Path(str(npy_path).replace(".npy", ".png")) 70 | return fig_path 71 | 72 | 73 | # origin render 74 | def export_fbx_hand(pkl_path): 75 | """ 76 | Parameters: 77 | pkl_path (str): The path to the .pkl file. 
78 | 79 | Returns: 80 | None 81 | """ 82 | _input = pkl_path 83 | output = pkl_path.replace(".pkl", ".fbx") 84 | 85 | execute_python = "/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender" 86 | export_scripts = "./scripts/fbx_output_smplx.py" 87 | os.system( 88 | f"{execute_python} -noaudio --background --python {export_scripts}\ 89 | --input {_input} \ 90 | --output {output}" 91 | ) 92 | 93 | 94 | # export fbx without hand params from pkl files 95 | def export_fbx(pkl_path): 96 | """ 97 | Parameters: 98 | pkl_path (str): The path to the .pkl file. 99 | 100 | Returns: 101 | None 102 | """ 103 | _input = pkl_path 104 | output = pkl_path.replace(".pkl", ".fbx") 105 | 106 | execute_python = "/apdcephfs/share_1227775/shingxchen/libs/blender_bpy/blender-2.93.2-linux-x64/blender" 107 | export_scripts = "./scripts/fbx_output.py" 108 | os.system( 109 | f"{execute_python} -noaudio --background --python {export_scripts}\ 110 | --input {_input} \ 111 | --output {output}" 112 | ) 113 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/easyconvert.py: -------------------------------------------------------------------------------- 1 | import tma.utils.geometry as geometry 2 | 3 | 4 | # This function returns the number of features for a given rotation type. 5 | def nfeats_of(rottype): 6 | """ 7 | Parameters: 8 | rottype (str): The type of rotation. 9 | 10 | Returns: 11 | int: The number of features for the rotation type. 12 | """ 13 | if rottype in ["rotvec", "axisangle"]: 14 | return 3 15 | elif rottype in ["rotquat", "quaternion"]: 16 | return 4 17 | elif rottype in ["rot6d", "6drot", "rotation6d"]: 18 | return 6 19 | elif rottype in ["rotmat"]: 20 | return 9 21 | else: 22 | raise TypeError("This rotation type doesn't have features.") 23 | 24 | # This function converts axis-angle rotations to another rotation type. 25 | 26 | 27 | def axis_angle_to(newtype, rotations): 28 | """ 29 | Parameters: 30 | newtype (str): The new type of rotation. 31 | rotations (np.array): The axis-angle rotations. 32 | 33 | Returns: 34 | np.array: The rotations converted to the new type. 35 | """ 36 | if newtype in ["matrix"]: 37 | rotations = geometry.axis_angle_to_matrix(rotations) 38 | return rotations 39 | elif newtype in ["rotmat"]: 40 | rotations = geometry.axis_angle_to_matrix(rotations) 41 | rotations = matrix_to("rotmat", rotations) 42 | return rotations 43 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 44 | rotations = geometry.axis_angle_to_matrix(rotations) 45 | rotations = matrix_to("rot6d", rotations) 46 | return rotations 47 | elif newtype in ["rotquat", "quaternion"]: 48 | rotations = geometry.axis_angle_to_quaternion(rotations) 49 | return rotations 50 | elif newtype in ["rotvec", "axisangle"]: 51 | return rotations 52 | else: 53 | raise NotImplementedError 54 | 55 | # This function converts matrix rotations to another rotation type. 56 | 57 | 58 | def matrix_to(newtype, rotations): 59 | """ 60 | Parameters: 61 | newtype (str): The new type of rotation. 62 | rotations (np.array): The matrix rotations. 63 | 64 | Returns: 65 | np.array: The rotations converted to the new type.
66 | """ 67 | if newtype in ["matrix"]: 68 | return rotations 69 | if newtype in ["rotmat"]: 70 | rotations = rotations.reshape((*rotations.shape[:-2], 9)) 71 | return rotations 72 | elif newtype in ["rot6d", "6drot", "rotation6d"]: 73 | rotations = geometry.matrix_to_rotation_6d(rotations) 74 | return rotations 75 | elif newtype in ["rotquat", "quaternion"]: 76 | rotations = geometry.matrix_to_quaternion(rotations) 77 | return rotations 78 | elif newtype in ["rotvec", "axisangle"]: 79 | rotations = geometry.matrix_to_axis_angle(rotations) 80 | return rotations 81 | else: 82 | raise NotImplementedError 83 | 84 | # This function converts rotations of a given type to a matrix. 85 | 86 | 87 | def to_matrix(oldtype, rotations): 88 | """ 89 | Parameters: 90 | oldtype (str): The old type of rotation. 91 | rotations (np.array): The rotations. 92 | 93 | Returns: 94 | np.array: The rotations converted to a matrix. 95 | """ 96 | if oldtype in ["matrix"]: 97 | return rotations 98 | if oldtype in ["rotmat"]: 99 | rotations = rotations.reshape((*rotations.shape[:-2], 3, 3)) 100 | return rotations 101 | elif oldtype in ["rot6d", "6drot", "rotation6d"]: 102 | rotations = geometry.rotation_6d_to_matrix(rotations) 103 | return rotations 104 | elif oldtype in ["rotquat", "quaternion"]: 105 | rotations = geometry.quaternion_to_matrix(rotations) 106 | return rotations 107 | elif oldtype in ["rotvec", "axisangle"]: 108 | rotations = geometry.axis_angle_to_matrix(rotations) 109 | return rotations 110 | else: 111 | raise NotImplementedError 112 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/fixseed.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import random 4 | 5 | 6 | def fixseed(seed): 7 | random.seed(seed) 8 | np.random.seed(seed) 9 | torch.manual_seed(seed) 10 | 11 | 12 | SEED = 10 13 | EVALSEED = 0 14 | # Provokes a warning: not fully functional yet 15 | # torch.set_deterministic(True) 16 | torch.backends.cudnn.benchmark = False 17 | 18 | fixseed(SEED) 19 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/logger.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import time 4 | import logging 5 | from omegaconf import OmegaConf 6 | from pytorch_lightning.utilities.rank_zero import rank_zero_only 7 | 8 | 9 | def create_logger(cfg, phase='train'): 10 | """ 11 | Creates a logger for logging the training or testing process. 12 | 13 | Args: 14 | cfg (OmegaConf): The configuration object containing all the necessary parameters. 15 | phase (str, optional): The phase of the process. Defaults to 'train'. 16 | 17 | Returns: 18 | logger (logging.Logger): The logger object for logging the process.
19 | """ 20 | # root dir set by cfg 21 | root_output_dir = Path(cfg.FOLDER) 22 | # set up logger 23 | if not root_output_dir.exists(): 24 | print('=> creating {}'.format(root_output_dir)) 25 | root_output_dir.mkdir() 26 | 27 | cfg_name = cfg.NAME 28 | model = cfg.model.model_type 29 | cfg_name = os.path.basename(cfg_name).split('.')[0] 30 | 31 | final_output_dir = root_output_dir / model / cfg_name 32 | cfg.FOLDER_EXP = str(final_output_dir) 33 | 34 | time_str = time.strftime('%Y-%m-%d-%H-%M-%S') 35 | 36 | new_dir(cfg, phase, time_str, final_output_dir) 37 | 38 | head = '%(asctime)-15s %(message)s' 39 | logger = config_logger(final_output_dir, time_str, phase, head) 40 | if logger is None: 41 | logger = logging.getLogger() 42 | logger.setLevel(logging.CRITICAL) 43 | logging.basicConfig(format=head) 44 | return logger 45 | 46 | 47 | @rank_zero_only 48 | def config_logger(final_output_dir, time_str, phase, head): 49 | """ 50 | Configures the logger for logging the training or testing process. 51 | 52 | Args: 53 | final_output_dir (Path): The final output directory where the log file will be saved. 54 | time_str (str): The current time in string format. 55 | phase (str): The phase of the process. 56 | head (str): The format of the log message. 57 | 58 | Returns: 59 | logger (logging.Logger): The logger object for logging the process. 60 | """ 61 | log_file = '{}_{}_{}.log'.format('log', time_str, phase) 62 | final_log_file = final_output_dir / log_file 63 | logging.basicConfig(filename=str(final_log_file)) 64 | logger = logging.getLogger() 65 | logger.setLevel(logging.INFO) 66 | console = logging.StreamHandler() 67 | formatter = logging.Formatter(head) 68 | console.setFormatter(formatter) 69 | logging.getLogger('').addHandler(console) 70 | file_handler = logging.FileHandler(final_log_file, 'w') 71 | file_handler.setFormatter(logging.Formatter(head)) 72 | file_handler.setLevel(logging.INFO) 73 | logging.getLogger('').addHandler(file_handler) 74 | return logger 75 | 76 | 77 | @rank_zero_only 78 | def new_dir(cfg, phase, time_str, final_output_dir): 79 | """ 80 | Creates a new directory for the experiment and saves the configuration file. 81 | 82 | Args: 83 | cfg (OmegaConf): The configuration object containing all the necessary parameters. 84 | phase (str): The phase of the process. 85 | time_str (str): The current time in string format. 86 | final_output_dir (Path): The final output directory where the log file will be saved. 87 | """ 88 | # new experiment folder 89 | cfg.TIME = str(time_str) 90 | if os.path.exists( 91 | final_output_dir) and cfg.TRAIN.RESUME is None and not cfg.DEBUG: 92 | file_list = sorted(os.listdir(final_output_dir), reverse=True) 93 | for item in file_list: 94 | if item.endswith('.log'): 95 | os.rename(str(final_output_dir), 96 | str(final_output_dir) + '_' + cfg.TIME) 97 | break 98 | final_output_dir.mkdir(parents=True, exist_ok=True) 99 | # write config yaml 100 | config_file = '{}_{}_{}.yaml'.format('config', time_str, phase) 101 | final_config_file = final_output_dir / config_file 102 | OmegaConf.save(config=cfg, f=final_config_file) 103 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/misc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def to_numpy(tensor): 5 | """ 6 | Converts a PyTorch tensor to a numpy array. 7 | 8 | Args: 9 | tensor (torch.Tensor): The PyTorch tensor to convert. 
10 | 11 | Returns: 12 | ndarray (numpy.ndarray): The converted numpy array. 13 | 14 | Raises: 15 | """ 16 | if torch.is_tensor(tensor): 17 | return tensor.cpu().numpy() 18 | elif type(tensor).__module__ != 'numpy': 19 | raise ValueError("Cannot convert {} to numpy array".format( 20 | type(tensor))) 21 | return tensor 22 | 23 | 24 | def to_torch(ndarray): 25 | """ 26 | Converts a numpy array to a PyTorch tensor. 27 | 28 | Args: 29 | ndarray (numpy.ndarray): The numpy array to convert. 30 | 31 | Returns: 32 | tensor (torch.Tensor): The converted PyTorch tensor. 33 | 34 | Raises: 35 | ValueError: If the input is not a numpy array. 36 | """ 37 | if type(ndarray).__module__ == 'numpy': 38 | return torch.from_numpy(ndarray) 39 | elif not torch.is_tensor(ndarray): 40 | raise ValueError("Cannot convert {} to torch tensor".format( 41 | type(ndarray))) 42 | return ndarray 43 | 44 | 45 | def cleanexit(): 46 | """ 47 | Exits the program cleanly by handling the SystemExit exception. 48 | 49 | No input arguments or return values. 50 | """ 51 | import sys 52 | import os 53 | try: 54 | sys.exit(0) 55 | except SystemExit: 56 | os._exit(0) 57 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/sample_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | logger = logging.getLogger(__name__) 4 | 5 | 6 | def cfg_mean_nsamples_resolution(cfg): 7 | """ 8 | Resolves the number of samples based on the configuration. 9 | 10 | Args: 11 | cfg: The configuration object containing the parameters 'mean' and 'number_of_samples'. 12 | 13 | Returns: 14 | bool: True if the number of samples is 1, False otherwise. 15 | 16 | Side Effects: 17 | If 'mean' is True and 'number_of_samples' is more than 1, it logs an error and sets 'number_of_samples' to 1. 18 | """ 19 | if cfg.mean and cfg.number_of_samples > 1: 20 | logger.error( 21 | "All the samples will be the mean.. cfg.number_of_samples=1 will be forced.") 22 | cfg.number_of_samples = 1 23 | 24 | return cfg.number_of_samples == 1 25 | 26 | 27 | def get_path(sample_path: Path, is_amass: bool, gender: str, split: str, onesample: bool, mean: bool, fact: float): 28 | """ 29 | Constructs a path based on the provided parameters. 30 | 31 | Args: 32 | sample_path (Path): The base path for the sample. 33 | is_amass (bool): A flag indicating whether the sample is from AMASS. 34 | gender (str): The gender of the sample. 35 | split (str): The split of the sample (e.g., 'train', 'test'). 36 | onesample (bool): A flag indicating whether there is only one sample. 37 | mean (bool): A flag indicating whether the sample is a mean sample. 38 | fact (float): A factor to be included in the path. 39 | 40 | Returns: 41 | path (Path): The constructed path. 42 | """ 43 | extra_str = ("_mean" if mean else "") if onesample else "_multi" 44 | fact_str = "" if fact == 1 else f"{fact}_" 45 | gender_str = gender + "_" if is_amass else "" 46 | path = sample_path / f"{fact_str}{gender_str}{split}{extra_str}" 47 | return path 48 | -------------------------------------------------------------------------------- /OpenTMA/tma/utils/tensors.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def lengths_to_mask(lengths): 5 | """ 6 | Converts lengths to a mask tensor. 7 | 8 | Args: 9 | lengths (Tensor): A tensor of lengths. 10 | 11 | Returns: 12 | Tensor: A tensor mask of shape (len(lengths), max_len). 
13 | """ 14 | max_len = max(lengths) 15 | mask = torch.arange(max_len, device=lengths.device).expand( 16 | len(lengths), max_len) < lengths.unsqueeze(1) 17 | return mask 18 | 19 | 20 | def collate_tensors(batch): 21 | """ 22 | Collates a batch of tensors by padding them to the same size. 23 | 24 | Args: 25 | batch (List[Tensor]): A list of tensors. 26 | 27 | Returns: 28 | Tensor: A tensor of shape (len(batch), max_size). 29 | """ 30 | dims = batch[0].dim() 31 | max_size = [max([b.size(i) for b in batch]) for i in range(dims)] 32 | size = (len(batch),) + tuple(max_size) 33 | canvas = batch[0].new_zeros(size=size) 34 | for i, b in enumerate(batch): 35 | sub_tensor = canvas[i] 36 | for d in range(dims): 37 | sub_tensor = sub_tensor.narrow(d, 0, b.size(d)) 38 | sub_tensor.add_(b) 39 | return canvas 40 | 41 | 42 | def collate(batch): 43 | """ 44 | Collates a batch of data and labels, and generates a mask tensor. 45 | 46 | Args: 47 | batch (List[Tuple[Tensor, Tensor]]): A list of tuples, each containing a tensor of data and a tensor of labels. 48 | 49 | Returns: 50 | dict: A dictionary containing the collated data, labels, mask, and lengths. 51 | """ 52 | databatch = [b[0] for b in batch] 53 | labelbatch = [b[1] for b in batch] 54 | lenbatch = [len(b[0][0][0]) for b in batch] 55 | 56 | databatchTensor = collate_tensors(databatch) 57 | labelbatchTensor = torch.as_tensor(labelbatch) 58 | lenbatchTensor = torch.as_tensor(lenbatch) 59 | 60 | maskbatchTensor = lengths_to_mask(lenbatchTensor) 61 | 62 | batch = {"x": databatchTensor, "y": labelbatchTensor, 63 | "mask": maskbatchTensor, 'lengths': lenbatchTensor} 64 | return batch 65 | 66 | 67 | # slow version with padding 68 | def collate_data3d_slow(batch): 69 | """ 70 | Collates a batch of 3D data by padding them to the same size. 71 | 72 | Args: 73 | batch (List[dict]): A list of dictionaries, each containing a tensor of 3D data. 74 | 75 | Returns: 76 | dict: A dictionary containing the collated 3D data. 77 | """ 78 | batchTensor = {} 79 | for key in batch[0].keys(): 80 | databatch = [b[key] for b in batch] 81 | batchTensor[key] = collate_tensors(databatch) 82 | 83 | batch = batchTensor 84 | return batch 85 | 86 | 87 | def collate_data3d(batch): 88 | """ 89 | Collates a batch of 3D data by stacking them along a new dimension. 90 | 91 | Args: 92 | batch (List[dict]): A list of dictionaries, each containing a tensor of 3D data. 93 | 94 | Returns: 95 | dict: A dictionary containing the collated 3D data. 
96 | """ 97 | batchTensor = {} 98 | for key in batch[0].keys(): 99 | databatch = [b[key] for b in batch] 100 | if key == "paths": 101 | batchTensor[key] = databatch 102 | else: 103 | batchTensor[key] = torch.stack(databatch, axis=0) 104 | 105 | batch = batchTensor 106 | return batch 107 | -------------------------------------------------------------------------------- /OpenTMA/tmp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | path = "/comp_robot/chenlinghao/OpenTMR/experiments/temos/debug--H3D-TMR-release-2/embeddings/val/epoch_99/motion_embedding.npy" 4 | 5 | motion_embedding = np.load(path) 6 | print(motion_embedding.shape) 7 | 8 | # find the nearest neighbor of 0 index motion 9 | distances = np.linalg.norm(motion_embedding - motion_embedding[0], axis=1) 10 | print(distances, len(distances)) 11 | 12 | # find index and the distance of the nearest 4 neighbor 13 | print(np.argsort(distances)) 14 | print(np.sort(distances)) 15 | 16 | # print(motion_embedding[3688]) 17 | # print(motion_embedding[0]) 18 | -------------------------------------------------------------------------------- /OpenTMA/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J X-TMR 3 | #SBATCH -p cvr 4 | #SBATCH -N 1 5 | #SBATCH --cpus-per-task=18 6 | #SBATCH --gres=gpu:hgx:4 7 | #SBATCH --mem 300GB 8 | #SBATCH --qos=preemptive 9 | 10 | source activate temos 11 | 12 | # python -m train --cfg configs/configs_temos/MotionX-TMR.yaml --cfg_assets configs/assets.yaml --nodebug 13 | # python -m train --cfg configs/configs_temos/UniMocap-TMR.yaml --cfg_assets configs/assets.yaml --nodebug 14 | python -m train --cfg configs/configs_temos/H3D-TMR.yaml --cfg_assets configs/assets.yaml --nodebug 15 | 16 | 17 | # find ./ -type d -name "__pycache__" -exec rm -rf {} + -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HumanTOMATO: Text-aligned Whole-body Motion Generation 2 | 3 |  4 | 5 | [Shunlin Lu](https://shunlinlu.github.io)🍅 2, 3, [Ling-Hao Chen](https://lhchen.top)🍅 1, 2, [Ailing Zeng](https://ailingzeng.site)2, [Jing Lin](https://jinglin7.github.io)1, 2, [Ruimao Zhang](http://zhangruimao.site)3, [Lei Zhang](https://leizhang.org)2, and [Heung-Yeung Shum](https://scholar.google.com/citations?user=9akH-n8AAAAJ&hl=en)1, 2 6 | 7 | 🍅Co-first author. Listing order is random. 8 | 9 | 1Tsinghua University, 2International Digital Economy Academy (IDEA), 10 | 3School of Data Science, The Chinese University of Hong Kong, Shenzhen (CUHK-SZ) 11 | 12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
91 |
92 |
93 |
94 |
95 |
96 | If you have any questions, please contact us at: shunlinlu0803 [AT] gmail [DOT] com AND thu [DOT] lhchen [AT] gmail [DOT] com.
97 |
--------------------------------------------------------------------------------
/assets/highlight.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/assets/highlight.png
--------------------------------------------------------------------------------
/assets/system.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/assets/system.png
--------------------------------------------------------------------------------
/assets/tomato-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IDEA-Research/HumanTOMATO/b5ccf060465e986585618fac7461ed1674dedd92/assets/tomato-logo.png
--------------------------------------------------------------------------------
/src/tomato_represenation/README.md:
--------------------------------------------------------------------------------
1 | # 🍅 How to use tomato representation?
2 |
3 | These instructions describe how to create the motion representation in the [Tomato](https://arxiv.org/pdf/2310.12978.pdf) format. The Tomato format extends the [H3D](https://github.com/EricGuo5513/HumanML3D) format but differs from it; we name it the `Tomato Representation` for convenience. For a detailed ablation of the motion representation design choices, please refer to Appendix B.1 in the [paper](https://arxiv.org/pdf/2310.12978.pdf).
4 |
5 | ## 🚀 Data Preparation
6 |
7 |
8 | Download SMPL+H, SMPLX, DMPLs.
10 |
11 | Download the SMPL+H model from [SMPL+H](https://mano.is.tue.mpg.de/download.php) (choose the Extended SMPL+H model used in the AMASS project), the DMPL model from [DMPL](https://smpl.is.tue.mpg.de/download.php) (choose DMPLs compatible with SMPL), and the SMPL-X model from [SMPL-X](https://smpl-x.is.tue.mpg.de/download.php). Then, please place all the models under `./body_models/`. The `./body_models/` folder tree should be:
12 |
13 | ```bash
14 | ./body_models
15 | ├── dmpls
16 | │ ├── female
17 | │ │ └── model.npz
18 | │ ├── male
19 | │ │ └── model.npz
20 | │ └── neutral
21 | │ └── model.npz
22 | ├── smplh
23 | │ ├── female
24 | │ │ └── model.npz
25 | │ ├── info.txt
26 | │ ├── male
27 | │ │ └── model.npz
28 | │ └── neutral
29 | │ └── model.npz
30 | ├── smplx
31 | │ ├── female
32 | │ │ ├── model.npz
33 | │ │ └── model.pkl
34 | │ ├── male
35 | │ │ ├── model.npz
36 | │ │ └── model.pkl
37 | │ └── neutral
38 | │ ├── model.npz
39 | │ └── model.pkl
40 | ```
41 |
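A quick sanity check can catch missing model files before any preprocessing starts. The script below is a minimal sketch (not part of the original pipeline): the expected paths simply mirror the folder tree above, and the root path is an assumption you may need to adjust.

```python
from pathlib import Path

# Minimal sanity check for the ./body_models layout shown above.
# The expected files mirror the folder tree; extend the list
# (e.g. with the smplx model.pkl files) if your pipeline needs them.
EXPECTED = [
    "dmpls/female/model.npz", "dmpls/male/model.npz", "dmpls/neutral/model.npz",
    "smplh/female/model.npz", "smplh/male/model.npz", "smplh/neutral/model.npz",
    "smplx/female/model.npz", "smplx/male/model.npz", "smplx/neutral/model.npz",
]


def check_body_models(root: str = "./body_models") -> bool:
    root_path = Path(root)
    missing = [p for p in EXPECTED if not (root_path / p).exists()]
    for p in missing:
        print(f"missing: {root_path / p}")
    return not missing


if __name__ == "__main__":
    ok = check_body_models()
    print("All expected body model files found." if ok
          else "Some body model files are missing (see above).")
```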
42 | Download Motion-X datasets
47 |
48 | Please follow the instructions of [Motion-X](https://github.com/IDEA-Research/Motion-X) to download the SMPL-X data with a dimension of 322. Put the motion data in the folder `./data/motion_data/smplx_322`.
49 |
50 |
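For a first look at the downloaded motion data, the 322-dimensional SMPL-X vectors can be unpacked per frame as sketched below. The slice boundaries are an assumption based on the layout described in the Motion-X documentation (root orientation, body pose, hand pose, jaw pose, facial expression, facial shape, translation, and shape betas), and `example.npy` is a hypothetical file name; verify both against your copy of the dataset.

```python
import numpy as np

# Hypothetical example file under the folder mentioned above.
motion = np.load("./data/motion_data/smplx_322/example.npy")
print(motion.shape)  # expected: (num_frames, 322)

# Assumed per-frame layout of the 322 dimensions (based on the Motion-X docs):
root_orient = motion[:, 0:3]      # global root orientation (axis-angle), 3
pose_body   = motion[:, 3:66]     # 21 body joints x 3 = 63
pose_hand   = motion[:, 66:156]   # 30 hand joints x 3 = 90
pose_jaw    = motion[:, 156:159]  # jaw rotation, 3
face_expr   = motion[:, 159:209]  # facial expression coefficients, 50
face_shape  = motion[:, 209:309]  # facial shape coefficients, 100
trans       = motion[:, 309:312]  # global translation, 3
betas       = motion[:, 312:322]  # body shape coefficients, 10
```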