├── MToV
├── evals
│ ├── __init__.py
│ └── fvd
│ │ ├── __init__.py
│ │ ├── download.py
│ │ ├── convert_tf_pretrained.py
│ │ └── fvd.py
├── exps
│ ├── __init__.py
│ ├── first_stage.py
│ └── first_stage_ldmk.py
├── models
│ ├── __init__.py
│ ├── ddpm
│ │ └── __init__.py
│ ├── autoencoder
│ │ └── __init__.py
│ └── ema.py
├── tools
│ ├── __init__.py
│ └── scheduler.py
├── text_folders
│ ├── sample_cross_audio_hdtf.txt
│ ├── sample_cross_id_hdtf.txt
│ └── train_id.txt
├── .gitignore
├── scripts
│ ├── train
│ │ ├── first_stg.sh
│ │ ├── first_stg_ldmk.sh
│ │ └── second_stg.sh
│ └── inference
│ │ ├── sample_crossID.sh
│ │ └── sample.sh
├── configs
│ ├── autoencoder
│ │ ├── base.yaml
│ │ ├── base_gan.yaml
│ │ └── base_ldmk.yaml
│ └── latent-diffusion
│ │ ├── base.yaml
│ │ └── base_longvid.yaml
├── losses
│ └── diffaugment.py
└── main.py
├── AToM
├── dataset
│ ├── __init__.py
│ └── preprocess.py
├── .gitignore
├── train.py
├── scripts
│ ├── train.sh
│ └── inference.sh
├── data_util
│ ├── euler2quaterion.py
│ └── tensor_utils.py
├── args.py
└── model
│ ├── utils.py
│ ├── adan.py
│ └── rotary_embedding_torch.py
└── data
├── data_utils
├── preprocess
│ ├── __init__.py
│ ├── unify_fps.py
│ ├── video2frame_hdtf.py
│ └── process_audio.py
├── deep_3drecon
│ ├── __init__.py
│ ├── deep_3drecon_models
│ │ ├── arcface_torch
│ │ │ ├── configs
│ │ │ │ ├── __init__.py
│ │ │ │ ├── 3millions.py
│ │ │ │ ├── wf42m_pfc02_16gpus_mbf_bs8k.py
│ │ │ │ ├── ms1mv2_mbf.py
│ │ │ │ ├── ms1mv2_r50.py
│ │ │ │ ├── wf4m_mbf.py
│ │ │ │ ├── wf4m_r100.py
│ │ │ │ ├── wf4m_r50.py
│ │ │ │ ├── glint360k_mbf.py
│ │ │ │ ├── glint360k_r100.py
│ │ │ │ ├── glint360k_r50.py
│ │ │ │ ├── ms1mv2_r100.py
│ │ │ │ ├── ms1mv3_mbf.py
│ │ │ │ ├── ms1mv3_r100.py
│ │ │ │ ├── ms1mv3_r50.py
│ │ │ │ ├── wf42m_pfc02_r100.py
│ │ │ │ ├── ms1mv3_r50_onegpu.py
│ │ │ │ ├── wf42m_pfc02_16gpus_r100.py
│ │ │ │ ├── wf42m_pfc02_16gpus_r50_bs8k.py
│ │ │ │ ├── wf42m_pfc02_32gpus_r50_bs4k.py
│ │ │ │ ├── wf42m_pfc02_8gpus_r50_bs4k.py
│ │ │ │ ├── wf12m_mbf.py
│ │ │ │ ├── wf12m_r50.py
│ │ │ │ ├── wf12m_r100.py
│ │ │ │ ├── wf42m_pfc03_32gpu_r18.py
│ │ │ │ ├── wf42m_pfc03_32gpu_r50.py
│ │ │ │ ├── wf12m_pfc02_r100.py
│ │ │ │ ├── wf42m_pfc0008_32gpu_r100.py
│ │ │ │ ├── wf42m_pfc02_r100_16gpus.py
│ │ │ │ ├── wf42m_pfc02_r100_32gpus.py
│ │ │ │ ├── wf42m_pfc03_32gpu_r100.py
│ │ │ │ ├── wf42m_pfc03_32gpu_r200.py
│ │ │ │ ├── wf42m_pfc03_40epoch_64gpu_vit_b.py
│ │ │ │ ├── wf42m_pfc03_40epoch_64gpu_vit_l.py
│ │ │ │ ├── wf42m_pfc03_40epoch_64gpu_vit_s.py
│ │ │ │ ├── wf42m_pfc03_40epoch_64gpu_vit_t.py
│ │ │ │ ├── wf42m_pfc03_40epoch_8gpu_vit_t.py
│ │ │ │ ├── wf12m_flip_r50.py
│ │ │ │ ├── wf12m_conflict_r50.py
│ │ │ │ ├── wf12m_flip_pfc01_filter04_r50.py
│ │ │ │ ├── wf12m_conflict_r50_pfc03_filter04.py
│ │ │ │ ├── wf42m_pfc03_40epoch_8gpu_vit_b.py
│ │ │ │ └── base.py
│ │ │ ├── docs
│ │ │ │ ├── modelzoo.md
│ │ │ │ ├── install.md
│ │ │ │ ├── eval.md
│ │ │ │ ├── prepare_custom_dataset.md
│ │ │ │ ├── prepare_webface42m.md
│ │ │ │ ├── install_dali.md
│ │ │ │ └── speed_benchmark.md
│ │ │ ├── eval
│ │ │ │ └── __init__.py
│ │ │ ├── utils
│ │ │ │ ├── __init__.py
│ │ │ │ ├── utils_config.py
│ │ │ │ ├── utils_logging.py
│ │ │ │ ├── plot.py
│ │ │ │ └── utils_distributed_sampler.py
│ │ │ ├── requirement.txt
│ │ │ ├── run.sh
│ │ │ ├── dist.sh
│ │ │ ├── flops.py
│ │ │ ├── inference.py
│ │ │ ├── lr_scheduler.py
│ │ │ ├── torch2onnx.py
│ │ │ ├── scripts
│ │ │ │ └── shuffle_rec.py
│ │ │ ├── losses.py
│ │ │ └── backbones
│ │ │ │ └── __init__.py
│ │ ├── __init__.py
│ │ └── losses.py
│ ├── util
│ │ ├── __init__.py
│ │ ├── load_mats.py
│ │ └── mesh_renderer.py
│ ├── options
│ │ └── __init__.py
│ └── data
│ │ ├── image_folder.py
│ │ ├── template_dataset.py
│ │ ├── flist_dataset.py
│ │ ├── __init__.py
│ │ └── base_dataset.py
└── commons
│ ├── euler2rot.py
│ └── tensor_utils.py
├── .gitignore
└── README.md
/MToV/evals/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/exps/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/models/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/AToM/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/evals/fvd/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/models/ddpm/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/models/autoencoder/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data/data_utils/preprocess/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/text_folders/sample_cross_audio_hdtf.txt:
--------------------------------------------------------------------------------
1 | RD_Radio25_000
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/__init__.py:
--------------------------------------------------------------------------------
1 | from .reconstructor import *
2 | 
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/modelzoo.md:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/eval/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/requirement.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | easydict 3 | mxnet 4 | onnx 5 | sklearn 6 | opencv-python -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/run.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 train_v2.py $@ 2 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/util/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes a miscellaneous collection of useful helper functions.""" 2 | from .util import * 3 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | results/ 3 | *.gif 4 | *.mp4 5 | *.pkl 6 | *.pt 7 | *.pth 8 | *.npy 9 | *.zip 10 | BFM/ 11 | BFM copy/ 12 | inference/ 13 | train/ -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/options/__init__.py: -------------------------------------------------------------------------------- 1 | """This package options includes option modules: training options, test options, and basic options (used in both training and test).""" 2 | -------------------------------------------------------------------------------- /MToV/text_folders/sample_cross_id_hdtf.txt: -------------------------------------------------------------------------------- 1 | RD_Radio29_000 2 | WRA_DavidVitter_000 3 | WRA_JoePitts_000 4 | WDA_AmyKlobuchar1_002 5 | RD_Radio50_000 6 | WDA_JackReed0_000 7 | WDA_TerriSewell0_000 -------------------------------------------------------------------------------- /MToV/.gitignore: -------------------------------------------------------------------------------- 1 | losses/vgg.pth 2 | aux_models/ 3 | demo/ 4 | data/ 5 | __pycache__/ 6 | results/ 7 | runs/ 8 | *.pth 9 | *.pt 10 | *.jpg 11 | *.png 12 | *.pkl 13 | *.npy 14 | eval_videos/ -------------------------------------------------------------------------------- /AToM/.gitignore: -------------------------------------------------------------------------------- 1 | train/ 2 | test/ 3 | edge_aistpp/ 4 | dataset_backups/ 5 | runs/ 6 | wandb/ 7 | renders/ 8 | .ipynb_checkpoints/ 9 | __pycache__/ 10 | results/ 11 | *.gif 12 | *.mp4 13 | *.pkl 14 | *.pt 15 | *.npy 16 | *.zip 17 | cached_features/ 18 | custom_music/ 19 | -------------------------------------------------------------------------------- /AToM/train.py: -------------------------------------------------------------------------------- 1 | from args import parse_train_opt 2 | from AToM import AToM 3 | 4 | 5 | def train(opt): 6 | model = AToM(opt.feature_type, checkpoint_path = opt.checkpoint) 7 | model.train_loop(opt) 8 | 9 | if __name__ == "__main__": 10 | opt = parse_train_opt() 11 | train(opt) 12 | -------------------------------------------------------------------------------- /MToV/scripts/train/first_stg.sh: -------------------------------------------------------------------------------- 1 | EXP_NAME=main 2 | DATASET=HDTF 3 | BATCH_SIZE=1 4 | 5 | CUDA_VISIBLE_DEVICES=6 python main.py \ 6 | --exp first_stage \ 7 | --id main \ 8 | --log_dir ./runs \ 9 | 
--timesteps 4 \ 10 | --pretrain_config configs/autoencoder/base.yaml \ 11 | --data ${DATASET} \ 12 | --batch_size ${BATCH_SIZE} 13 | -------------------------------------------------------------------------------- /AToM/scripts/train.sh: -------------------------------------------------------------------------------- 1 | ############## 2 | BATCH_SIZE=64 3 | EPOCHS=2000 4 | FEAT=jukebox 5 | SAVE_INTERVAL=1 6 | DEVICE=6 7 | ############## 8 | 9 | CUDA_VISIBLE_DEVICES=$DEVICE python train.py \ 10 | --batch_size $BATCH_SIZE \ 11 | --epochs $EPOCHS \ 12 | --feature_type $FEAT \ 13 | --save_interval $SAVE_INTERVAL -------------------------------------------------------------------------------- /AToM/scripts/inference.sh: -------------------------------------------------------------------------------- 1 | ############### 2 | DATA_ROOT=../data/inference/ref/25fps 3 | HUBERT=../data/inference/hubert/16000/LetItGo1.npy 4 | SAVE_DIR=results/frontalized1 5 | CHECKPOINT=../checkpoints/atom.pt 6 | DEVICE=6 7 | ############### 8 | 9 | CUDA_VISIBLE_DEVICES=$DEVICE python inference.py \ 10 | --data_root $DATA_ROOT \ 11 | --hubert_path $HUBERT \ 12 | --save_dir $SAVE_DIR \ 13 | --checkpoint $CHECKPOINT -------------------------------------------------------------------------------- /MToV/configs/autoencoder/base.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | resume: False 3 | amp: True 4 | base_learning_rate: 1.0e-4 5 | params: 6 | embed_dim: 4 7 | lossconfig: 8 | params: 9 | disc_start: 100000000 10 | 11 | ddconfig: 12 | double_z: False 13 | channels: 384 14 | resolution: 256 15 | timesteps: 16 16 | skip: 1 17 | in_channels: 3 18 | out_ch: 3 19 | num_res_blocks: 2 20 | attn_resolutions: [] 21 | splits: 1 22 | -------------------------------------------------------------------------------- /MToV/configs/autoencoder/base_gan.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | resume: True 3 | amp: True 4 | base_learning_rate: 1.0e-4 5 | params: 6 | embed_dim: 4 7 | lossconfig: 8 | params: 9 | disc_start: -1 10 | 11 | ddconfig: 12 | double_z: False 13 | channels: 384 14 | resolution: 256 15 | timesteps: 16 16 | skip: 1 17 | in_channels: 3 18 | out_ch: 3 19 | num_res_blocks: 2 20 | attn_resolutions: [] 21 | splits: 1 22 | -------------------------------------------------------------------------------- /MToV/scripts/train/first_stg_ldmk.sh: -------------------------------------------------------------------------------- 1 | EXP_NAME=main 2 | DATASET=HDTF 3 | FIRST_STAGE_MODEL_DIRECTORY=../checkpoints/autoencoder_rgb.pth 4 | BATCH_SIZE=1 5 | 6 | CUDA_VISIBLE_DEVICES=0 python main.py \ 7 | --exp first_stage_ldmk \ 8 | --log_dir ./runs \ 9 | --id encoder_decoder_frz \ 10 | --typetype 'ldmk' \ 11 | --timesteps 16 \ 12 | --pretrain_config configs/autoencoder/base.yaml \ 13 | --data ${DATASET} \ 14 | --first_model ${FIRST_STAGE_MODEL_DIRECTORY} \ 15 | --batch_size ${BATCH_SIZE} 16 | -------------------------------------------------------------------------------- /MToV/configs/autoencoder/base_ldmk.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | resume: False 3 | amp: True 4 | base_learning_rate: 1.0e-4 5 | params: 6 | embed_dim: 4 7 | lossconfig: 8 | params: 9 | disc_start: 100000000 10 | 11 | ddconfig: 12 | double_z: False 13 | channels: 384 14 | resolution: 256 15 | timesteps: 16 16 | skip: 1 17 | in_channels: 3 18 | out_ch: 3 19 | num_res_blocks: 2 20 | 
attn_resolutions: [] 21 | splits: 1 22 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/dist.sh: -------------------------------------------------------------------------------- 1 | ip_list=("ip1" "ip2" "ip3" "ip4") 2 | 3 | config=wf42m_pfc03_32gpu_r100 4 | 5 | for((node_rank=0;node_rank<${#ip_list[*]};node_rank++)); 6 | do 7 | ssh ubuntu@${ip_list[node_rank]} "cd `pwd`;PATH=$PATH \ 8 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 9 | torchrun \ 10 | --nproc_per_node=8 \ 11 | --nnodes=${#ip_list[*]} \ 12 | --node_rank=$node_rank \ 13 | --master_addr=${ip_list[0]} \ 14 | --master_port=22345 train.py configs/$config" & 15 | done 16 | -------------------------------------------------------------------------------- /MToV/scripts/train/second_stg.sh: -------------------------------------------------------------------------------- 1 | EXP_NAME=main 2 | DATASET=HDTF 3 | FIRST_STAGE_MODEL_DIRECTORY=../checkpoints/autoencoder_rgb.pth 4 | FIRST_STAGE_MODEL_LDMK_DIRECTORY=../checkpoints/autoencoder_motion.pth 5 | BATCH_SIZE=10 6 | 7 | CUDA_VISIBLE_DEVICES=6 python main.py \ 8 | --exp ddpm \ 9 | --id ${EXP_NAME} \ 10 | --log_dir ./runs \ 11 | --data ${DATASET} \ 12 | --first_model ${FIRST_STAGE_MODEL_DIRECTORY} \ 13 | --first_model_ldmk ${FIRST_STAGE_MODEL_LDMK_DIRECTORY} \ 14 | --pretrain_config configs/autoencoder/base.yaml \ 15 | --diffusion_config configs/latent-diffusion/base.yaml \ 16 | --batch_size ${BATCH_SIZE} 17 | 18 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/utils/utils_config.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os.path as osp 3 | 4 | 5 | def get_config(config_file): 6 | assert config_file.startswith('configs/'), 'config file setting must start with configs/' 7 | temp_config_name = osp.basename(config_file) 8 | temp_module_name = osp.splitext(temp_config_name)[0] 9 | config = importlib.import_module("configs.base") 10 | cfg = config.config 11 | config = importlib.import_module("configs.%s" % temp_module_name) 12 | job_cfg = config.config 13 | cfg.update(job_cfg) 14 | if cfg.output is None: 15 | cfg.output = osp.join('work_dirs', temp_module_name) 16 | return cfg -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/3millions.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # configs for test speed 4 | 5 | config = edict() 6 | config.margin_list = (1.0, 0.0, 0.4) 7 | config.network = "mbf" 8 | config.resume = False 9 | config.output = None 10 | config.embedding_size = 512 11 | config.sample_rate = 0.1 12 | config.fp16 = True 13 | config.momentum = 0.9 14 | config.weight_decay = 5e-4 15 | config.batch_size = 512 # total_batch_size = batch_size * num_gpus 16 | config.lr = 0.1 # batch size is 512 17 | 18 | config.rec = "synthetic" 19 | config.num_classes = 30 * 10000 20 | config.num_image = 100000 21 | config.num_epoch = 30 22 | config.warmup_epoch = -1 23 | config.val_targets = [] 24 | -------------------------------------------------------------------------------- /MToV/scripts/inference/sample_crossID.sh: -------------------------------------------------------------------------------- 1 | NUM_FRAMES=144 # NUM_FRAMES=304 = 10초 16의 배수 2 | 
FIRST_STAGE_MODEL_DIRECTORY=../checkpoints/autoencoder_rgb.pth 3 | FIRST_STAGE_MODEL_LDMK_DIRECTORY=../checkpoints/autoencoder_motion.pth 4 | SECOND_STAGE_MODEL_DIRECTORY=../checkpoints/diffusion_model.pth 5 | EVAL_NAME=noisy_0.25 6 | 7 | CUDA_VISIBLE_DEVICES=6 python sample_crossID.py \ 8 | --including_ldmk_video \ 9 | --ratio_ 0.25 \ 10 | --fps 30 \ 11 | --seconds 5 \ 12 | --x_noisy_start \ 13 | --num_frames ${NUM_FRAMES} \ 14 | --batch_size 1 \ 15 | --first_model ${FIRST_STAGE_MODEL_DIRECTORY} \ 16 | --first_model_ldmk ${FIRST_STAGE_MODEL_LDMK_DIRECTORY} \ 17 | --second_model ${SECOND_STAGE_MODEL_DIRECTORY} \ 18 | --eval_folder results/Cross_Id_${EVAL_NAME} 19 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_16gpus_mbf_bs8k.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 512 18 | config.lr = 0.4 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 2 27 | config.val_targets = [] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/flops.py: -------------------------------------------------------------------------------- 1 | from ptflops import get_model_complexity_info 2 | from backbones import get_model 3 | import argparse 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser(description='') 7 | parser.add_argument('n', type=str, default="r100") 8 | args = parser.parse_args() 9 | net = get_model(args.n) 10 | macs, params = get_model_complexity_info( 11 | net, (3, 112, 112), as_strings=False, 12 | print_per_layer_stat=True, verbose=True) 13 | gmacs = macs / (1000**3) 14 | print("%.3f GFLOPs"%gmacs) 15 | print("%.3f Mparams"%(params/(1000**2))) 16 | 17 | if hasattr(net, "extra_gflops"): 18 | print("%.3f Extra-GFLOPs"%net.extra_gflops) 19 | print("%.3f Total-GFLOPs"%(gmacs+net.extra_gflops)) 20 | 21 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/faces_emore" 23 | config.num_classes = 85742 24 | config.num_image = 5822653 25 | 
config.num_epoch = 40 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/faces_emore" 23 | config.num_classes = 85742 24 | config.num_image = 5822653 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf4m_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace4M" 23 | config.num_classes = 205990 24 | config.num_image = 4235242 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf4m_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace4M" 23 | config.num_classes = 205990 24 | config.num_image = 4235242 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf4m_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs 
/train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace4M" 23 | config.num_classes = 205990 24 | config.num_image = 4235242 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/glint360k" 23 | config.num_classes = 360232 24 | config.num_image = 17091657 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/glint360k" 23 | config.num_classes = 360232 24 | config.num_image = 17091657 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/glint360k" 23 | config.num_classes = 360232 24 
| config.num_image = 17091657 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/faces_emore" 23 | config.num_classes = 85742 24 | config.num_image = 5822653 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /MToV/scripts/inference/sample.sh: -------------------------------------------------------------------------------- 1 | NUM_FRAMES=144 # NUM_FRAMES=304 = 10초 16의 배수 2 | FIRST_STAGE_MODEL_DIRECTORY=../checkpoints/autoencoder_rgb.pth 3 | FIRST_STAGE_MODEL_LDMK_DIRECTORY=../checkpoints/autoencoder_motion.pth 4 | SECOND_STAGE_MODEL_DIRECTORY=../checkpoints/diffusion_model.pth 5 | EVAL_NAME=noisy_0.25 6 | 7 | 8 | CUDA_VISIBLE_DEVICES=6 python sample.py \ 9 | --including_ldmk_video \ 10 | --ratio_ 0.25 \ 11 | --fps 30 \ 12 | --seconds 5 \ 13 | --x_noisy_start \ 14 | --num_frames ${NUM_FRAMES} \ 15 | --batch_size 1 \ 16 | --first_model ${FIRST_STAGE_MODEL_DIRECTORY} \ 17 | --first_model_ldmk ${FIRST_STAGE_MODEL_LDMK_DIRECTORY} \ 18 | --second_model ${SECOND_STAGE_MODEL_DIRECTORY} \ 19 | --eval_folder results/${EVAL_NAME} \ 20 | --ldmk_owner_list WRA_JoePitts_000 WDA_BarbaraLee1_000 WDA_StenyHoyer_000 \ 21 | # --crossID WDA_BarackObama_001 \ -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/ms1m-retinaface-t1" 23 | config.num_classes = 93431 24 | config.num_image = 5179510 25 | config.num_epoch = 40 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # 
our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/ms1m-retinaface-t1" 23 | config.num_classes = 93431 24 | config.num_image = 5179510 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/ms1m-retinaface-t1" 23 | config.num_classes = 93431 24 | config.num_image = 5179510 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r50_onegpu.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.02 19 | config.verbose = 2000 20 | config.dali = False 21 
| 22 | config.rec = "/train_tmp/ms1m-retinaface-t1" 23 | config.num_classes = 93431 24 | config.num_image = 5179510 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_16gpus_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 256 18 | config.lr = 0.3 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 1 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_16gpus_r50_bs8k.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 512 18 | config.lr = 0.6 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 4 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_32gpus_r50_bs4k.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 2 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_8gpus_r50_bs4k.py: 
-------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 512 18 | config.lr = 0.4 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 2 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.interclass_filtering_threshold = 0 15 | config.fp16 = True 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M" 24 | config.num_classes = 617970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = 0 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.interclass_filtering_threshold = 0 15 | config.fp16 = True 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M" 24 | config.num_classes = 617970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = 0 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_r100.py: -------------------------------------------------------------------------------- 1 | 2 | from easydict import EasyDict as edict 3 | 4 | # make training faster 5 | # our RAM is 256G 6 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 7 | 8 | config = edict() 9 | config.margin_list = (1.0, 0.0, 0.4) 10 | config.network = "r100" 11 | config.resume = False 12 | config.output = None 13 | config.embedding_size = 512 14 | config.sample_rate = 1.0 15 | 
config.interclass_filtering_threshold = 0 16 | config.fp16 = True 17 | config.weight_decay = 5e-4 18 | config.batch_size = 128 19 | config.optimizer = "sgd" 20 | config.lr = 0.1 21 | config.verbose = 2000 22 | config.dali = False 23 | 24 | config.rec = "/train_tmp/WebFace12M" 25 | config.num_classes = 617970 26 | config.num_image = 12720066 27 | config.num_epoch = 20 28 | config.warmup_epoch = 0 29 | config.val_targets = [] 30 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_32gpu_r18.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r18" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_32gpu_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_pfc02_r100.py: -------------------------------------------------------------------------------- 1 | 2 | from easydict import EasyDict as edict 3 | 4 | # make training faster 5 | # our RAM is 256G 6 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 7 | 8 | config = edict() 9 | config.margin_list = (1.0, 0.0, 0.4) 10 | config.network = "r100" 11 | config.resume = False 12 | config.output = None 13 | config.embedding_size = 512 14 | config.sample_rate = 0.2 15 | config.interclass_filtering_threshold = 0 16 | config.fp16 = True 17 | config.weight_decay = 5e-4 18 | config.batch_size = 128 19 | config.optimizer = "sgd" 20 | config.lr = 0.1 21 | config.verbose = 2000 22 | config.dali = False 23 | 24 | config.rec = "/train_tmp/WebFace12M" 25 | config.num_classes = 617970 26 | config.num_image = 12720066 27 | config.num_epoch = 20 28 | 
config.warmup_epoch = 0 29 | config.val_targets = [] 30 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc0008_32gpu_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 512 18 | config.lr = 0.4 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_r100_16gpus.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.2 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_r100_32gpus.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_32gpu_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training 
faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_32gpu_r200.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r200" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_64gpu_vit_b.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_b_dp005_mask_005" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | config.batch_size = 384 17 | config.optimizer = "adamw" 18 | config.lr = 0.001 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 40 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = [] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_64gpu_vit_l.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_l_dp005_mask_005" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | 
config.batch_size = 384 17 | config.optimizer = "adamw" 18 | config.lr = 0.001 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 40 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = [] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_64gpu_vit_s.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_s_dp005_mask_0" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | config.batch_size = 384 17 | config.optimizer = "adamw" 18 | config.lr = 0.001 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 40 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = [] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_64gpu_vit_t.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_t_dp005_mask0" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | config.batch_size = 384 17 | config.optimizer = "adamw" 18 | config.lr = 0.001 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 40 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = [] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_8gpu_vit_t.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_t_dp005_mask0" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | config.batch_size = 512 17 | config.optimizer = "adamw" 18 | config.lr = 0.001 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 40 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = [] 28 | 
-------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_flip_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.interclass_filtering_threshold = 0 15 | config.fp16 = True 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M_FLIP40" 24 | config.num_classes = 617970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = config.num_epoch // 10 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_conflict_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.interclass_filtering_threshold = 0 15 | config.fp16 = True 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M_Conflict" 24 | config.num_classes = 1017970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = config.num_epoch // 10 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_flip_pfc01_filter04_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.1 14 | config.interclass_filtering_threshold = 0.4 15 | config.fp16 = True 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M_FLIP40" 24 | config.num_classes = 617970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = config.num_epoch // 10 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_conflict_r50_pfc03_filter04.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict 
as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.interclass_filtering_threshold = 0.4 15 | config.fp16 = True 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M_Conflict" 24 | config.num_classes = 1017970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = config.num_epoch // 10 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_8gpu_vit_b.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_b_dp005_mask_005" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | config.batch_size = 256 17 | config.gradient_acc = 12 # total batchsize is 256 * 12 18 | config.optimizer = "adamw" 19 | config.lr = 0.001 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace42M" 24 | config.num_classes = 2059906 25 | config.num_image = 42474557 26 | config.num_epoch = 40 27 | config.warmup_epoch = config.num_epoch // 10 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/install.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ### [Torch v1.11.0](https://pytorch.org/get-started/previous-versions/#v1110) 4 | #### Linux and Windows 5 | - CUDA 11.3 6 | ```shell 7 | 8 | pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113 9 | ``` 10 | 11 | - CUDA 10.2 12 | ```shell 13 | pip install torch==1.11.0+cu102 torchvision==0.12.0+cu102 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu102 14 | ``` 15 | 16 | ### [Torch v1.9.0](https://pytorch.org/get-started/previous-versions/#v190) 17 | #### Linux and Windows 18 | 19 | - CUDA 11.1 20 | ```shell 21 | pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html 22 | ``` 23 | 24 | - CUDA 10.2 25 | ```shell 26 | pip install torch==1.9.0+cu102 torchvision==0.10.0+cu102 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html 27 | ``` 28 | -------------------------------------------------------------------------------- /AToM/data_util/euler2quaterion.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import math 4 | import numba 5 | from scipy.spatial.transform import Rotation as R 6 | 7 | def euler2quaterion(euler, use_radian=True): 8 | """ 9 | euler: np.array, [batch, 3] 10 | 
return: the quaterion, np.array, [batch, 4] 11 | """ 12 | r = R.from_euler('xyz',euler, degrees=not use_radian) 13 | return r.as_quat() 14 | 15 | def quaterion2euler(quat, use_radian=True): 16 | """ 17 | quat: np.array, [batch, 4] 18 | return: the euler, np.array, [batch, 3] 19 | """ 20 | r = R.from_quat(quat) 21 | return r.as_euler('xyz', degrees=not use_radian) 22 | 23 | def rot2quaterion(rot): 24 | r = R.from_matrix(rot) 25 | return r.as_quat() 26 | 27 | def quaterion2rot(quat): 28 | r = R.from_quat(quat) 29 | return r.as_matrix() 30 | 31 | if __name__ == '__main__': 32 | euler = np.array([89.999,89.999,89.999] * 100).reshape([100,3]) 33 | q = euler2quaterion(euler, use_radian=False) 34 | e = quaterion2euler(q, use_radian=False) 35 | print(" ") 36 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import numpy as np 5 | import torch 6 | 7 | from backbones import get_model 8 | 9 | 10 | @torch.no_grad() 11 | def inference(weight, name, img): 12 | if img is None: 13 | img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.uint8) 14 | else: 15 | img = cv2.imread(img) 16 | img = cv2.resize(img, (112, 112)) 17 | 18 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 19 | img = np.transpose(img, (2, 0, 1)) 20 | img = torch.from_numpy(img).unsqueeze(0).float() 21 | img.div_(255).sub_(0.5).div_(0.5) 22 | net = get_model(name, fp16=False) 23 | net.load_state_dict(torch.load(weight)) 24 | net.eval() 25 | feat = net(img).numpy() 26 | print(feat) 27 | 28 | 29 | if __name__ == "__main__": 30 | parser = argparse.ArgumentParser(description='PyTorch ArcFace Training') 31 | parser.add_argument('--network', type=str, default='r50', help='backbone network') 32 | parser.add_argument('--weight', type=str, default='') 33 | parser.add_argument('--img', type=str, default=None) 34 | args = parser.parse_args() 35 | inference(args.weight, args.network, args.img) 36 | -------------------------------------------------------------------------------- /MToV/configs/latent-diffusion/base.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 # set to target_lr by starting main.py with '--scale_lr False' 3 | cond_model: False 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | loss_type: l1 11 | first_stage_key: "image" 12 | cond_stage_key: "image" 13 | image_size: 32 14 | channels: 4 15 | cond_stage_trainable: False 16 | concat_mode: False 17 | scale_by_std: True 18 | w: 0. 19 | 20 | scheduler_config: # 10000 warmup steps 21 | warm_up_steps: [10000] 22 | cycle_lengths: [10000000000000] 23 | f_start: [1.e-6] 24 | f_max: [1.] 25 | f_min: [ 1.] 
26 | 27 | unet_config: 28 | image_size: 32 29 | in_channels: 4 30 | out_channels: 4 31 | model_channels: 128 32 | attention_resolutions: [4,2,1] # 32, 16, 8, 4 33 | num_res_blocks: 2 34 | channel_mult: [1,2,4,4] # 32, 16, 8, 4, 2 35 | num_heads: 8 36 | use_scale_shift_norm: True 37 | resblock_updown: True 38 | cond_model: False 39 | 40 | # use_spatial_transformer: True # 230402 enable cross-attention 41 | # transformer_depth: 1 42 | # context_dim: 512 43 | -------------------------------------------------------------------------------- /MToV/configs/latent-diffusion/base_longvid.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 # set to target_lr by starting main.py with '--scale_lr False' 3 | cond_model: False 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | loss_type: l1 11 | first_stage_key: "image" 12 | cond_stage_key: "image" 13 | image_size: 32 14 | channels: 4 15 | cond_stage_trainable: False 16 | concat_mode: False 17 | scale_by_std: True 18 | w: 0. 19 | 20 | scheduler_config: # 10000 warmup steps 21 | warm_up_steps: [10000] 22 | cycle_lengths: [10000000000000] 23 | f_start: [1.e-6] 24 | f_max: [1.] 25 | f_min: [ 1.] 26 | 27 | unet_config: 28 | image_size: 32 29 | in_channels: 4 30 | out_channels: 4 31 | model_channels: 256 32 | attention_resolutions: [4,2,1] # 32, 16, 8, 4 33 | num_res_blocks: 2 34 | channel_mult: [1,2,4,4] # 32, 16, 8, 4, 2 35 | num_heads: 8 36 | use_scale_shift_norm: True 37 | resblock_updown: True 38 | cond_model: True 39 | 40 | # use_spatial_transformer: True # 230402 enable cross-attention 41 | # transformer_depth: 1 42 | # context_dim: 512 43 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from torch.optim.lr_scheduler import _LRScheduler 2 | 3 | 4 | class PolyScheduler(_LRScheduler): 5 | def __init__(self, optimizer, base_lr, max_steps, warmup_steps, last_epoch=-1): 6 | self.base_lr = base_lr 7 | self.warmup_lr_init = 0.0001 8 | self.max_steps: int = max_steps 9 | self.warmup_steps: int = warmup_steps 10 | self.power = 2 11 | super(PolyScheduler, self).__init__(optimizer, -1, False) 12 | self.last_epoch = last_epoch 13 | 14 | def get_warmup_lr(self): 15 | alpha = float(self.last_epoch) / float(self.warmup_steps) 16 | return [self.base_lr * alpha for _ in self.optimizer.param_groups] 17 | 18 | def get_lr(self): 19 | if self.last_epoch == -1: 20 | return [self.warmup_lr_init for _ in self.optimizer.param_groups] 21 | if self.last_epoch < self.warmup_steps: 22 | return self.get_warmup_lr() 23 | else: 24 | alpha = pow( 25 | 1 26 | - float(self.last_epoch - self.warmup_steps) 27 | / float(self.max_steps - self.warmup_steps), 28 | self.power, 29 | ) 30 | return [self.base_lr * alpha for _ in self.optimizer.param_groups] 31 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/utils/utils_logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | 6 | class AverageMeter(object): 7 | """Computes and stores the average and current value 8 | """ 9 | 10 | def __init__(self): 11 | self.val = None 12 | self.avg = None 13 | self.sum = 
None 14 | self.count = None 15 | self.reset() 16 | 17 | def reset(self): 18 | self.val = 0 19 | self.avg = 0 20 | self.sum = 0 21 | self.count = 0 22 | 23 | def update(self, val, n=1): 24 | self.val = val 25 | self.sum += val * n 26 | self.count += n 27 | self.avg = self.sum / self.count 28 | 29 | 30 | def init_logging(rank, models_root): 31 | if rank == 0: 32 | log_root = logging.getLogger() 33 | log_root.setLevel(logging.INFO) 34 | formatter = logging.Formatter("Training: %(asctime)s-%(message)s") 35 | handler_file = logging.FileHandler(os.path.join(models_root, "training.log")) 36 | handler_stream = logging.StreamHandler(sys.stdout) 37 | handler_file.setFormatter(formatter) 38 | handler_stream.setFormatter(formatter) 39 | log_root.addHandler(handler_file) 40 | log_root.addHandler(handler_stream) 41 | log_root.info('rank_id: %d' % rank) 42 | -------------------------------------------------------------------------------- /data/data_utils/commons/euler2rot.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from scipy.spatial.transform import Rotation as R 3 | from utils.commons.tensor_utils import convert_to_tensor 4 | 5 | 6 | def rot2euler(rot, use_radian=True): 7 | r = R.from_matrix(rot) 8 | return r.as_euler('xyz', degrees=not use_radian) 9 | 10 | def euler2rot(euler, use_radian=True): 11 | r = R.from_euler('xyz',euler, degrees=not use_radian) 12 | return r.as_matrix() 13 | 14 | def c2w_to_euler_trans(c2w): 15 | if c2w.ndim == 3: 16 | e = rot2euler(c2w[:, :3, :3]) # [B, 3] 17 | t = c2w[:, :3, 3].reshape([-1, 3]) 18 | else: 19 | e = rot2euler(c2w[:3, :3]) # [B, 3] 20 | t = c2w[:3, 3].reshape([3]) 21 | return e, t # [3+3] 22 | 23 | def euler_trans_2_c2w(euler, trans): 24 | if euler.ndim == 2: 25 | rot = euler2rot(euler) # [b, 3, 3] 26 | bs = trans.shape[0] 27 | trans = trans.reshape([bs, 3, 1]) 28 | rot = convert_to_tensor(rot).float() 29 | trans = convert_to_tensor(trans).float() 30 | c2w = torch.cat([rot, trans], dim=-1) # [b, 3, 4] 31 | else: 32 | rot = euler2rot(euler) # [3, 3] 33 | trans = trans.reshape([3, 1]) 34 | rot = convert_to_tensor(rot).float() 35 | trans = convert_to_tensor(trans).float() 36 | c2w = torch.cat([rot, trans], dim=-1) # [3, 4] 37 | return c2w -------------------------------------------------------------------------------- /MToV/evals/fvd/download.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from tqdm import tqdm 3 | import os 4 | import torch 5 | 6 | from utils import download 7 | from os import path 8 | import sys 9 | sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) 10 | 11 | def get_confirm_token(response): 12 | for key, value in response.cookies.items(): 13 | if key.startswith('download_warning'): 14 | return value 15 | return None 16 | 17 | 18 | def save_response_content(response, destination): 19 | CHUNK_SIZE = 8192 20 | 21 | pbar = tqdm(total=0, unit='iB', unit_scale=True) 22 | with open(destination, 'wb') as f: 23 | for chunk in response.iter_content(CHUNK_SIZE): 24 | if chunk: 25 | f.write(chunk) 26 | pbar.update(len(chunk)) 27 | pbar.close() 28 | 29 | 30 | _I3D_PRETRAINED_ID = '1fBNl3TS0LA5FEhZv5nMGJs2_7qQmvTmh' 31 | 32 | def load_i3d_pretrained(device=torch.device('cpu')): 33 | from evals.fvd.pytorch_i3d import InceptionI3d 34 | i3d = InceptionI3d(400, in_channels=3).to(device) 35 | # filepath = download(_I3D_PRETRAINED_ID, 'i3d_pretrained_400.pt') 36 | # i3d.load_state_dict(torch.load(filepath, 
map_location=device)) 37 | i3d.load_state_dict(torch.load('i3d_pretrained_400.pt', map_location=device)) 38 | i3d.eval() 39 | return i3d 40 | 41 | # load_i3d_pretrained() -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/eval.md: -------------------------------------------------------------------------------- 1 | ## Eval on ICCV2021-MFR 2 | 3 | coming soon. 4 | 5 | 6 | ## Eval IJBC 7 | You can evaluate IJB-C with PyTorch or ONNX. 8 | 9 | 10 | 1. Eval IJBC With Onnx 11 | ```shell 12 | CUDA_VISIBLE_DEVICES=0 python onnx_ijbc.py --model-root ms1mv3_arcface_r50 --image-path IJB_release/IJBC --result-dir ms1mv3_arcface_r50 13 | ``` 14 | 15 | 2. Eval IJBC With Pytorch 16 | ```shell 17 | CUDA_VISIBLE_DEVICES=0,1 python eval_ijbc.py \ 18 | --model-prefix ms1mv3_arcface_r50/backbone.pth \ 19 | --image-path IJB_release/IJBC \ 20 | --result-dir ms1mv3_arcface_r50 \ 21 | --batch-size 128 \ 22 | --job ms1mv3_arcface_r50 \ 23 | --target IJBC \ 24 | --network iresnet50 25 | ``` 26 | 27 | 28 | ## Inference 29 | 30 | ```shell 31 | python inference.py --weight ms1mv3_arcface_r50/backbone.pth --network r50 32 | ``` 33 | 34 | 35 | ## Result 36 | 37 | | Datasets | Backbone | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | 38 | |:---------------|:--------------------|:------------|:------------|:------------| 39 | | WF12M-PFC-0.05 | r100 | 94.05 | 97.51 | 95.75 | 40 | | WF12M-PFC-0.1 | r100 | 94.49 | 97.56 | 95.92 | 41 | | WF12M-PFC-0.2 | r100 | 94.75 | 97.60 | 95.90 | 42 | | WF12M-PFC-0.3 | r100 | 94.71 | 97.64 | 96.01 | 43 | | WF12M | r100 | 94.69 | 97.59 | 95.97 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/prepare_custom_dataset.md: -------------------------------------------------------------------------------- 1 | First, your face images require detection and alignment so that they are properly prepared for processing. Additionally, each individual's face images (those sharing the same id) must be placed into a separate folder. 2 | 3 | 4 | ```shell 5 | # directories and files for your datasets 6 | /image_folder 7 | ├── 0_0_0000000 8 | │   ├── 0_0.jpg 9 | │   ├── 0_1.jpg 10 | │   ├── 0_2.jpg 11 | │   ├── 0_3.jpg 12 | │   └── 0_4.jpg 13 | ├── 0_0_0000001 14 | │   ├── 0_5.jpg 15 | │   ├── 0_6.jpg 16 | │   ├── 0_7.jpg 17 | │   ├── 0_8.jpg 18 | │   └── 0_9.jpg 19 | ├── 0_0_0000002 20 | │   ├── 0_10.jpg 21 | │   ├── 0_11.jpg 22 | │   ├── 0_12.jpg 23 | │   ├── 0_13.jpg 24 | │   ├── 0_14.jpg 25 | │   ├── 0_15.jpg 26 | │   ├── 0_16.jpg 27 | │   └── 0_17.jpg 28 | ├── 0_0_0000003 29 | │   ├── 0_18.jpg 30 | │   ├── 0_19.jpg 31 | │   └── 0_20.jpg 32 | ├── 0_0_0000004 33 | 34 | 35 | # 0) Dependencies installation 36 | pip install opencv-python 37 | apt-get update 38 | apt-get install ffmpeg libsm6 libxext6 -y 39 | 40 | 41 | # 1) create train.lst using the following command 42 | python -m mxnet.tools.im2rec --list --recursive train image_folder 43 | 44 | # 2) create train.rec and train.idx from train.lst using the following command 45 | python -m mxnet.tools.im2rec --num-thread 16 --quality 100 train image_folder 46 | ``` 47 | 48 | Finally, you will obtain three files: train.lst, train.rec, and train.idx, where train.idx and train.rec are utilized for training.
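As a quick sanity check of the pack produced by im2rec, you can read one record back with `mxnet.recordio` before starting training (a minimal sketch; the `train.idx`/`train.rec` paths are assumptions and `mxnet` must already be installed):

```python
import mxnet as mx

# Open the indexed record pack written by im2rec ("r" = read-only).
imgrec = mx.recordio.MXIndexedRecordIO("train.idx", "train.rec", "r")

# Take the first key listed in train.idx and unpack its record into
# an IRHeader (label / id) plus the raw, still-encoded JPEG bytes.
first_key = imgrec.keys[0]
header, jpeg_bytes = mx.recordio.unpack(imgrec.read_idx(first_key))
print(header.label, len(jpeg_bytes))

imgrec.close()
```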
49 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/base.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | 9 | # Margin-Based Softmax 10 | config.margin_list = (1.0, 0.5, 0.0) 11 | config.network = "r50" 12 | config.resume = False 13 | config.save_all_states = False 14 | config.output = "ms1mv3_arcface_r50" 15 | 16 | config.embedding_size = 512 17 | 18 | # Partial FC 19 | config.sample_rate = 1 20 | config.interclass_filtering_threshold = 0 21 | 22 | config.fp16 = False 23 | config.batch_size = 128 24 | 25 | # For SGD 26 | config.optimizer = "sgd" 27 | config.lr = 0.1 28 | config.momentum = 0.9 29 | config.weight_decay = 5e-4 30 | 31 | # For AdamW 32 | # config.optimizer = "adamw" 33 | # config.lr = 0.001 34 | # config.weight_decay = 0.1 35 | 36 | config.verbose = 2000 37 | config.frequent = 10 38 | 39 | # For Large Scale Dataset, such as WebFace42M 40 | config.dali = False 41 | 42 | # Gradient ACC 43 | config.gradient_acc = 1 44 | 45 | # setup seed 46 | config.seed = 2048 47 | 48 | # dataloader num_workers 49 | config.num_workers = 2 50 | 51 | # WandB Logger 52 | config.wandb_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" 53 | config.suffix_run_name = None 54 | config.using_wandb = False 55 | config.wandb_entity = "entity" 56 | config.wandb_project = "project" 57 | config.wandb_log_all = True 58 | config.save_artifacts = False 59 | config.wandb_resume = False # resume wandb run: only if you want to resume the last run that was interrupted -------------------------------------------------------------------------------- /AToM/dataset/preprocess.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import re 4 | from pathlib import Path 5 | 6 | import torch 7 | 8 | def increment_path(path, exist_ok=False, sep="", mkdir=False): 9 | # Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc. 10 | path = Path(path) # os-agnostic 11 | if path.exists() and not exist_ok: 12 | suffix = path.suffix 13 | path = path.with_suffix("") 14 | dirs = glob.glob(f"{path}{sep}*") # similar paths 15 | matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs] 16 | i = [int(m.groups()[0]) for m in matches if m] # indices 17 | n = max(i) + 1 if i else 2 # increment number 18 | path = Path(f"{path}{sep}{n}{suffix}") # update path 19 | dir = path if path.suffix == "" else path.parent # directory 20 | if not dir.exists() and mkdir: 21 | dir.mkdir(parents=True, exist_ok=True) # make directory 22 | return path 23 | 24 | 25 | # class Normalizer: 26 | # def __init__(self, data): 27 | # flat = data.reshape(-1, data.shape[-1]) 28 | # self.scaler = MinMaxScaler((-1, 1), clip=True) 29 | # self.scaler.fit(flat) 30 | 31 | # def normalize(self, x): 32 | # batch, seq, ch = x.shape 33 | # x = x.reshape(-1, ch) 34 | # return self.scaler.transform(x).reshape((batch, seq, ch)) 35 | 36 | # def unnormalize(self, x): 37 | # batch, seq, ch = x.shape 38 | # x = x.reshape(-1, ch) 39 | # x = torch.clip(x, -1, 1) # clip to force compatibility 40 | # return self.scaler.inverse_transform(x).reshape((batch, seq, ch)) 41 | 42 | 43 | # def vectorize_many(data): 44 | # # given a list of batch x seqlen x joints?
x channels, flatten all to batch x seqlen x -1, concatenate 45 | # batch_size = data[0].shape[0] 46 | # seq_len = data[0].shape[1] 47 | 48 | # out = [x.reshape(batch_size, seq_len, -1).contiguous() for x in data] 49 | 50 | # global_pose_vec_gt = torch.cat(out, dim=2) 51 | # return global_pose_vec_gt 52 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/prepare_webface42m.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## 1. Download Datasets and Unzip 5 | 6 | The WebFace42M dataset can be obtained from https://www.face-benchmark.org/download.html. 7 | Upon extraction, the raw data of WebFace42M consists of 10 directories, denoted 0 to 9; the sub-dataset WebFace4M corresponds to 1 directory (0) and WebFace12M to 3 directories (0, 1, 2). 8 | 9 | ## 2. Create Shuffled Rec File for DALI 10 | 11 | Shuffled .rec files are crucial for DALI, and the absence of shuffling in .rec files can result in decreased performance. Original .rec files generated in the InsightFace style are not compatible with NVIDIA DALI, so it is necessary to use the [mxnet.tools.im2rec](https://github.com/apache/incubator-mxnet/blob/master/tools/im2rec.py) command to generate a shuffled .rec file. 12 | 13 | 14 | ```shell 15 | # directories and files for your datasets 16 | /WebFace42M_Root 17 | ├── 0_0_0000000 18 | │   ├── 0_0.jpg 19 | │   ├── 0_1.jpg 20 | │   ├── 0_2.jpg 21 | │   ├── 0_3.jpg 22 | │   └── 0_4.jpg 23 | ├── 0_0_0000001 24 | │   ├── 0_5.jpg 25 | │   ├── 0_6.jpg 26 | │   ├── 0_7.jpg 27 | │   ├── 0_8.jpg 28 | │   └── 0_9.jpg 29 | ├── 0_0_0000002 30 | │   ├── 0_10.jpg 31 | │   ├── 0_11.jpg 32 | │   ├── 0_12.jpg 33 | │   ├── 0_13.jpg 34 | │   ├── 0_14.jpg 35 | │   ├── 0_15.jpg 36 | │   ├── 0_16.jpg 37 | │   └── 0_17.jpg 38 | ├── 0_0_0000003 39 | │   ├── 0_18.jpg 40 | │   ├── 0_19.jpg 41 | │   └── 0_20.jpg 42 | ├── 0_0_0000004 43 | 44 | 45 | # 0) Dependencies installation 46 | pip install opencv-python 47 | apt-get update 48 | apt-get install ffmpeg libsm6 libxext6 -y 49 | 50 | 51 | # 1) create train.lst using the following command 52 | python -m mxnet.tools.im2rec --list --recursive train WebFace42M_Root 53 | 54 | # 2) create train.rec and train.idx from train.lst using the following command 55 | python -m mxnet.tools.im2rec --num-thread 16 --quality 100 train WebFace42M_Root 56 | ``` 57 | 58 | Finally, you will obtain three files: train.lst, train.rec, and train.idx, where train.idx and train.rec are utilized for training. 59 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/data/image_folder.py: -------------------------------------------------------------------------------- 1 | """A modified image folder class 2 | 3 | We modify the official PyTorch image folder (https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py) 4 | so that this class can load images from both current directory and its subdirectories.
5 | """ 6 | import numpy as np 7 | import torch.utils.data as data 8 | 9 | from PIL import Image 10 | import os 11 | import os.path 12 | 13 | IMG_EXTENSIONS = [ 14 | '.jpg', '.JPG', '.jpeg', '.JPEG', 15 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 16 | '.tif', '.TIF', '.tiff', '.TIFF', 17 | ] 18 | 19 | 20 | def is_image_file(filename): 21 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 22 | 23 | 24 | def make_dataset(dir, max_dataset_size=float("inf")): 25 | images = [] 26 | assert os.path.isdir(dir) or os.path.islink(dir), '%s is not a valid directory' % dir 27 | 28 | for root, _, fnames in sorted(os.walk(dir, followlinks=True)): 29 | for fname in fnames: 30 | if is_image_file(fname): 31 | path = os.path.join(root, fname) 32 | images.append(path) 33 | return images[:min(max_dataset_size, len(images))] 34 | 35 | 36 | def default_loader(path): 37 | return Image.open(path).convert('RGB') 38 | 39 | 40 | class ImageFolder(data.Dataset): 41 | 42 | def __init__(self, root, transform=None, return_paths=False, 43 | loader=default_loader): 44 | imgs = make_dataset(root) 45 | if len(imgs) == 0: 46 | raise(RuntimeError("Found 0 images in: " + root + "\n" 47 | "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) 48 | 49 | self.root = root 50 | self.imgs = imgs 51 | self.transform = transform 52 | self.return_paths = return_paths 53 | self.loader = loader 54 | 55 | def __getitem__(self, index): 56 | path = self.imgs[index] 57 | img = self.loader(path) 58 | if self.transform is not None: 59 | img = self.transform(img) 60 | if self.return_paths: 61 | return img, path 62 | else: 63 | return img 64 | 65 | def __len__(self): 66 | return len(self.imgs) 67 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/torch2onnx.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import onnx 3 | import torch 4 | 5 | 6 | def convert_onnx(net, path_module, output, opset=11, simplify=False): 7 | assert isinstance(net, torch.nn.Module) 8 | img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.int32) 9 | img = img.astype(np.float) 10 | img = (img / 255. 
- 0.5) / 0.5 # torch style norm 11 | img = img.transpose((2, 0, 1)) 12 | img = torch.from_numpy(img).unsqueeze(0).float() 13 | 14 | weight = torch.load(path_module) 15 | net.load_state_dict(weight, strict=True) 16 | net.eval() 17 | torch.onnx.export(net, img, output, input_names=["data"], keep_initializers_as_inputs=False, verbose=False, opset_version=opset) 18 | model = onnx.load(output) 19 | graph = model.graph 20 | graph.input[0].type.tensor_type.shape.dim[0].dim_param = 'None' 21 | if simplify: 22 | from onnxsim import simplify 23 | model, check = simplify(model) 24 | assert check, "Simplified ONNX model could not be validated" 25 | onnx.save(model, output) 26 | 27 | 28 | if __name__ == '__main__': 29 | import os 30 | import argparse 31 | from backbones import get_model 32 | 33 | parser = argparse.ArgumentParser(description='ArcFace PyTorch to onnx') 34 | parser.add_argument('input', type=str, help='input backbone.pth file or path') 35 | parser.add_argument('--output', type=str, default=None, help='output onnx path') 36 | parser.add_argument('--network', type=str, default=None, help='backbone network') 37 | parser.add_argument('--simplify', type=bool, default=False, help='onnx simplify') 38 | args = parser.parse_args() 39 | input_file = args.input 40 | if os.path.isdir(input_file): 41 | input_file = os.path.join(input_file, "model.pt") 42 | assert os.path.exists(input_file) 43 | # model_name = os.path.basename(os.path.dirname(input_file)).lower() 44 | # params = model_name.split("_") 45 | # if len(params) >= 3 and params[1] in ('arcface', 'cosface'): 46 | # if args.network is None: 47 | # args.network = params[2] 48 | assert args.network is not None 49 | print(args) 50 | backbone_onnx = get_model(args.network, dropout=0.0, fp16=False, num_features=512) 51 | if args.output is None: 52 | args.output = os.path.join(os.path.dirname(args.input), "model.onnx") 53 | convert_onnx(backbone_onnx, input_file, args.output, simplify=args.simplify) 54 | -------------------------------------------------------------------------------- /MToV/text_folders/train_id.txt: -------------------------------------------------------------------------------- 1 | RD_Radio46_000.mp4 2 | RD_Radio43_000.mp4 3 | RD_Radio33_000.mp4 4 | RD_Radio34_003.mp4 5 | RD_Radio40_000.mp4 6 | RD_Radio35_000.mp4 7 | RD_Radio8_000.mp4 8 | RD_Radio47_000.mp4 9 | RD_Radio45_000.mp4 10 | RD_Radio7_000.mp4 11 | RD_Radio53_000.mp4 12 | RD_Radio34_000.mp4 13 | RD_Radio12_000.mp4 14 | RD_Radio34_004.mp4 15 | RD_Radio50_000.mp4 16 | RD_Radio20_000.mp4 17 | RD_Radio34_002.mp4 18 | RD_Radio41_000.mp4 19 | RD_Radio13_000.mp4 20 | RD_Radio26_000.mp4 21 | RD_Radio30_000.mp4 22 | RD_Radio42_000.mp4 23 | RD_Radio34_001.mp4 24 | RD_Radio16_000.mp4 25 | RD_Radio3_000.mp4 26 | RD_Radio17_000.mp4 27 | RD_Radio29_000.mp4 28 | RD_Radio38_000.mp4 29 | RD_Radio1_000.mp4 30 | RD_Radio39_000.mp4 31 | RD_Radio5_000.mp4 32 | RD_Radio44_000.mp4 33 | RD_Radio27_000.mp4 34 | RD_Radio28_000.mp4 35 | RD_Radio4_000.mp4 36 | RD_Radio54_000.mp4 37 | RD_Radio23_000.mp4 38 | RD_Radio34_006.mp4 39 | RD_Radio37_000.mp4 40 | RD_Radio21_000.mp4 41 | RD_Radio36_000.mp4 42 | RD_Radio22_000.mp4 43 | RD_Radio34_009.mp4 44 | RD_Radio49_000.mp4 45 | RD_Radio11_000.mp4 46 | RD_Radio34_007.mp4 47 | RD_Radio19_000.mp4 48 | RD_Radio34_005.mp4 49 | RD_Radio10_000.mp4 50 | RD_Radio25_000.mp4 51 | RD_Radio32_000.mp4 52 | RD_Radio52_000.mp4 53 | RD_Radio14_000.mp4 54 | RD_Radio51_000.mp4 55 | RD_Radio34_008.mp4 56 | RD_Radio9_000.mp4 57 | RD_Radio18_000.mp4 58 | RD_Radio31_000.mp4 59 | 
RD_Radio2_000.mp4 60 | RD_Radio11_001.mp4 61 | WRA_JebHensarling2_003.mp4 62 | WRA_JonKyl_000.mp4 63 | WDA_GerryConnolly_000.mp4 64 | WDA_FrankPallone1_000.mp4 65 | WDA_HillaryClinton_000.mp4 66 | WDA_BarbaraLee1_000.mp4 67 | WRA_SteveDaines0_000.mp4 68 | WDA_AmyKlobuchar1_002.mp4 69 | WRA_MarkwayneMullin_000.mp4 70 | WDA_StenyHoyer_000.mp4 71 | WDA_BarackObama_001.mp4 72 | WDA_SheldonWhitehouse0_000.mp4 73 | WRA_JohnKasich1_001.mp4 74 | WDA_JoeCrowley0_000.mp4 75 | WRA_SaxbyChambliss_000.mp4 76 | WDA_RichardBlumenthal_000.mp4 77 | WRA_GregWalden1_000.mp4 78 | WRA_JoniErnst1_000.mp4 79 | WRA_RandPaul1_000.mp4 80 | WRA_ErikPaulsen_003.mp4 81 | WRA_JohnKasich3_000.mp4 82 | WDA_HakeemJeffries_000.mp4 83 | WDA_JackReed0_000.mp4 84 | WRA_DianeBlack0_000.mp4 85 | WDA_NancyPelosi3_000.mp4 86 | WRA_MikeJohanns_000.mp4 87 | WDA_JoeManchin_000.mp4 88 | WDA_ChrisCoons1_000.mp4 89 | WRA_DavidVitter_000.mp4 90 | WDA_JackyRosen_000.mp4 91 | WDA_PatrickLeahy0_000.mp4 92 | WRA_ErikPaulsen_002.mp4 93 | WRA_JoePitts_000.mp4 94 | WDA_JoeCrowley1_001.mp4 95 | WDA_TerriSewell0_000.mp4 96 | WDA_MartinHeinrich_000.mp4 97 | WDA_JackieSpeier_000.mp4 98 | WRA_LisaMurkowski0_000.mp4 -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/utils/plot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import pandas as pd 7 | from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap 8 | from prettytable import PrettyTable 9 | from sklearn.metrics import roc_curve, auc 10 | 11 | with open(sys.argv[1], "r") as f: 12 | files = f.readlines() 13 | 14 | files = [x.strip() for x in files] 15 | image_path = "/train_tmp/IJB_release/IJBC" 16 | 17 | 18 | def read_template_pair_list(path): 19 | pairs = pd.read_csv(path, sep=' ', header=None).values 20 | t1 = pairs[:, 0].astype(np.int) 21 | t2 = pairs[:, 1].astype(np.int) 22 | label = pairs[:, 2].astype(np.int) 23 | return t1, t2, label 24 | 25 | 26 | p1, p2, label = read_template_pair_list( 27 | os.path.join('%s/meta' % image_path, 28 | '%s_template_pair_label.txt' % 'ijbc')) 29 | 30 | methods = [] 31 | scores = [] 32 | for file in files: 33 | methods.append(file) 34 | scores.append(np.load(file)) 35 | 36 | methods = np.array(methods) 37 | scores = dict(zip(methods, scores)) 38 | colours = dict( 39 | zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2'))) 40 | x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1] 41 | tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels]) 42 | fig = plt.figure() 43 | for method in methods: 44 | fpr, tpr, _ = roc_curve(label, scores[method]) 45 | roc_auc = auc(fpr, tpr) 46 | fpr = np.flipud(fpr) 47 | tpr = np.flipud(tpr) # select largest tpr at same fpr 48 | plt.plot(fpr, 49 | tpr, 50 | color=colours[method], 51 | lw=1, 52 | label=('[%s (AUC = %0.4f %%)]' % 53 | (method.split('-')[-1], roc_auc * 100))) 54 | tpr_fpr_row = [] 55 | tpr_fpr_row.append(method) 56 | for fpr_iter in np.arange(len(x_labels)): 57 | _, min_index = min( 58 | list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr))))) 59 | tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100)) 60 | tpr_fpr_table.add_row(tpr_fpr_row) 61 | plt.xlim([10 ** -6, 0.1]) 62 | plt.ylim([0.3, 1.0]) 63 | plt.grid(linestyle='--', linewidth=1) 64 | plt.xticks(x_labels) 65 | plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) 66 | 
plt.xscale('log') 67 | plt.xlabel('False Positive Rate') 68 | plt.ylabel('True Positive Rate') 69 | plt.title('ROC on IJB') 70 | plt.legend(loc="lower right") 71 | print(tpr_fpr_table) 72 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/scripts/shuffle_rec.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import multiprocessing 3 | import os 4 | import time 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | 10 | def read_worker(args, q_in): 11 | path_imgidx = os.path.join(args.input, "train.idx") 12 | path_imgrec = os.path.join(args.input, "train.rec") 13 | imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r") 14 | 15 | s = imgrec.read_idx(0) 16 | header, _ = mx.recordio.unpack(s) 17 | assert header.flag > 0 18 | 19 | imgidx = np.array(range(1, int(header.label[0]))) 20 | np.random.shuffle(imgidx) 21 | 22 | for idx in imgidx: 23 | item = imgrec.read_idx(idx) 24 | q_in.put(item) 25 | 26 | q_in.put(None) 27 | imgrec.close() 28 | 29 | 30 | def write_worker(args, q_out): 31 | pre_time = time.time() 32 | 33 | if args.input[-1] == '/': 34 | args.input = args.input[:-1] 35 | dirname = os.path.dirname(args.input) 36 | basename = os.path.basename(args.input) 37 | output = os.path.join(dirname, f"shuffled_{basename}") 38 | os.makedirs(output, exist_ok=True) 39 | 40 | path_imgidx = os.path.join(output, "train.idx") 41 | path_imgrec = os.path.join(output, "train.rec") 42 | save_record = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "w") 43 | more = True 44 | count = 0 45 | while more: 46 | deq = q_out.get() 47 | if deq is None: 48 | more = False 49 | else: 50 | header, jpeg = mx.recordio.unpack(deq) 51 | # TODO it is currently not fully developed 52 | if isinstance(header.label, float): 53 | label = header.label 54 | else: 55 | label = header.label[0] 56 | 57 | header = mx.recordio.IRHeader(flag=header.flag, label=label, id=header.id, id2=header.id2) 58 | save_record.write_idx(count, mx.recordio.pack(header, jpeg)) 59 | count += 1 60 | if count % 10000 == 0: 61 | cur_time = time.time() 62 | print('save time:', cur_time - pre_time, ' count:', count) 63 | pre_time = cur_time 64 | print(count) 65 | save_record.close() 66 | 67 | 68 | def main(args): 69 | queue = multiprocessing.Queue(10240) 70 | read_process = multiprocessing.Process(target=read_worker, args=(args, queue)) 71 | read_process.daemon = True 72 | read_process.start() 73 | write_process = multiprocessing.Process(target=write_worker, args=(args, queue)) 74 | write_process.start() 75 | write_process.join() 76 | 77 | 78 | if __name__ == '__main__': 79 | parser = argparse.ArgumentParser() 80 | parser.add_argument('input', help='path to source rec.') 81 | main(parser.parse_args()) 82 | -------------------------------------------------------------------------------- /AToM/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def parse_train_opt(): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--project", default="runs/train", help="project/name") 7 | parser.add_argument("--exp_name", default="exp", help="save to project/name") 8 | parser.add_argument( 9 | "--data_path", type=str, default="", help="raw data path" 10 | ) 11 | parser.add_argument( 12 | "--processed_data_dir", 13 | type=str, 14 | default="", 15 | help="Dataset backup path", 16 | ) 17 | 
parser.add_argument("--render_dir", type=str, default="renders/", help="Sample render path") 18 | parser.add_argument("--feature_type", type=str, default="jukebox") 19 | parser.add_argument("--wandb_pj_name", type=str, default="AToM", help="project name") 20 | parser.add_argument("--batch_size", type=int, default=64, help="batch size") 21 | parser.add_argument("--epochs", type=int, default=2000) 22 | parser.add_argument("--force_reload", action="store_true", help="force reloads the datasets") 23 | parser.add_argument( 24 | "--no_cache", action="store_true", help="don't reuse / cache loaded dataset" 25 | ) 26 | parser.add_argument( 27 | "--save_interval", 28 | type=int, 29 | default=100, 30 | help='Log model after every "save_period" epoch', 31 | ) 32 | parser.add_argument("--ema_interval", type=int, default=1, help="ema every x steps") 33 | parser.add_argument( 34 | "--checkpoint", type=str, default="", help="trained checkpoint path (optional)" 35 | ) 36 | opt = parser.parse_args() 37 | return opt 38 | 39 | 40 | def parse_test_opt(): 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument("--feature_type", type=str, default="jukebox") 43 | parser.add_argument( 44 | "--out_length", type=float, default=30, help="max. length of output, in seconds" 45 | ) 46 | parser.add_argument( 47 | "--processed_data_dir", 48 | type=str, 49 | default="", 50 | help="Dataset backup path", 51 | ) 52 | parser.add_argument("--render_dir", type=str, default="renders/", help="Sample render path") 53 | parser.add_argument("--checkpoint", type=str, default="checkpoint.pt", help="checkpoint") 54 | parser.add_argument( 55 | "--music_dir", 56 | type=str, 57 | default="", 58 | help="folder containing input music", 59 | ) 60 | parser.add_argument( 61 | "--cache_features", 62 | action="store_true", 63 | help="Save the jukebox features for later reuse", 64 | ) 65 | parser.add_argument( 66 | "--no_render", 67 | action="store_true", 68 | help="Don't render the video", 69 | ) 70 | parser.add_argument( 71 | "--use_cached_features", 72 | action="store_true", 73 | help="Use precomputed features instead of music folder", 74 | ) 75 | parser.add_argument( 76 | "--feature_cache_dir", 77 | type=str, 78 | default="cached_features/", 79 | help="Where to save/load the features", 80 | ) 81 | opt = parser.parse_args() 82 | return opt 83 | -------------------------------------------------------------------------------- /MToV/models/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0,dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1,dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | #remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.','') 20 | self.m_name2s_name.update({name:s_name}) 21 | self.register_buffer(s_name,p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def forward(self,model): 26 | decay = self.decay 27 | 28 | if self.num_updates >= 0: 29 | self.num_updates += 1 30 | decay = min(self.decay,(1 + self.num_updates) / (10 + self.num_updates)) 31 | 32 | 
one_minus_decay = 1.0 - decay 33 | 34 | with torch.no_grad(): 35 | m_param = dict(model.named_parameters()) 36 | shadow_params = dict(self.named_buffers()) 37 | 38 | for key in m_param: 39 | if m_param[key].requires_grad: 40 | sname = self.m_name2s_name[key] 41 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 42 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 43 | else: 44 | assert not key in self.m_name2s_name 45 | 46 | def copy_to(self, model): 47 | m_param = dict(model.named_parameters()) 48 | shadow_params = dict(self.named_buffers()) 49 | for key in m_param: 50 | if m_param[key].requires_grad: 51 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 52 | else: 53 | assert not key in self.m_name2s_name 54 | 55 | def store(self, parameters): 56 | """ 57 | Save the current parameters for restoring later. 58 | Args: 59 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 60 | temporarily stored. 61 | """ 62 | self.collected_params = [param.clone() for param in parameters] 63 | 64 | def restore(self, parameters): 65 | """ 66 | Restore the parameters stored with the `store` method. 67 | Useful to validate the model with EMA parameters without affecting the 68 | original optimization process. Store the parameters before the 69 | `copy_to` method. After validation (or model saving), use this to 70 | restore the former parameters. 71 | Args: 72 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 73 | updated with the stored parameters. 74 | """ 75 | for c_param, param in zip(self.collected_params, parameters): 76 | param.data.copy_(c_param.data) -------------------------------------------------------------------------------- /MToV/losses/diffaugment.py: -------------------------------------------------------------------------------- 1 | # Differentiable Augmentation for Data-Efficient GAN Training 2 | # Shengyu Zhao, Zhijian Liu, Ji Lin, Jun-Yan Zhu, and Song Han 3 | # https://arxiv.org/pdf/2006.10738 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | 9 | def DiffAugment(x, policy='color,translation,cutout', channels_first=True): 10 | if policy: 11 | if not channels_first: 12 | x = x.permute(0, 3, 1, 2) 13 | for p in policy.split(','): 14 | for f in AUGMENT_FNS[p]: 15 | x = f(x) 16 | if not channels_first: 17 | x = x.permute(0, 2, 3, 1) 18 | x = x.contiguous() 19 | return x 20 | 21 | 22 | def rand_brightness(x): 23 | x = x + (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) - 0.5) 24 | return x 25 | 26 | 27 | def rand_saturation(x): 28 | x_mean = x.mean(dim=1, keepdim=True) 29 | x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) * 2) + x_mean 30 | return x 31 | 32 | 33 | def rand_contrast(x): 34 | x_mean = x.mean(dim=[1, 2, 3], keepdim=True) 35 | x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) + 0.5) + x_mean 36 | return x 37 | 38 | 39 | def rand_translation(x, ratio=0.125): 40 | shift_x, shift_y = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) 41 | translation_x = torch.randint(-shift_x, shift_x + 1, size=[x.size(0), 1, 1], device=x.device) 42 | translation_y = torch.randint(-shift_y, shift_y + 1, size=[x.size(0), 1, 1], device=x.device) 43 | grid_batch, grid_x, grid_y = torch.meshgrid( 44 | torch.arange(x.size(0), dtype=torch.long, device=x.device), 45 | torch.arange(x.size(2), dtype=torch.long, device=x.device), 46 | torch.arange(x.size(3), dtype=torch.long, device=x.device), 47 | 
) 48 | grid_x = torch.clamp(grid_x + translation_x + 1, 0, x.size(2) + 1) 49 | grid_y = torch.clamp(grid_y + translation_y + 1, 0, x.size(3) + 1) 50 | x_pad = F.pad(x, [1, 1, 1, 1, 0, 0, 0, 0]) 51 | x = x_pad.permute(0, 2, 3, 1).contiguous()[grid_batch, grid_x, grid_y].permute(0, 3, 1, 2).contiguous() 52 | return x 53 | 54 | 55 | def rand_cutout(x, ratio=0.5): 56 | cutout_size = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) 57 | offset_x = torch.randint(0, x.size(2) + (1 - cutout_size[0] % 2), size=[x.size(0), 1, 1], device=x.device) 58 | offset_y = torch.randint(0, x.size(3) + (1 - cutout_size[1] % 2), size=[x.size(0), 1, 1], device=x.device) 59 | grid_batch, grid_x, grid_y = torch.meshgrid( 60 | torch.arange(x.size(0), dtype=torch.long, device=x.device), 61 | torch.arange(cutout_size[0], dtype=torch.long, device=x.device), 62 | torch.arange(cutout_size[1], dtype=torch.long, device=x.device), 63 | ) 64 | grid_x = torch.clamp(grid_x + offset_x - cutout_size[0] // 2, min=0, max=x.size(2) - 1) 65 | grid_y = torch.clamp(grid_y + offset_y - cutout_size[1] // 2, min=0, max=x.size(3) - 1) 66 | mask = torch.ones(x.size(0), x.size(2), x.size(3), dtype=x.dtype, device=x.device) 67 | mask[grid_batch, grid_x, grid_y] = 0 68 | x = x * mask.unsqueeze(1) 69 | return x 70 | 71 | 72 | AUGMENT_FNS = { 73 | 'color': [rand_brightness, rand_saturation, rand_contrast], 74 | 'translation': [rand_translation], 75 | 'cutout': [rand_cutout], 76 | } -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/__init__.py: -------------------------------------------------------------------------------- 1 | """This package contains modules related to objective functions, optimizations, and network architectures. 2 | 3 | To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel. 4 | You need to implement the following five functions: 5 | -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). 6 | -- : unpack data from dataset and apply preprocessing. 7 | -- : produce intermediate results. 8 | -- : calculate loss, gradients, and update network weights. 9 | -- : (optionally) add model-specific options and set default options. 10 | 11 | In the function <__init__>, you need to define four lists: 12 | -- self.loss_names (str list): specify the training losses that you want to plot and save. 13 | -- self.model_names (str list): define networks used in our training. 14 | -- self.visual_names (str list): specify the images that you want to display and save. 15 | -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an usage. 16 | 17 | Now you can use the model class by specifying flag '--model dummy'. 18 | See our template model class 'template_model.py' for more details. 19 | """ 20 | 21 | import importlib 22 | from .base_model import BaseModel 23 | 24 | 25 | def find_model_using_name(model_name): 26 | """Import the module "models/[model_name]_model.py". 27 | 28 | In the file, the class called DatasetNameModel() will 29 | be instantiated. It has to be a subclass of BaseModel, 30 | and it is case-insensitive. 31 | """ 32 | model_filename = "deep_3drecon_models." 
+ model_name + "_model" 33 | modellib = importlib.import_module(model_filename) 34 | model = None 35 | target_model_name = model_name.replace('_', '') + 'model' 36 | for name, cls in modellib.__dict__.items(): 37 | if name.lower() == target_model_name.lower() \ 38 | and issubclass(cls, BaseModel): 39 | model = cls 40 | 41 | if model is None: 42 | print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name)) 43 | exit(0) 44 | 45 | return model 46 | 47 | 48 | def get_option_setter(model_name): 49 | """Return the static method of the model class.""" 50 | model_class = find_model_using_name(model_name) 51 | return model_class.modify_commandline_options 52 | 53 | 54 | def create_model(opt): 55 | """Create a model given the option. 56 | 57 | This function warps the class CustomDatasetDataLoader. 58 | This is the main interface between this package and 'train.py'/'test.py' 59 | 60 | Example: 61 | >>> from models import create_model 62 | >>> model = create_model(opt) 63 | """ 64 | model = find_model_using_name(opt.model) 65 | instance = model(opt) 66 | print("model [%s] was created" % type(instance).__name__) 67 | return instance 68 | -------------------------------------------------------------------------------- /AToM/model/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import torch 5 | from einops import rearrange, reduce, repeat 6 | from einops.layers.torch import Rearrange 7 | from torch import nn 8 | 9 | 10 | # absolute positional embedding used for vanilla transformer sequential data 11 | class PositionalEncoding(nn.Module): 12 | def __init__(self, d_model, dropout=0.1, max_len=500, batch_first=False): 13 | super().__init__() 14 | self.batch_first = batch_first 15 | 16 | self.dropout = nn.Dropout(p=dropout) 17 | 18 | pe = torch.zeros(max_len, d_model) 19 | position = torch.arange(0, max_len).unsqueeze(1) 20 | div_term = torch.exp(torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model)) 21 | pe[:, 0::2] = torch.sin(position * div_term) 22 | pe[:, 1::2] = torch.cos(position * div_term) 23 | pe = pe.unsqueeze(0).transpose(0, 1) 24 | 25 | self.register_buffer("pe", pe) 26 | 27 | def forward(self, x): 28 | if self.batch_first: 29 | x = x + self.pe.permute(1, 0, 2)[:, : x.shape[1], :] 30 | else: 31 | x = x + self.pe[: x.shape[0], :] 32 | return self.dropout(x) 33 | 34 | 35 | # very similar positional embedding used for diffusion timesteps 36 | class SinusoidalPosEmb(nn.Module): 37 | def __init__(self, dim): 38 | super().__init__() 39 | self.dim = dim 40 | 41 | def forward(self, x): 42 | device = x.device 43 | half_dim = self.dim // 2 44 | emb = math.log(10000) / (half_dim - 1) 45 | emb = torch.exp(torch.arange(half_dim, device=device) * -emb) 46 | emb = x[:, None] * emb[None, :] 47 | emb = torch.cat((emb.sin(), emb.cos()), dim=-1) 48 | return emb 49 | 50 | 51 | # dropout mask 52 | def prob_mask_like(shape, prob, device): 53 | if prob == 1: 54 | return torch.ones(shape, device=device, dtype=torch.bool) 55 | elif prob == 0: 56 | return torch.zeros(shape, device=device, dtype=torch.bool) 57 | else: 58 | return torch.zeros(shape, device=device).float().uniform_(0, 1) < prob 59 | 60 | 61 | def extract(a, t, x_shape): 62 | b, *_ = t.shape 63 | out = a.gather(-1, t) 64 | return out.reshape(b, *((1,) * (len(x_shape) - 1))) 65 | 66 | 67 | def make_beta_schedule( 68 | schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, 
cosine_s=8e-3 69 | ): 70 | if schedule == "linear": 71 | betas = ( 72 | torch.linspace( 73 | linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64 74 | ) 75 | ** 2 76 | ) 77 | 78 | elif schedule == "cosine": 79 | timesteps = ( 80 | torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s 81 | ) 82 | alphas = timesteps / (1 + cosine_s) * np.pi / 2 83 | alphas = torch.cos(alphas).pow(2) 84 | alphas = alphas / alphas[0] 85 | betas = 1 - alphas[1:] / alphas[:-1] 86 | betas = np.clip(betas, a_min=0, a_max=0.999) 87 | 88 | elif schedule == "sqrt_linear": 89 | betas = torch.linspace( 90 | linear_start, linear_end, n_timestep, dtype=torch.float64 91 | ) 92 | elif schedule == "sqrt": 93 | betas = ( 94 | torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) 95 | ** 0.5 96 | ) 97 | else: 98 | raise ValueError(f"schedule '{schedule}' unknown.") 99 | return betas.numpy() 100 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/data/template_dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class template 2 | 3 | This module provides a template for users to implement custom datasets. 4 | You can specify '--dataset_mode template' to use this dataset. 5 | The class name should be consistent with both the filename and its dataset_mode option. 6 | The filename should be _dataset.py 7 | The class name should be Dataset.py 8 | You need to implement the following functions: 9 | -- : Add dataset-specific options and rewrite default values for existing options. 10 | -- <__init__>: Initialize this dataset class. 11 | -- <__getitem__>: Return a data point and its metadata information. 12 | -- <__len__>: Return the number of images. 13 | """ 14 | from data.base_dataset import BaseDataset, get_transform 15 | # from data.image_folder import make_dataset 16 | # from PIL import Image 17 | 18 | 19 | class TemplateDataset(BaseDataset): 20 | """A template dataset class for you to implement custom datasets.""" 21 | @staticmethod 22 | def modify_commandline_options(parser, is_train): 23 | """Add new dataset-specific options, and rewrite default values for existing options. 24 | 25 | Parameters: 26 | parser -- original option parser 27 | is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. 28 | 29 | Returns: 30 | the modified parser. 31 | """ 32 | parser.add_argument('--new_dataset_option', type=float, default=1.0, help='new dataset option') 33 | parser.set_defaults(max_dataset_size=10, new_dataset_option=2.0) # specify dataset-specific default values 34 | return parser 35 | 36 | def __init__(self, opt): 37 | """Initialize this dataset class. 38 | 39 | Parameters: 40 | opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions 41 | 42 | A few things can be done here. 43 | - save the options (have been done in BaseDataset) 44 | - get image paths and meta information of the dataset. 45 | - define the image transformation. 46 | """ 47 | # save the option and dataset root 48 | BaseDataset.__init__(self, opt) 49 | # get the image paths of your dataset; 50 | self.image_paths = [] # You can call sorted(make_dataset(self.root, opt.max_dataset_size)) to get all the image paths under the directory self.root 51 | # define the default transform function. 
You can use ; You can also define your custom transform function 52 | self.transform = get_transform(opt) 53 | 54 | def __getitem__(self, index): 55 | """Return a data point and its metadata information. 56 | 57 | Parameters: 58 | index -- a random integer for data indexing 59 | 60 | Returns: 61 | a dictionary of data with their names. It usually contains the data itself and its metadata information. 62 | 63 | Step 1: get a random image path: e.g., path = self.image_paths[index] 64 | Step 2: load your data from the disk: e.g., image = Image.open(path).convert('RGB'). 65 | Step 3: convert your data to a PyTorch tensor. You can use helpder functions such as self.transform. e.g., data = self.transform(image) 66 | Step 4: return a data point as a dictionary. 67 | """ 68 | path = 'temp' # needs to be a string 69 | data_A = None # needs to be a tensor 70 | data_B = None # needs to be a tensor 71 | return {'data_A': data_A, 'data_B': data_B, 'path': path} 72 | 73 | def __len__(self): 74 | """Return the total number of images.""" 75 | return len(self.image_paths) 76 | -------------------------------------------------------------------------------- /AToM/data_util/tensor_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | import numpy as np 4 | 5 | 6 | def reduce_tensors(metrics): 7 | new_metrics = {} 8 | for k, v in metrics.items(): 9 | if isinstance(v, torch.Tensor): 10 | dist.all_reduce(v) 11 | v = v / dist.get_world_size() 12 | if type(v) is dict: 13 | v = reduce_tensors(v) 14 | new_metrics[k] = v 15 | return new_metrics 16 | 17 | 18 | def tensors_to_scalars(tensors): 19 | if isinstance(tensors, torch.Tensor): 20 | tensors = tensors.item() 21 | return tensors 22 | elif isinstance(tensors, dict): 23 | new_tensors = {} 24 | for k, v in tensors.items(): 25 | v = tensors_to_scalars(v) 26 | new_tensors[k] = v 27 | return new_tensors 28 | elif isinstance(tensors, list): 29 | return [tensors_to_scalars(v) for v in tensors] 30 | else: 31 | return tensors 32 | 33 | 34 | def convert_to_np(tensors): 35 | if isinstance(tensors, np.ndarray): 36 | return tensors 37 | elif isinstance(tensors, dict): 38 | new_np = {} 39 | for k, v in tensors.items(): 40 | if isinstance(v, torch.Tensor): 41 | v = v.cpu().numpy() 42 | if type(v) is dict: 43 | v = convert_to_np(v) 44 | new_np[k] = v 45 | elif isinstance(tensors, list): 46 | new_np = [] 47 | for v in tensors: 48 | if isinstance(v, torch.Tensor): 49 | v = v.cpu().numpy() 50 | if type(v) is dict: 51 | v = convert_to_np(v) 52 | new_np.append(v) 53 | elif isinstance(tensors, torch.Tensor): 54 | v = tensors 55 | if isinstance(v, torch.Tensor): 56 | v = v.cpu().numpy() 57 | if type(v) is dict: 58 | v = convert_to_np(v) 59 | new_np = v 60 | else: 61 | raise Exception(f'tensors_to_np does not support type {type(tensors)}.') 62 | return new_np 63 | 64 | 65 | def convert_to_tensor(arrays): 66 | if isinstance(arrays, np.ndarray): 67 | v = torch.from_numpy(arrays).float() 68 | ret = v 69 | elif isinstance(arrays, torch.Tensor): 70 | ret = arrays 71 | elif type(arrays) is dict: 72 | ret = {} 73 | for k, v in arrays.items(): 74 | if isinstance(v, np.ndarray): 75 | v = torch.from_numpy(v).float() 76 | if type(v) is dict: 77 | v = convert_to_tensor(v) 78 | ret[k] = v 79 | return ret 80 | 81 | def move_to_cpu(tensors): 82 | ret = {} 83 | for k, v in tensors.items(): 84 | if isinstance(v, torch.Tensor): 85 | v = v.cpu() 86 | if type(v) is dict: 87 | v = move_to_cpu(v) 88 | ret[k] = v 89 
| return ret 90 | 91 | 92 | def move_to_cuda(batch, gpu_id=0): 93 | # base case: object can be directly moved using `cuda` or `to` 94 | if callable(getattr(batch, 'cuda', None)): 95 | return batch.cuda(gpu_id, non_blocking=True) 96 | elif callable(getattr(batch, 'to', None)): 97 | return batch.to(torch.device('cuda', gpu_id), non_blocking=True) 98 | elif isinstance(batch, list): 99 | for i, x in enumerate(batch): 100 | batch[i] = move_to_cuda(x, gpu_id) 101 | return batch 102 | elif isinstance(batch, tuple): 103 | batch = list(batch) 104 | for i, x in enumerate(batch): 105 | batch[i] = move_to_cuda(x, gpu_id) 106 | return tuple(batch) 107 | elif isinstance(batch, dict): 108 | for k, v in batch.items(): 109 | batch[k] = move_to_cuda(v, gpu_id) 110 | return batch 111 | return batch 112 | -------------------------------------------------------------------------------- /data/data_utils/commons/tensor_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | import numpy as np 4 | 5 | 6 | def reduce_tensors(metrics): 7 | new_metrics = {} 8 | for k, v in metrics.items(): 9 | if isinstance(v, torch.Tensor): 10 | dist.all_reduce(v) 11 | v = v / dist.get_world_size() 12 | if type(v) is dict: 13 | v = reduce_tensors(v) 14 | new_metrics[k] = v 15 | return new_metrics 16 | 17 | 18 | def tensors_to_scalars(tensors): 19 | if isinstance(tensors, torch.Tensor): 20 | tensors = tensors.item() 21 | return tensors 22 | elif isinstance(tensors, dict): 23 | new_tensors = {} 24 | for k, v in tensors.items(): 25 | v = tensors_to_scalars(v) 26 | new_tensors[k] = v 27 | return new_tensors 28 | elif isinstance(tensors, list): 29 | return [tensors_to_scalars(v) for v in tensors] 30 | else: 31 | return tensors 32 | 33 | 34 | def convert_to_np(tensors): 35 | if isinstance(tensors, np.ndarray): 36 | return tensors 37 | elif isinstance(tensors, dict): 38 | new_np = {} 39 | for k, v in tensors.items(): 40 | if isinstance(v, torch.Tensor): 41 | v = v.cpu().numpy() 42 | if type(v) is dict: 43 | v = convert_to_np(v) 44 | new_np[k] = v 45 | elif isinstance(tensors, list): 46 | new_np = [] 47 | for v in tensors: 48 | if isinstance(v, torch.Tensor): 49 | v = v.cpu().numpy() 50 | if type(v) is dict: 51 | v = convert_to_np(v) 52 | new_np.append(v) 53 | elif isinstance(tensors, torch.Tensor): 54 | v = tensors 55 | if isinstance(v, torch.Tensor): 56 | v = v.cpu().numpy() 57 | if type(v) is dict: 58 | v = convert_to_np(v) 59 | new_np = v 60 | else: 61 | raise Exception(f'tensors_to_np does not support type {type(tensors)}.') 62 | return new_np 63 | 64 | 65 | def convert_to_tensor(arrays): 66 | if isinstance(arrays, np.ndarray): 67 | v = torch.from_numpy(arrays).float() 68 | ret = v 69 | elif isinstance(arrays, torch.Tensor): 70 | ret = arrays 71 | elif type(arrays) is dict: 72 | ret = {} 73 | for k, v in arrays.items(): 74 | if isinstance(v, np.ndarray): 75 | v = torch.from_numpy(v).float() 76 | if type(v) is dict: 77 | v = convert_to_tensor(v) 78 | ret[k] = v 79 | return ret 80 | 81 | def move_to_cpu(tensors): 82 | ret = {} 83 | for k, v in tensors.items(): 84 | if isinstance(v, torch.Tensor): 85 | v = v.cpu() 86 | if type(v) is dict: 87 | v = move_to_cpu(v) 88 | ret[k] = v 89 | return ret 90 | 91 | 92 | def move_to_cuda(batch, gpu_id=0): 93 | # base case: object can be directly moved using `cuda` or `to` 94 | if callable(getattr(batch, 'cuda', None)): 95 | return batch.cuda(gpu_id, non_blocking=True) 96 | elif callable(getattr(batch, 'to', 
None)): 97 | return batch.to(torch.device('cuda', gpu_id), non_blocking=True) 98 | elif isinstance(batch, list): 99 | for i, x in enumerate(batch): 100 | batch[i] = move_to_cuda(x, gpu_id) 101 | return batch 102 | elif isinstance(batch, tuple): 103 | batch = list(batch) 104 | for i, x in enumerate(batch): 105 | batch[i] = move_to_cuda(x, gpu_id) 106 | return tuple(batch) 107 | elif isinstance(batch, dict): 108 | for k, v in batch.items(): 109 | batch[k] = move_to_cuda(v, gpu_id) 110 | return batch 111 | return batch 112 | -------------------------------------------------------------------------------- /AToM/model/adan.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.optim import Optimizer 5 | 6 | 7 | def exists(val): 8 | return val is not None 9 | 10 | 11 | class Adan(Optimizer): 12 | def __init__( 13 | self, 14 | params, 15 | lr=1e-3, 16 | betas=(0.02, 0.08, 0.01), 17 | eps=1e-8, 18 | weight_decay=0, 19 | restart_cond: callable = None, 20 | ): 21 | assert len(betas) == 3 22 | 23 | defaults = dict( 24 | lr=lr, 25 | betas=betas, 26 | eps=eps, 27 | weight_decay=weight_decay, 28 | restart_cond=restart_cond, 29 | ) 30 | 31 | super().__init__(params, defaults) 32 | 33 | def step(self, closure=None): 34 | loss = None 35 | 36 | if exists(closure): 37 | loss = closure() 38 | 39 | for group in self.param_groups: 40 | 41 | lr = group["lr"] 42 | beta1, beta2, beta3 = group["betas"] 43 | weight_decay = group["weight_decay"] 44 | eps = group["eps"] 45 | restart_cond = group["restart_cond"] 46 | 47 | for p in group["params"]: 48 | if not exists(p.grad): 49 | continue 50 | 51 | data, grad = p.data, p.grad.data 52 | assert not grad.is_sparse 53 | 54 | state = self.state[p] 55 | 56 | if len(state) == 0: 57 | state["step"] = 0 58 | state["prev_grad"] = torch.zeros_like(grad) 59 | state["m"] = torch.zeros_like(grad) 60 | state["v"] = torch.zeros_like(grad) 61 | state["n"] = torch.zeros_like(grad) 62 | 63 | step, m, v, n, prev_grad = ( 64 | state["step"], 65 | state["m"], 66 | state["v"], 67 | state["n"], 68 | state["prev_grad"], 69 | ) 70 | 71 | if step > 0: 72 | prev_grad = state["prev_grad"] 73 | 74 | # main algorithm 75 | 76 | m.mul_(1 - beta1).add_(grad, alpha=beta1) 77 | 78 | grad_diff = grad - prev_grad 79 | 80 | v.mul_(1 - beta2).add_(grad_diff, alpha=beta2) 81 | 82 | next_n = (grad + (1 - beta2) * grad_diff) ** 2 83 | 84 | n.mul_(1 - beta3).add_(next_n, alpha=beta3) 85 | 86 | # bias correction terms 87 | 88 | step += 1 89 | 90 | correct_m, correct_v, correct_n = map( 91 | lambda n: 1 / (1 - (1 - n) ** step), (beta1, beta2, beta3) 92 | ) 93 | 94 | # gradient step 95 | 96 | def grad_step_(data, m, v, n): 97 | weighted_step_size = lr / (n * correct_n).sqrt().add_(eps) 98 | 99 | denom = 1 + weight_decay * lr 100 | 101 | data.addcmul_( 102 | weighted_step_size, 103 | (m * correct_m + (1 - beta2) * v * correct_v), 104 | value=-1.0, 105 | ).div_(denom) 106 | 107 | grad_step_(data, m, v, n) 108 | 109 | # restart condition 110 | 111 | if exists(restart_cond) and restart_cond(state): 112 | m.data.copy_(grad) 113 | v.zero_() 114 | n.data.copy_(grad ** 2) 115 | 116 | grad_step_(data, m, v, n) 117 | 118 | # set new incremented step 119 | 120 | prev_grad.copy_(grad) 121 | state["step"] = step 122 | 123 | return loss 124 | -------------------------------------------------------------------------------- /data/data_utils/preprocess/unify_fps.py: -------------------------------------------------------------------------------- 1 | 
import os 2 | import cv2 3 | import glob 4 | import time 5 | import torch 6 | import random 7 | import shutil 8 | from tqdm import tqdm 9 | import matplotlib 10 | import numpy as np 11 | from datetime import datetime 12 | from matplotlib import pyplot as plt 13 | from torch.utils.data import Dataset, DataLoader 14 | import argparse, os, cv2, traceback, subprocess 15 | import pdb 16 | import sys 17 | import subprocess 18 | 19 | def change_video_fps(input_path, output_path, fps): 20 | command = f'ffmpeg -i {input_path} -r {fps} {output_path}' 21 | subprocess.call(command, shell=True) 22 | 23 | class Dataset_(Dataset): 24 | def __init__(self,args): 25 | self.args=args 26 | self.device = torch.device('cuda') 27 | self.total_idx = 0 28 | self.start_time = time.time() 29 | 30 | self.path_to_mp4 = args.load_video_path 31 | self.path_to_frame = args.save_video_path 32 | 33 | os.makedirs(self.path_to_frame, exist_ok=True) 34 | 35 | self.video_path_list= [] 36 | self.frame_dir_path_list= [] 37 | 38 | self.total_num_list = [] 39 | self.initList() 40 | 41 | 42 | def initList(self): 43 | length = 0 44 | videos = glob.glob(os.path.join(self.path_to_mp4, '*', "*audio.mp4")) 45 | videos.sort() 46 | 47 | 48 | for video in videos: 49 | self.frame_dir_path_list.append(video.replace('videos/','videos_25/')) 50 | 51 | self.video_path_list = videos 52 | self.frame_dir_path_list = self.frame_dir_path_list 53 | self.length = len(self.video_path_list) 54 | 55 | def change_video_fps(self, fps, input_path, output_path): 56 | template = 'ffmpeg -y -i {} -c:v libx264 -r {} {}' 57 | command = template.format(input_path, fps, output_path) 58 | subprocess.call(command, shell=True) 59 | 60 | def generate_and_save_frame(self, idx): 61 | input_path = self.video_path_list[idx] 62 | output_path = self.frame_dir_path_list[idx] 63 | os.makedirs(os.path.dirname(os.path.dirname(output_path)), exist_ok=True) 64 | os.makedirs(os.path.dirname(output_path), exist_ok=True) 65 | 66 | self.change_video_fps(args.fps, input_path, output_path) 67 | 68 | return 0 69 | 70 | def __len__(self): 71 | return self.length 72 | 73 | def __getitem__(self, idx): 74 | start_time = time.time() 75 | self.generate_and_save_frame(idx) 76 | 77 | return [0] 78 | 79 | if __name__ == "__main__": 80 | parser = argparse.ArgumentParser() 81 | 82 | parser.add_argument('--load_video_path', type=str, default='/media/data1/HDTF/videos', # Source Video Roots 83 | help='path of the directory for loading videos') 84 | parser.add_argument('--save_video_path', type=str, default='/media/data1/HDTF/videos_25', # Saving Roots 85 | help='path of the directory for saving frames of videos') 86 | 87 | parser.add_argument('--fps', type=int, default=25, 88 | help='fps') 89 | parser.add_argument('--batch_size', type=int, default=1, 90 | help='audio sampling rate') 91 | parser.add_argument('--num_workers', type=int, default=6, 92 | help='audio sampling rate') 93 | args = parser.parse_args() 94 | count = 0 95 | 96 | dataset = Dataset_(args) 97 | data_loader = DataLoader(dataset=dataset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=False) 98 | start_time = time.time() 99 | for i, video_path in enumerate(tqdm(data_loader)): 100 | video_path = video_path 101 | dataset.generate_and_save_frame(video_path) 102 | print('done') -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/losses.py: -------------------------------------------------------------------------------- 1 | 
import torch 2 | import math 3 | 4 | 5 | class CombinedMarginLoss(torch.nn.Module): 6 | def __init__(self, 7 | s, 8 | m1, 9 | m2, 10 | m3, 11 | interclass_filtering_threshold=0): 12 | super().__init__() 13 | self.s = s 14 | self.m1 = m1 15 | self.m2 = m2 16 | self.m3 = m3 17 | self.interclass_filtering_threshold = interclass_filtering_threshold 18 | 19 | # For ArcFace 20 | self.cos_m = math.cos(self.m2) 21 | self.sin_m = math.sin(self.m2) 22 | self.theta = math.cos(math.pi - self.m2) 23 | self.sinmm = math.sin(math.pi - self.m2) * self.m2 24 | self.easy_margin = False 25 | 26 | 27 | def forward(self, logits, labels): 28 | index_positive = torch.where(labels != -1)[0] 29 | 30 | if self.interclass_filtering_threshold > 0: 31 | with torch.no_grad(): 32 | dirty = logits > self.interclass_filtering_threshold 33 | dirty = dirty.float() 34 | mask = torch.ones([index_positive.size(0), logits.size(1)], device=logits.device) 35 | mask.scatter_(1, labels[index_positive], 0) 36 | dirty[index_positive] *= mask 37 | tensor_mul = 1 - dirty 38 | logits = tensor_mul * logits 39 | 40 | target_logit = logits[index_positive, labels[index_positive].view(-1)] 41 | 42 | if self.m1 == 1.0 and self.m3 == 0.0: 43 | with torch.no_grad(): 44 | target_logit.arccos_() 45 | logits.arccos_() 46 | final_target_logit = target_logit + self.m2 47 | logits[index_positive, labels[index_positive].view(-1)] = final_target_logit 48 | logits.cos_() 49 | logits = logits * self.s 50 | 51 | elif self.m3 > 0: 52 | final_target_logit = target_logit - self.m3 53 | logits[index_positive, labels[index_positive].view(-1)] = final_target_logit 54 | logits = logits * self.s 55 | else: 56 | raise 57 | 58 | return logits 59 | 60 | class ArcFace(torch.nn.Module): 61 | """ ArcFace (https://arxiv.org/pdf/1801.07698v1.pdf): 62 | """ 63 | def __init__(self, s=64.0, margin=0.5): 64 | super(ArcFace, self).__init__() 65 | self.scale = s 66 | self.margin = margin 67 | self.cos_m = math.cos(margin) 68 | self.sin_m = math.sin(margin) 69 | self.theta = math.cos(math.pi - margin) 70 | self.sinmm = math.sin(math.pi - margin) * margin 71 | self.easy_margin = False 72 | 73 | 74 | def forward(self, logits: torch.Tensor, labels: torch.Tensor): 75 | index = torch.where(labels != -1)[0] 76 | target_logit = logits[index, labels[index].view(-1)] 77 | 78 | with torch.no_grad(): 79 | target_logit.arccos_() 80 | logits.arccos_() 81 | final_target_logit = target_logit + self.margin 82 | logits[index, labels[index].view(-1)] = final_target_logit 83 | logits.cos_() 84 | logits = logits * self.s 85 | return logits 86 | 87 | 88 | class CosFace(torch.nn.Module): 89 | def __init__(self, s=64.0, m=0.40): 90 | super(CosFace, self).__init__() 91 | self.s = s 92 | self.m = m 93 | 94 | def forward(self, logits: torch.Tensor, labels: torch.Tensor): 95 | index = torch.where(labels != -1)[0] 96 | target_logit = logits[index, labels[index].view(-1)] 97 | final_target_logit = target_logit - self.m 98 | logits[index, labels[index].view(-1)] = final_target_logit 99 | logits = logits * self.s 100 | return logits 101 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Dataset download & Preprocessing 2 | ## LRS3 3 | Follow [here](https://github.com/yerfor/GeneFace/blob/main/docs/process_data/process_lrs3.md) to preprocess LRS3. 
4 | 5 | ## HDTF 6 | 7 | ### Download 8 | Follow [here](https://github.com/universome/HDTF) to download HDTF and crop the videos to 256x256 resolution. 9 | 10 | ## Structure 11 | After following the steps, the directory structure should look like this: 12 | ``` 13 | data 14 | |-- train 15 | |-- lrs3 16 | |-- sizes_train.npy 17 | |-- sizes_val.npy 18 | |-- spk_id2spk_idx.npy 19 | |-- train.data 20 | |-- val.data 21 | 22 | 23 | |-- HDTF 24 | |-- frames 25 | |-- id1 26 | |-- 00000.jpg 27 | |-- 00001.jpg 28 | |-- ... 29 | |-- id2 30 | |-- 00000.jpg 31 | |-- 00001.jpg 32 | |-- ... 33 | |-- ... 34 | |-- keypoints 35 | |-- face-centric 36 | |-- posed 37 | |-- id1 38 | |-- 00000.npy 39 | |-- 00001.npy 40 | |-- ... 41 | |-- id2 42 | |-- 00000.npy 43 | |-- 00001.npy 44 | |-- ... 45 | |-- ... 46 | |-- unposed 47 | |-- id1 48 | |-- 00000.npy 49 | |-- 00001.npy 50 | |-- ... 51 | |-- id2 52 | |-- 00000.npy 53 | |-- 00001.npy 54 | |-- ... 55 | |-- ... 56 | |-- non-face-centric 57 | |-- posed 58 | |-- id1 59 | |-- 00000.npy 60 | |-- 00001.npy 61 | |-- ... 62 | |-- id2 63 | |-- 00000.npy 64 | |-- 00001.npy 65 | |-- ... 66 | |-- ... 67 | 68 | ``` 69 | 70 | ### Video 2 Frames 71 | Before you convert videos into frames, check that all videos are at 25 fps. 72 | If not, adjust them with `data/data_utils/preprocess/unify_fps.py`. 73 | Once preprocessing is complete and the videos are unified at 25 fps, you can convert them into frames with `data/data_utils/preprocess/video2frame_hdtf.py` (see the example invocation at the end of this README). 74 | 75 | ### Motion Extraction from frames, used in training MToV 76 | 77 | 78 | ### Environment 79 | 85 | ```bash 86 | conda create -n preprocess python=3.9.16 -y 87 | conda activate preprocess 88 | conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch 89 | conda install -c fvcore -c iopath -c conda-forge fvcore iopath -y 90 | conda install -c bottler nvidiacub -y 91 | conda install pytorch3d==0.7.4 -c pytorch3d -y 92 | conda install ffmpeg 93 | python -m pip install face_alignment einops trimesh natsort 94 | ``` 95 | 96 | ```bash 97 | conda activate preprocess 98 | cd data/data_utils 99 | python preprocess/process_video_3dmm_rollback_hdtf_batchify.py 100 | ``` 101 | After running the code above, you will obtain several types of keypoints in `HDTF/keypoints`. `face-centric` and `non-face-centric` indicate whether or not the keypoints are aligned in the center, and `unposed` and `posed` specify whether or not the pose of the landmarks is frontalized.
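For reference, the "Video 2 Frames" step above can be invoked as in the minimal sketch below. The paths are placeholders; the flags shown are the ones defined in `unify_fps.py`'s argument parser. Note that `unify_fps.py` looks for videos laid out as `<load_video_path>/<id>/*audio.mp4`, while `video2frame_hdtf.py` takes its input/output directories from the `vid_dir` / `saving_dir` variables in its `__main__` block, so edit those before running it.

```bash
# From data/data_utils: re-encode the source videos to 25 fps (example paths).
python preprocess/unify_fps.py \
    --load_video_path /path/to/HDTF/videos \
    --save_video_path /path/to/HDTF/videos_25 \
    --fps 25

# Then extract frames (set vid_dir / saving_dir inside the script first).
python preprocess/video2frame_hdtf.py
```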
102 | 103 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200 2 | from .mobilefacenet import get_mbf 3 | 4 | 5 | def get_model(name, **kwargs): 6 | # resnet 7 | if name == "r18": 8 | return iresnet18(False, **kwargs) 9 | elif name == "r34": 10 | return iresnet34(False, **kwargs) 11 | elif name == "r50": 12 | return iresnet50(False, **kwargs) 13 | elif name == "r100": 14 | return iresnet100(False, **kwargs) 15 | elif name == "r200": 16 | return iresnet200(False, **kwargs) 17 | elif name == "r2060": 18 | from .iresnet2060 import iresnet2060 19 | return iresnet2060(False, **kwargs) 20 | 21 | elif name == "mbf": 22 | fp16 = kwargs.get("fp16", False) 23 | num_features = kwargs.get("num_features", 512) 24 | return get_mbf(fp16=fp16, num_features=num_features) 25 | 26 | elif name == "mbf_large": 27 | from .mobilefacenet import get_mbf_large 28 | fp16 = kwargs.get("fp16", False) 29 | num_features = kwargs.get("num_features", 512) 30 | return get_mbf_large(fp16=fp16, num_features=num_features) 31 | 32 | elif name == "vit_t": 33 | num_features = kwargs.get("num_features", 512) 34 | from .vit import VisionTransformer 35 | return VisionTransformer( 36 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=256, depth=12, 37 | num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1) 38 | 39 | elif name == "vit_t_dp005_mask0": # For WebFace42M 40 | num_features = kwargs.get("num_features", 512) 41 | from .vit import VisionTransformer 42 | return VisionTransformer( 43 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=256, depth=12, 44 | num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.0) 45 | 46 | elif name == "vit_s": 47 | num_features = kwargs.get("num_features", 512) 48 | from .vit import VisionTransformer 49 | return VisionTransformer( 50 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=12, 51 | num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1) 52 | 53 | elif name == "vit_s_dp005_mask_0": # For WebFace42M 54 | num_features = kwargs.get("num_features", 512) 55 | from .vit import VisionTransformer 56 | return VisionTransformer( 57 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=12, 58 | num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.0) 59 | 60 | elif name == "vit_b": 61 | # this is a feature 62 | num_features = kwargs.get("num_features", 512) 63 | from .vit import VisionTransformer 64 | return VisionTransformer( 65 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=24, 66 | num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1, using_checkpoint=True) 67 | 68 | elif name == "vit_b_dp005_mask_005": # For WebFace42M 69 | # this is a feature 70 | num_features = kwargs.get("num_features", 512) 71 | from .vit import VisionTransformer 72 | return VisionTransformer( 73 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=24, 74 | num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.05, using_checkpoint=True) 75 | 76 | elif name == "vit_l_dp005_mask_005": # For WebFace42M 77 | # this is a feature 78 | num_features = kwargs.get("num_features", 512) 79 | from .vit import VisionTransformer 80 | return VisionTransformer( 81 | 
img_size=112, patch_size=9, num_classes=num_features, embed_dim=768, depth=24, 82 | num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.05, using_checkpoint=True) 83 | 84 | else: 85 | raise ValueError() 86 | -------------------------------------------------------------------------------- /MToV/tools/scheduler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LambdaWarmUpCosineScheduler: 5 | """ 6 | note: use with a base_lr of 1.0 7 | """ 8 | def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_steps, verbosity_interval=0): 9 | self.lr_warm_up_steps = warm_up_steps 10 | self.lr_start = lr_start 11 | self.lr_min = lr_min 12 | self.lr_max = lr_max 13 | self.lr_max_decay_steps = max_decay_steps 14 | self.last_lr = 0. 15 | self.verbosity_interval = verbosity_interval 16 | 17 | def schedule(self, n, **kwargs): 18 | if self.verbosity_interval > 0: 19 | if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_lr}") 20 | if n < self.lr_warm_up_steps: 21 | lr = (self.lr_max - self.lr_start) / self.lr_warm_up_steps * n + self.lr_start 22 | self.last_lr = lr 23 | return lr 24 | else: 25 | t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps) 26 | t = min(t, 1.0) 27 | lr = self.lr_min + 0.5 * (self.lr_max - self.lr_min) * ( 28 | 1 + np.cos(t * np.pi)) 29 | self.last_lr = lr 30 | return lr 31 | 32 | def __call__(self, n, **kwargs): 33 | return self.schedule(n,**kwargs) 34 | 35 | 36 | class LambdaWarmUpCosineScheduler2: 37 | """ 38 | supports repeated iterations, configurable via lists 39 | note: use with a base_lr of 1.0. 40 | """ 41 | def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths, verbosity_interval=0): 42 | assert len(warm_up_steps) == len(f_min) == len(f_max) == len(f_start) == len(cycle_lengths) 43 | self.lr_warm_up_steps = warm_up_steps 44 | self.f_start = f_start 45 | self.f_min = f_min 46 | self.f_max = f_max 47 | self.cycle_lengths = cycle_lengths 48 | self.cum_cycles = np.cumsum([0] + list(self.cycle_lengths)) 49 | self.last_f = 0. 
50 | self.verbosity_interval = verbosity_interval 51 | 52 | def find_in_interval(self, n): 53 | interval = 0 54 | for cl in self.cum_cycles[1:]: 55 | if n <= cl: 56 | return interval 57 | interval += 1 58 | 59 | def schedule(self, n, **kwargs): 60 | cycle = self.find_in_interval(n) 61 | n = n - self.cum_cycles[cycle] 62 | if self.verbosity_interval > 0: 63 | if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " 64 | f"current cycle {cycle}") 65 | if n < self.lr_warm_up_steps[cycle]: 66 | f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] 67 | self.last_f = f 68 | return f 69 | else: 70 | t = (n - self.lr_warm_up_steps[cycle]) / (self.cycle_lengths[cycle] - self.lr_warm_up_steps[cycle]) 71 | t = min(t, 1.0) 72 | f = self.f_min[cycle] + 0.5 * (self.f_max[cycle] - self.f_min[cycle]) * ( 73 | 1 + np.cos(t * np.pi)) 74 | self.last_f = f 75 | return f 76 | 77 | def __call__(self, n, **kwargs): 78 | return self.schedule(n, **kwargs) 79 | 80 | 81 | class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2): 82 | 83 | def schedule(self, n, **kwargs): 84 | cycle = self.find_in_interval(n) 85 | n = n - self.cum_cycles[cycle] 86 | if self.verbosity_interval > 0: 87 | if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " 88 | f"current cycle {cycle}") 89 | 90 | if n < self.lr_warm_up_steps[cycle]: 91 | f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] 92 | self.last_f = f 93 | return f 94 | else: 95 | f = self.f_min[cycle] + (self.f_max[cycle] - self.f_min[cycle]) * (self.cycle_lengths[cycle] - n) / (self.cycle_lengths[cycle]) 96 | self.last_f = f 97 | return f -------------------------------------------------------------------------------- /MToV/evals/fvd/convert_tf_pretrained.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from collections import OrderedDict 3 | import tensorflow_hub as hub 4 | import torch 5 | 6 | from src_pytorch.fvd.pytorch_i3d import InceptionI3d 7 | 8 | 9 | def convert_name(name): 10 | mapping = { 11 | 'conv_3d': 'conv3d', 12 | 'batch_norm': 'bn', 13 | 'w:0': 'weight', 14 | 'b:0': 'bias', 15 | 'moving_mean:0': 'running_mean', 16 | 'moving_variance:0': 'running_var', 17 | 'beta:0': 'bias' 18 | } 19 | 20 | segs = name.split('/') 21 | new_segs = [] 22 | i = 0 23 | while i < len(segs): 24 | seg = segs[i] 25 | if 'Mixed' in seg: 26 | new_segs.append(seg) 27 | elif 'Conv' in seg and 'Mixed' not in name: 28 | new_segs.append(seg) 29 | elif 'Branch' in seg: 30 | branch_i = int(seg.split('_')[-1]) 31 | i += 1 32 | seg = segs[i] 33 | 34 | # special case due to typo in original code 35 | if 'Mixed_5b' in name and branch_i == 2: 36 | if '1x1' in seg: 37 | new_segs.append(f'b{branch_i}a') 38 | elif '3x3' in seg: 39 | new_segs.append(f'b{branch_i}b') 40 | else: 41 | raise Exception() 42 | # Either Conv3d_{i}a_... or Conv3d_{i}b_... 
43 | elif 'a' in seg: 44 | if branch_i == 0: 45 | new_segs.append('b0') 46 | else: 47 | new_segs.append(f'b{branch_i}a') 48 | elif 'b' in seg: 49 | new_segs.append(f'b{branch_i}b') 50 | else: 51 | raise Exception 52 | elif seg == 'Logits': 53 | new_segs.append('logits') 54 | i += 1 55 | elif seg in mapping: 56 | new_segs.append(mapping[seg]) 57 | else: 58 | raise Exception(f"No match found for seg {seg} in name {name}") 59 | 60 | i += 1 61 | return '.'.join(new_segs) 62 | 63 | def convert_tensor(tensor): 64 | tensor_dim = len(tensor.shape) 65 | if tensor_dim == 5: # conv or bn 66 | if all([t == 1 for t in tensor.shape[:-1]]): 67 | tensor = tensor.squeeze() 68 | else: 69 | tensor = tensor.permute(4, 3, 0, 1, 2).contiguous() 70 | elif tensor_dim == 1: # conv bias 71 | pass 72 | else: 73 | raise Exception(f"Invalid shape {tensor.shape}") 74 | return tensor 75 | 76 | n_class = int(sys.argv[1]) # 600 or 400 77 | assert n_class in [400, 600] 78 | 79 | # Converts model from https://github.com/google-research/google-research/tree/master/frechet_video_distance 80 | # to pytorch version for loading 81 | model_url = f"https://tfhub.dev/deepmind/i3d-kinetics-{n_class}/1" 82 | i3d = hub.load(model_url) 83 | name_prefix = 'RGB/inception_i3d/' 84 | 85 | print('Creating state_dict...') 86 | all_names = [] 87 | state_dict = OrderedDict() 88 | for var in i3d.variables: 89 | name = var.name[len(name_prefix):] 90 | new_name = convert_name(name) 91 | all_names.append(new_name) 92 | 93 | tensor = torch.FloatTensor(var.value().numpy()) 94 | new_tensor = convert_tensor(tensor) 95 | 96 | state_dict[new_name] = new_tensor 97 | 98 | if 'bn.bias' in new_name: 99 | new_name = new_name[:-4] + 'weight' # bn.weight 100 | new_tensor = torch.ones_like(new_tensor).float() 101 | state_dict[new_name] = new_tensor 102 | 103 | print(f'Complete state_dict with {len(state_dict)} entries') 104 | 105 | s = dict() 106 | for i, n in enumerate(all_names): 107 | s[n] = s.get(n, []) + [i] 108 | 109 | for k, v in s.items(): 110 | if len(v) > 1: 111 | print('dup', k) 112 | for i in v: 113 | print('\t', i3d.variables[i].name) 114 | 115 | print('Testing load_state_dict...') 116 | print('Creating model...') 117 | 118 | i3d = InceptionI3d(n_class, in_channels=3) 119 | 120 | print('Loading state_dict...') 121 | i3d.load_state_dict(state_dict) 122 | 123 | print(f'Saving state_dict as fvd/i3d_pretrained_{n_class}.pt') 124 | torch.save(state_dict, f'fvd/i3d_pretrained_{n_class}.pt') 125 | 126 | print('Done') 127 | 128 | -------------------------------------------------------------------------------- /data/data_utils/preprocess/video2frame_hdtf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import random 4 | from tqdm import tqdm 5 | import ffmpeg 6 | import pickle 7 | from multiprocessing import Pool 8 | import argparse, os, cv2, traceback, subprocess 9 | 10 | import matplotlib 11 | import numpy as np 12 | from datetime import datetime 13 | from matplotlib import pyplot as plt 14 | import sys 15 | from PIL import Image 16 | from glob import glob 17 | import PIL 18 | import PIL.Image 19 | import scipy 20 | import scipy.ndimage 21 | import skimage.io as io 22 | from scipy.ndimage import gaussian_filter1d 23 | from matplotlib import pyplot as plt 24 | import torch.nn.functional as F 25 | # import torchvision.utils as vutils 26 | import pdb 27 | 28 | import ffmpeg 29 | 30 | 31 | def get_video_info(input_file_path): 32 | 33 | probe = ffmpeg.probe(input_file_path) 34 | 
video_stream = next( 35 | (stream for stream in probe["streams"] if stream["codec_type"] == "video"), None 36 | ) 37 | frame_rate = float(video_stream["r_frame_rate"].split("/")[0]) 38 | frame_num = int(video_stream["nb_frames"]) 39 | return int(video_stream["height"]), int(video_stream["width"]), frame_rate, frame_num 40 | 41 | 42 | def multi_preprocess_video(x): 43 | (iden, output_folder, (height, width, frame_rate, total_frame_num)) = x 44 | 45 | input_file_path = os.path.join(vid_dir, iden, "video.mp4") 46 | os.makedirs(output_folder, exist_ok=True) 47 | 48 | video = cv2.VideoCapture(input_file_path) 49 | print(iden, frame_rate) 50 | 51 | count = 0 52 | while True: 53 | success, frame = video.read() 54 | if not success: 55 | break 56 | file_name = f"{count:0>5}" 57 | frame_path = os.path.join(output_folder, f"{file_name}.jpg") 58 | cv2.imwrite(frame_path, frame) 59 | count += 1 60 | 61 | 62 | return None 63 | 64 | 65 | def preprocess_video_folder( 66 | reprocessings, multi_processing, option=None, workers=32 67 | ): 68 | print("Preprocess start !!!") 69 | if option is None: 70 | option = {} 71 | 72 | reprocessings.sort() 73 | multi_output_frame_path_list = [ 74 | os.path.join(saving_dir, iden) for iden in reprocessings 75 | ] 76 | multi_vid_info_list = [ 77 | get_video_info(os.path.join(vid_dir, f"{iden}/video.mp4")) for iden in reprocessings 78 | ] 79 | 80 | def initializer(): 81 | sys.stdout = open(os.devnull, "w") 82 | 83 | if multi_processing: 84 | """ 85 | for real running 86 | """ 87 | pool = Pool(workers) 88 | total = len(reprocessings) 89 | 90 | with tqdm(total=total) as pbar: 91 | pool.imap(multi_preprocess_video, zip(reprocessings, multi_output_frame_path_list, multi_vid_info_list)) 92 | pbar.update() 93 | 94 | 95 | pool.close() 96 | pool.join() 97 | return 98 | 99 | 100 | def read_file(filepath: os.PathLike): 101 | """ 102 | Reads a file as a space-separated dataframe, where the first column is the index 103 | """ 104 | with open(filepath, "r") as f: 105 | lines = f.read().splitlines() 106 | lines = [l.split(":")[0] for l in lines] 107 | 108 | return lines 109 | 110 | 111 | if __name__ == "__main__": 112 | parser = argparse.ArgumentParser(description="Preprocessor") 113 | parser.add_argument("--multi_processing", type=bool, default=True) 114 | parser.add_argument( 115 | "--gpu", help="Number of GPUs across which to run in parallel", default=0, type=int 116 | ) 117 | 118 | vid_dir = "/media/data1/HDTF_preprocessed/25_fps/" 119 | saving_dir = "/media/data/HDTF_preprocessed/25_frame/HDTF" 120 | eval_list = os.listdir(vid_dir) 121 | 122 | process_id = [] 123 | for id_ in tqdm(eval_list): 124 | try: 125 | vid = f"{vid_dir}/{id_}/video.mp4" 126 | height, width, frame_rate, frame_num = get_video_info(vid) 127 | if frame_num != len(glob(os.path.join(saving_dir, id_, '*.jpg'))): 128 | process_id.append(id_) 129 | except: 130 | print(id_) 131 | 132 | print(len(process_id)) 133 | args = parser.parse_args() 134 | preprocess_video_folder( 135 | process_id, 136 | args.multi_processing, 137 | ) 138 | -------------------------------------------------------------------------------- /AToM/model/rotary_embedding_torch.py: -------------------------------------------------------------------------------- 1 | from inspect import isfunction 2 | from math import log, pi 3 | 4 | import torch 5 | from einops import rearrange, repeat 6 | from torch import einsum, nn 7 | 8 | # helper functions 9 | 10 | 11 | def exists(val): 12 | return val is not None 13 | 14 | 15 | def broadcat(tensors, dim=-1): 16 | 
num_tensors = len(tensors) 17 | shape_lens = set(list(map(lambda t: len(t.shape), tensors))) 18 | assert len(shape_lens) == 1, "tensors must all have the same number of dimensions" 19 | shape_len = list(shape_lens)[0] 20 | 21 | dim = (dim + shape_len) if dim < 0 else dim 22 | dims = list(zip(*map(lambda t: list(t.shape), tensors))) 23 | 24 | expandable_dims = [(i, val) for i, val in enumerate(dims) if i != dim] 25 | assert all( 26 | [*map(lambda t: len(set(t[1])) <= 2, expandable_dims)] 27 | ), "invalid dimensions for broadcastable concatentation" 28 | max_dims = list(map(lambda t: (t[0], max(t[1])), expandable_dims)) 29 | expanded_dims = list(map(lambda t: (t[0], (t[1],) * num_tensors), max_dims)) 30 | expanded_dims.insert(dim, (dim, dims[dim])) 31 | expandable_shapes = list(zip(*map(lambda t: t[1], expanded_dims))) 32 | tensors = list(map(lambda t: t[0].expand(*t[1]), zip(tensors, expandable_shapes))) 33 | return torch.cat(tensors, dim=dim) 34 | 35 | 36 | # rotary embedding helper functions 37 | 38 | 39 | def rotate_half(x): 40 | x = rearrange(x, "... (d r) -> ... d r", r=2) 41 | x1, x2 = x.unbind(dim=-1) 42 | x = torch.stack((-x2, x1), dim=-1) 43 | return rearrange(x, "... d r -> ... (d r)") 44 | 45 | 46 | def apply_rotary_emb(freqs, t, start_index=0): 47 | freqs = freqs.to(t) 48 | rot_dim = freqs.shape[-1] 49 | end_index = start_index + rot_dim 50 | assert ( 51 | rot_dim <= t.shape[-1] 52 | ), f"feature dimension {t.shape[-1]} is not of sufficient size to rotate in all the positions {rot_dim}" 53 | t_left, t, t_right = ( 54 | t[..., :start_index], 55 | t[..., start_index:end_index], 56 | t[..., end_index:], 57 | ) 58 | t = (t * freqs.cos()) + (rotate_half(t) * freqs.sin()) 59 | return torch.cat((t_left, t, t_right), dim=-1) 60 | 61 | 62 | # learned rotation helpers 63 | 64 | 65 | def apply_learned_rotations(rotations, t, start_index=0, freq_ranges=None): 66 | if exists(freq_ranges): 67 | rotations = einsum("..., f -> ... f", rotations, freq_ranges) 68 | rotations = rearrange(rotations, "... r f -> ... (r f)") 69 | 70 | rotations = repeat(rotations, "... n -> ... 
(n r)", r=2) 71 | return apply_rotary_emb(rotations, t, start_index=start_index) 72 | 73 | 74 | # classes 75 | 76 | 77 | class RotaryEmbedding(nn.Module): 78 | def __init__( 79 | self, 80 | dim, 81 | custom_freqs=None, 82 | freqs_for="lang", 83 | theta=10000, 84 | max_freq=10, 85 | num_freqs=1, 86 | learned_freq=False, 87 | ): 88 | super().__init__() 89 | if exists(custom_freqs): 90 | freqs = custom_freqs 91 | elif freqs_for == "lang": 92 | freqs = 1.0 / ( 93 | theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim) 94 | ) 95 | elif freqs_for == "pixel": 96 | freqs = torch.linspace(1.0, max_freq / 2, dim // 2) * pi 97 | elif freqs_for == "constant": 98 | freqs = torch.ones(num_freqs).float() 99 | else: 100 | raise ValueError(f"unknown modality {freqs_for}") 101 | 102 | self.cache = dict() 103 | 104 | if learned_freq: 105 | self.freqs = nn.Parameter(freqs) 106 | else: 107 | self.register_buffer("freqs", freqs) 108 | 109 | def rotate_queries_or_keys(self, t, seq_dim=-2): 110 | device = t.device 111 | seq_len = t.shape[seq_dim] 112 | freqs = self.forward( 113 | lambda: torch.arange(seq_len, device=device), cache_key=seq_len 114 | ) 115 | return apply_rotary_emb(freqs, t) 116 | 117 | def forward(self, t, cache_key=None): 118 | if exists(cache_key) and cache_key in self.cache: 119 | return self.cache[cache_key] 120 | 121 | if isfunction(t): 122 | t = t() 123 | 124 | freqs = self.freqs 125 | 126 | freqs = torch.einsum("..., f -> ... f", t.type(freqs.dtype), freqs) 127 | freqs = repeat(freqs, "... n -> ... (n r)", r=2) 128 | 129 | if exists(cache_key): 130 | self.cache[cache_key] = freqs 131 | 132 | return freqs 133 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/losses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from kornia.geometry import warp_affine 5 | import torch.nn.functional as F 6 | 7 | def resize_n_crop(image, M, dsize=112): 8 | # image: (b, c, h, w) 9 | # M : (b, 2, 3) 10 | return warp_affine(image, M, dsize=(dsize, dsize)) 11 | 12 | ### perceptual level loss 13 | class PerceptualLoss(nn.Module): 14 | def __init__(self, recog_net, input_size=112): 15 | super(PerceptualLoss, self).__init__() 16 | self.recog_net = recog_net 17 | self.preprocess = lambda x: 2 * x - 1 18 | self.input_size=input_size 19 | def forward(imageA, imageB, M): 20 | """ 21 | 1 - cosine distance 22 | Parameters: 23 | imageA --torch.tensor (B, 3, H, W), range (0, 1) , RGB order 24 | imageB --same as imageA 25 | """ 26 | 27 | imageA = self.preprocess(resize_n_crop(imageA, M, self.input_size)) 28 | imageB = self.preprocess(resize_n_crop(imageB, M, self.input_size)) 29 | 30 | # freeze bn 31 | self.recog_net.eval() 32 | 33 | id_featureA = F.normalize(self.recog_net(imageA), dim=-1, p=2) 34 | id_featureB = F.normalize(self.recog_net(imageB), dim=-1, p=2) 35 | cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) 36 | # assert torch.sum((cosine_d > 1).float()) == 0 37 | return torch.sum(1 - cosine_d) / cosine_d.shape[0] 38 | 39 | def perceptual_loss(id_featureA, id_featureB): 40 | cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) 41 | # assert torch.sum((cosine_d > 1).float()) == 0 42 | return torch.sum(1 - cosine_d) / cosine_d.shape[0] 43 | 44 | ### image level loss 45 | def photo_loss(imageA, imageB, mask, eps=1e-6): 46 | """ 47 | l2 norm (with sqrt, to ensure backward stabililty, use eps, otherwise Nan may occur) 
48 | Parameters: 49 | imageA --torch.tensor (B, 3, H, W), range (0, 1), RGB order 50 | imageB --same as imageA 51 | """ 52 | loss = torch.sqrt(eps + torch.sum((imageA - imageB) ** 2, dim=1, keepdims=True)) * mask 53 | loss = torch.sum(loss) / torch.max(torch.sum(mask), torch.tensor(1.0).to(mask.device)) 54 | return loss 55 | 56 | def landmark_loss(predict_lm, gt_lm, weight=None): 57 | """ 58 | weighted mse loss 59 | Parameters: 60 | predict_lm --torch.tensor (B, 68, 2) 61 | gt_lm --torch.tensor (B, 68, 2) 62 | weight --numpy.array (1, 68) 63 | """ 64 | if not weight: 65 | weight = np.ones([68]) 66 | weight[28:31] = 20 67 | weight[-8:] = 20 68 | weight = np.expand_dims(weight, 0) 69 | weight = torch.tensor(weight).to(predict_lm.device) 70 | loss = torch.sum((predict_lm - gt_lm)**2, dim=-1) * weight 71 | loss = torch.sum(loss) / (predict_lm.shape[0] * predict_lm.shape[1]) 72 | return loss 73 | 74 | 75 | ### regulization 76 | def reg_loss(coeffs_dict, opt=None): 77 | """ 78 | l2 norm without the sqrt, from yu's implementation (mse) 79 | tf.nn.l2_loss https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss 80 | Parameters: 81 | coeffs_dict -- a dict of torch.tensors , keys: id, exp, tex, angle, gamma, trans 82 | 83 | """ 84 | # coefficient regularization to ensure plausible 3d faces 85 | if opt: 86 | w_id, w_exp, w_tex = opt.w_id, opt.w_exp, opt.w_tex 87 | else: 88 | w_id, w_exp, w_tex = 1, 1, 1, 1 89 | creg_loss = w_id * torch.sum(coeffs_dict['id'] ** 2) + \ 90 | w_exp * torch.sum(coeffs_dict['exp'] ** 2) + \ 91 | w_tex * torch.sum(coeffs_dict['tex'] ** 2) 92 | creg_loss = creg_loss / coeffs_dict['id'].shape[0] 93 | 94 | # gamma regularization to ensure a nearly-monochromatic light 95 | gamma = coeffs_dict['gamma'].reshape([-1, 3, 9]) 96 | gamma_mean = torch.mean(gamma, dim=1, keepdims=True) 97 | gamma_loss = torch.mean((gamma - gamma_mean) ** 2) 98 | 99 | return creg_loss, gamma_loss 100 | 101 | def reflectance_loss(texture, mask): 102 | """ 103 | minimize texture variance (mse), albedo regularization to ensure an uniform skin albedo 104 | Parameters: 105 | texture --torch.tensor, (B, N, 3) 106 | mask --torch.tensor, (N), 1 or 0 107 | 108 | """ 109 | mask = mask.reshape([1, mask.shape[0], 1]) 110 | texture_mean = torch.sum(mask * texture, dim=1, keepdims=True) / torch.sum(mask) 111 | loss = torch.sum(((texture - texture_mean) * mask)**2) / (texture.shape[0] * torch.sum(mask)) 112 | return loss 113 | 114 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/data/flist_dataset.py: -------------------------------------------------------------------------------- 1 | """This script defines the custom dataset for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | import os.path 5 | from data.base_dataset import BaseDataset, get_transform, get_affine_mat, apply_img_affine, apply_lm_affine 6 | from data.image_folder import make_dataset 7 | from PIL import Image 8 | import random 9 | import util.util as util 10 | import numpy as np 11 | import json 12 | import torch 13 | from scipy.io import loadmat, savemat 14 | import pickle 15 | from util.preprocess import align_img, estimate_norm 16 | from util.load_mats import load_lm3d 17 | 18 | 19 | def default_flist_reader(flist): 20 | """ 21 | flist format: impath label\nimpath label\n ...(same to caffe's filelist) 22 | """ 23 | imlist = [] 24 | with open(flist, 'r') as rf: 25 | for line in rf.readlines(): 26 | impath = line.strip() 27 | imlist.append(impath) 28 | 29 | return imlist 30 | 31 | def 
jason_flist_reader(flist): 32 | with open(flist, 'r') as fp: 33 | info = json.load(fp) 34 | return info 35 | 36 | def parse_label(label): 37 | return torch.tensor(np.array(label).astype(np.float32)) 38 | 39 | 40 | class FlistDataset(BaseDataset): 41 | """ 42 | It requires one directories to host training images '/path/to/data/train' 43 | You can train the model with the dataset flag '--dataroot /path/to/data'. 44 | """ 45 | 46 | def __init__(self, opt): 47 | """Initialize this dataset class. 48 | 49 | Parameters: 50 | opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions 51 | """ 52 | BaseDataset.__init__(self, opt) 53 | 54 | self.lm3d_std = load_lm3d(opt.bfm_folder) 55 | 56 | msk_names = default_flist_reader(opt.flist) 57 | self.msk_paths = [os.path.join(opt.data_root, i) for i in msk_names] 58 | 59 | self.size = len(self.msk_paths) 60 | self.opt = opt 61 | 62 | self.name = 'train' if opt.isTrain else 'val' 63 | if '_' in opt.flist: 64 | self.name += '_' + opt.flist.split(os.sep)[-1].split('_')[0] 65 | 66 | 67 | def __getitem__(self, index): 68 | """Return a data point and its metadata information. 69 | 70 | Parameters: 71 | index (int) -- a random integer for data indexing 72 | 73 | Returns a dictionary that contains A, B, A_paths and B_paths 74 | img (tensor) -- an image in the input domain 75 | msk (tensor) -- its corresponding attention mask 76 | lm (tensor) -- its corresponding 3d landmarks 77 | im_paths (str) -- image paths 78 | aug_flag (bool) -- a flag used to tell whether its raw or augmented 79 | """ 80 | msk_path = self.msk_paths[index % self.size] # make sure index is within then range 81 | img_path = msk_path.replace('mask/', '') 82 | lm_path = '.'.join(msk_path.replace('mask', 'landmarks').split('.')[:-1]) + '.txt' 83 | 84 | raw_img = Image.open(img_path).convert('RGB') 85 | raw_msk = Image.open(msk_path).convert('RGB') 86 | raw_lm = np.loadtxt(lm_path).astype(np.float32) 87 | 88 | _, img, lm, msk = align_img(raw_img, raw_lm, self.lm3d_std, raw_msk) 89 | 90 | aug_flag = self.opt.use_aug and self.opt.isTrain 91 | if aug_flag: 92 | img, lm, msk = self._augmentation(img, lm, self.opt, msk) 93 | 94 | _, H = img.size 95 | M = estimate_norm(lm, H) 96 | transform = get_transform() 97 | img_tensor = transform(img) 98 | msk_tensor = transform(msk)[:1, ...] 99 | lm_tensor = parse_label(lm) 100 | M_tensor = parse_label(M) 101 | 102 | 103 | return {'imgs': img_tensor, 104 | 'lms': lm_tensor, 105 | 'msks': msk_tensor, 106 | 'M': M_tensor, 107 | 'im_paths': img_path, 108 | 'aug_flag': aug_flag, 109 | 'dataset': self.name} 110 | 111 | def _augmentation(self, img, lm, opt, msk=None): 112 | affine, affine_inv, flip = get_affine_mat(opt, img.size) 113 | img = apply_img_affine(img, affine_inv) 114 | lm = apply_lm_affine(lm, affine, flip, img.size) 115 | if msk is not None: 116 | msk = apply_img_affine(msk, affine_inv, method=Image.BILINEAR) 117 | return img, lm, msk 118 | 119 | 120 | 121 | 122 | def __len__(self): 123 | """Return the total number of images in the dataset. 124 | """ 125 | return self.size 126 | -------------------------------------------------------------------------------- /MToV/evals/fvd/fvd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import torch.nn.functional as F 4 | 5 | def preprocess_single(video, resolution, sequence_length=None): 6 | # video: THWC, {0, ..., 255} 7 | video = video.permute(0, 3, 1, 2).float() / 255. 
# TCHW 8 | t, c, h, w = video.shape 9 | 10 | # temporal crop 11 | if sequence_length is not None: 12 | assert sequence_length <= t 13 | video = video[:sequence_length] 14 | 15 | # scale shorter side to resolution 16 | scale = resolution / min(h, w) 17 | if h < w: 18 | target_size = (resolution, math.ceil(w * scale)) 19 | else: 20 | target_size = (math.ceil(h * scale), resolution) 21 | video = F.interpolate(video, size=target_size, mode='bilinear', 22 | align_corners=False) 23 | 24 | # center crop 25 | t, c, h, w = video.shape 26 | w_start = (w - resolution) // 2 27 | h_start = (h - resolution) // 2 28 | video = video[:, :, h_start:h_start + resolution, w_start:w_start + resolution] 29 | video = video.permute(1, 0, 2, 3).contiguous() # CTHW 30 | 31 | video -= 0.5 32 | 33 | return video 34 | 35 | def preprocess(videos, target_resolution=224): 36 | # videos in {0, ..., 255} as np.uint8 array 37 | b, t, h, w, c = videos.shape 38 | videos = torch.from_numpy(videos) 39 | videos = torch.stack([preprocess_single(video, target_resolution) for video in videos]) 40 | return videos * 2 # [-0.5, 0.5] -> [-1, 1] 41 | 42 | def get_fvd_logits(videos, i3d, device): 43 | videos = preprocess(videos) 44 | embeddings = get_logits(i3d, videos, device) 45 | return embeddings 46 | 47 | # https://github.com/tensorflow/gan/blob/de4b8da3853058ea380a6152bd3bd454013bf619/tensorflow_gan/python/eval/classifier_metrics.py#L161 48 | def _symmetric_matrix_square_root(mat, eps=1e-10): 49 | u, s, v = torch.svd(mat) 50 | si = torch.where(s < eps, s, torch.sqrt(s)) 51 | return torch.matmul(torch.matmul(u, torch.diag(si)), v.t()) 52 | 53 | # https://github.com/tensorflow/gan/blob/de4b8da3853058ea380a6152bd3bd454013bf619/tensorflow_gan/python/eval/classifier_metrics.py#L400 54 | def trace_sqrt_product(sigma, sigma_v): 55 | sqrt_sigma = _symmetric_matrix_square_root(sigma) 56 | sqrt_a_sigmav_a = torch.matmul(sqrt_sigma, torch.matmul(sigma_v, sqrt_sigma)) 57 | return torch.trace(_symmetric_matrix_square_root(sqrt_a_sigmav_a)) 58 | 59 | # https://discuss.pytorch.org/t/covariance-and-gradient-support/16217/2 60 | def cov(m, rowvar=False): 61 | '''Estimate a covariance matrix given data. 62 | 63 | Covariance indicates the level to which two variables vary together. 64 | If we examine N-dimensional samples, `X = [x_1, x_2, ... x_N]^T`, 65 | then the covariance matrix element `C_{ij}` is the covariance of 66 | `x_i` and `x_j`. The element `C_{ii}` is the variance of `x_i`. 67 | 68 | Args: 69 | m: A 1-D or 2-D array containing multiple variables and observations. 70 | Each row of `m` represents a variable, and each column a single 71 | observation of all those variables. 72 | rowvar: If `rowvar` is True, then each row represents a 73 | variable, with observations in the columns. Otherwise, the 74 | relationship is transposed: each column represents a variable, 75 | while the rows contain observations. 76 | 77 | Returns: 78 | The covariance matrix of the variables. 
79 | ''' 80 | if m.dim() > 2: 81 | raise ValueError('m has more than 2 dimensions') 82 | if m.dim() < 2: 83 | m = m.view(1, -1) 84 | if not rowvar and m.size(0) != 1: 85 | m = m.t() 86 | 87 | fact = 1.0 / (m.size(1) - 1) # unbiased estimate 88 | m -= torch.mean(m, dim=1, keepdim=True) 89 | mt = m.t() # if complex: mt = m.t().conj() 90 | return fact * m.matmul(mt).squeeze() 91 | 92 | 93 | def frechet_distance(x1, x2): 94 | x1 = x1.flatten(start_dim=1) 95 | x2 = x2.flatten(start_dim=1) 96 | m, m_w = x1.mean(dim=0), x2.mean(dim=0) 97 | sigma, sigma_w = cov(x1, rowvar=False), cov(x2, rowvar=False) 98 | 99 | sqrt_trace_component = trace_sqrt_product(sigma, sigma_w) 100 | trace = torch.trace(sigma + sigma_w) - 2.0 * sqrt_trace_component 101 | 102 | mean = torch.sum((m - m_w) ** 2) 103 | fd = trace + mean 104 | return fd 105 | 106 | 107 | def get_logits(i3d, videos, device): 108 | """ 109 | assert videos.shape[0] % 16 == 0 110 | with torch.no_grad(): 111 | logits = [] 112 | for i in range(0, videos.shape[0], 16): 113 | batch = videos[i:i + 16].to(device) 114 | logits.append(i3d(batch)) 115 | logits = torch.cat(logits, dim=0) 116 | return logits 117 | """ 118 | 119 | with torch.no_grad(): 120 | logits = i3d(videos.to(device)) 121 | return logits 122 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/utils/utils_distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import torch 7 | import torch.distributed as dist 8 | from torch.utils.data import DistributedSampler as _DistributedSampler 9 | 10 | 11 | def setup_seed(seed, cuda_deterministic=True): 12 | torch.manual_seed(seed) 13 | torch.cuda.manual_seed_all(seed) 14 | np.random.seed(seed) 15 | random.seed(seed) 16 | os.environ["PYTHONHASHSEED"] = str(seed) 17 | if cuda_deterministic: # slower, more reproducible 18 | torch.backends.cudnn.deterministic = True 19 | torch.backends.cudnn.benchmark = False 20 | else: # faster, less reproducible 21 | torch.backends.cudnn.deterministic = False 22 | torch.backends.cudnn.benchmark = True 23 | 24 | 25 | def worker_init_fn(worker_id, num_workers, rank, seed): 26 | # The seed of each worker equals to 27 | # num_worker * rank + worker_id + user_seed 28 | worker_seed = num_workers * rank + worker_id + seed 29 | np.random.seed(worker_seed) 30 | random.seed(worker_seed) 31 | torch.manual_seed(worker_seed) 32 | 33 | 34 | def get_dist_info(): 35 | if dist.is_available() and dist.is_initialized(): 36 | rank = dist.get_rank() 37 | world_size = dist.get_world_size() 38 | else: 39 | rank = 0 40 | world_size = 1 41 | 42 | return rank, world_size 43 | 44 | 45 | def sync_random_seed(seed=None, device="cuda"): 46 | """Make sure different ranks share the same seed. 47 | All workers must call this function, otherwise it will deadlock. 48 | This method is generally used in `DistributedSampler`, 49 | because the seed should be identical across all processes 50 | in the distributed group. 51 | In distributed sampling, different ranks should sample non-overlapped 52 | data in the dataset. Therefore, this function is used to make sure that 53 | each rank shuffles the data indices in the same order based 54 | on the same seed. Then different ranks could use different indices 55 | to select non-overlapped data from the same data list. 56 | Args: 57 | seed (int, Optional): The seed. Default to None. 
58 | device (str): The device where the seed will be put on. 59 | Default to 'cuda'. 60 | Returns: 61 | int: Seed to be used. 62 | """ 63 | if seed is None: 64 | seed = np.random.randint(2**31) 65 | assert isinstance(seed, int) 66 | 67 | rank, world_size = get_dist_info() 68 | 69 | if world_size == 1: 70 | return seed 71 | 72 | if rank == 0: 73 | random_num = torch.tensor(seed, dtype=torch.int32, device=device) 74 | else: 75 | random_num = torch.tensor(0, dtype=torch.int32, device=device) 76 | 77 | dist.broadcast(random_num, src=0) 78 | 79 | return random_num.item() 80 | 81 | 82 | class DistributedSampler(_DistributedSampler): 83 | def __init__( 84 | self, 85 | dataset, 86 | num_replicas=None, # world_size 87 | rank=None, # local_rank 88 | shuffle=True, 89 | seed=0, 90 | ): 91 | 92 | super().__init__(dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) 93 | 94 | # In distributed sampling, different ranks should sample 95 | # non-overlapped data in the dataset. Therefore, this function 96 | # is used to make sure that each rank shuffles the data indices 97 | # in the same order based on the same seed. Then different ranks 98 | # could use different indices to select non-overlapped data from the 99 | # same data list. 100 | self.seed = sync_random_seed(seed) 101 | 102 | def __iter__(self): 103 | # deterministically shuffle based on epoch 104 | if self.shuffle: 105 | g = torch.Generator() 106 | # When :attr:`shuffle=True`, this ensures all replicas 107 | # use a different random ordering for each epoch. 108 | # Otherwise, the next iteration of this sampler will 109 | # yield the same ordering. 110 | g.manual_seed(self.epoch + self.seed) 111 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 112 | else: 113 | indices = torch.arange(len(self.dataset)).tolist() 114 | 115 | # add extra samples to make it evenly divisible 116 | # in case that indices is shorter than half of total_size 117 | indices = (indices * math.ceil(self.total_size / len(indices)))[ 118 | : self.total_size 119 | ] 120 | assert len(indices) == self.total_size 121 | 122 | # subsample 123 | indices = indices[self.rank : self.total_size : self.num_replicas] 124 | assert len(indices) == self.num_samples 125 | 126 | return iter(indices) 127 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/data/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes all the modules related to data loading and preprocessing 2 | 3 | To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset. 4 | You need to implement four functions: 5 | -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). 6 | -- <__len__>: return the size of dataset. 7 | -- <__getitem__>: get a data point from data loader. 8 | -- : (optionally) add dataset-specific options and set default options. 9 | 10 | Now you can use the dataset class by specifying flag '--dataset_mode dummy'. 11 | See our template dataset class 'template_dataset.py' for more details. 12 | """ 13 | import numpy as np 14 | import importlib 15 | import torch.utils.data 16 | from data.base_dataset import BaseDataset 17 | 18 | 19 | def find_dataset_using_name(dataset_name): 20 | """Import the module "data/[dataset_name]_dataset.py". 21 | 22 | In the file, the class called DatasetNameDataset() will 23 | be instantiated. 
It has to be a subclass of BaseDataset, 24 | and it is case-insensitive. 25 | """ 26 | dataset_filename = "data." + dataset_name + "_dataset" 27 | datasetlib = importlib.import_module(dataset_filename) 28 | 29 | dataset = None 30 | target_dataset_name = dataset_name.replace('_', '') + 'dataset' 31 | for name, cls in datasetlib.__dict__.items(): 32 | if name.lower() == target_dataset_name.lower() \ 33 | and issubclass(cls, BaseDataset): 34 | dataset = cls 35 | 36 | if dataset is None: 37 | raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name)) 38 | 39 | return dataset 40 | 41 | 42 | def get_option_setter(dataset_name): 43 | """Return the static method of the dataset class.""" 44 | dataset_class = find_dataset_using_name(dataset_name) 45 | return dataset_class.modify_commandline_options 46 | 47 | 48 | def create_dataset(opt, rank=0): 49 | """Create a dataset given the option. 50 | 51 | This function wraps the class CustomDatasetDataLoader. 52 | This is the main interface between this package and 'train.py'/'test.py' 53 | 54 | Example: 55 | >>> from data import create_dataset 56 | >>> dataset = create_dataset(opt) 57 | """ 58 | data_loader = CustomDatasetDataLoader(opt, rank=rank) 59 | dataset = data_loader.load_data() 60 | return dataset 61 | 62 | class CustomDatasetDataLoader(): 63 | """Wrapper class of Dataset class that performs multi-threaded data loading""" 64 | 65 | def __init__(self, opt, rank=0): 66 | """Initialize this class 67 | 68 | Step 1: create a dataset instance given the name [dataset_mode] 69 | Step 2: create a multi-threaded data loader. 70 | """ 71 | self.opt = opt 72 | dataset_class = find_dataset_using_name(opt.dataset_mode) 73 | self.dataset = dataset_class(opt) 74 | self.sampler = None 75 | print("rank %d %s dataset [%s] was created" % (rank, self.dataset.name, type(self.dataset).__name__)) 76 | if opt.use_ddp and opt.isTrain: 77 | world_size = opt.world_size 78 | self.sampler = torch.utils.data.distributed.DistributedSampler( 79 | self.dataset, 80 | num_replicas=world_size, 81 | rank=rank, 82 | shuffle=not opt.serial_batches 83 | ) 84 | self.dataloader = torch.utils.data.DataLoader( 85 | self.dataset, 86 | sampler=self.sampler, 87 | num_workers=int(opt.num_threads / world_size), 88 | batch_size=int(opt.batch_size / world_size), 89 | drop_last=True) 90 | else: 91 | self.dataloader = torch.utils.data.DataLoader( 92 | self.dataset, 93 | batch_size=opt.batch_size, 94 | shuffle=(not opt.serial_batches) and opt.isTrain, 95 | num_workers=int(opt.num_threads), 96 | drop_last=True 97 | ) 98 | 99 | def set_epoch(self, epoch): 100 | self.dataset.current_epoch = epoch 101 | if self.sampler is not None: 102 | self.sampler.set_epoch(epoch) 103 | 104 | def load_data(self): 105 | return self 106 | 107 | def __len__(self): 108 | """Return the number of data in the dataset""" 109 | return min(len(self.dataset), self.opt.max_dataset_size) 110 | 111 | def __iter__(self): 112 | """Return a batch of data""" 113 | for i, data in enumerate(self.dataloader): 114 | if i * self.opt.batch_size >= self.opt.max_dataset_size: 115 | break 116 | yield data 117 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/install_dali.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | ## Prerequisites 3 | 4 | 1. Linux x64. 5 | 2. 
NVIDIA Driver supporting CUDA 10.0 or later (i.e., 410.48 or later driver releases). 6 | 3. (Optional) One or more of the following deep learning frameworks: 7 | 8 | * [MXNet 1.3](http://mxnet.incubator.apache.org/) `mxnet-cu100` or later. 9 | * [PyTorch 0.4](https://pytorch.org/) or later. 10 | * [TensorFlow 1.7](https://www.tensorflow.org/) or later. 11 | 12 | ## DALI in NGC Containers 13 | DALI is preinstalled in the TensorFlow, PyTorch, and MXNet containers in versions 18.07 and later on NVIDIA GPU Cloud. 14 | 15 | ## pip - Official Releases 16 | 17 | ### nvidia-dali 18 | 19 | Execute the following command to install the latest DALI for specified CUDA version (please check support matrix to see if your platform is supported): 20 | 21 | * For CUDA 10.2: 22 | 23 | ```bash 24 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-cuda102 25 | ``` 26 | 27 | * For CUDA 11.0: 28 | 29 | ```bash 30 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-cuda110 31 | ``` 32 | 33 | 34 | > Note: CUDA 11.0 build uses CUDA toolkit enhanced compatibility. It is built with the latest CUDA 11.x toolkit while it can run on the latest, stable CUDA 11.0 capable drivers (450.80 or later). Using the latest driver may enable additional functionality. More details can be found in [enhanced CUDA compatibility guide](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#enhanced-compat-minor-releases). 35 | 36 | > Note: Please always use the latest version of pip available (at least >= 19.3) and update when possible by issuing pip install –upgrade pip 37 | 38 | ### nvidia-dali-tf-plugin 39 | 40 | DALI doesn’t contain prebuilt versions of the DALI TensorFlow plugin. It needs to be installed as a separate package which will be built against the currently installed version of TensorFlow: 41 | 42 | * For CUDA 10.2: 43 | 44 | ```bash 45 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-tf-plugin-cuda102 46 | ``` 47 | 48 | * For CUDA 11.0: 49 | 50 | ```bash 51 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-tf-plugin-cuda110 52 | ``` 53 | 54 | Installing this package will install `nvidia-dali-cudaXXX` and its dependencies, if they are not already installed. The package `tensorflow-gpu` must be installed before attempting to install `nvidia-dali-tf-plugin-cudaXXX`. 55 | 56 | > Note: The packages `nvidia-dali-tf-plugin-cudaXXX` and `nvidia-dali-cudaXXX` should be in exactly the same version. Therefore, installing the latest `nvidia-dali-tf-plugin-cudaXXX`, will replace any older `nvidia-dali-cudaXXX` version already installed. To work with older versions of DALI, provide the version explicitly to the `pip install` command. 57 | 58 | ### pip - Nightly and Weekly Releases¶ 59 | 60 | > Note: While binaries available to download from nightly and weekly builds include most recent changes available in the GitHub some functionalities may not work or provide inferior performance comparing to the official releases. Those builds are meant for the early adopters seeking for the most recent version available and being ready to boldly go where no man has gone before. 
61 | 62 | > Note: It is recommended to uninstall regular DALI and TensorFlow plugin before installing nightly or weekly builds as they are installed in the same path 63 | 64 | #### Nightly Builds 65 | To access most recent nightly builds please use flowing release channel: 66 | 67 | * For CUDA 10.2: 68 | 69 | ```bash 70 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/nightly --upgrade nvidia-dali-nightly-cuda102 71 | ``` 72 | 73 | ``` 74 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/nightly --upgrade nvidia-dali-tf-plugin-nightly-cuda102 75 | ``` 76 | 77 | * For CUDA 11.0: 78 | 79 | ```bash 80 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/nightly --upgrade nvidia-dali-nightly-cuda110 81 | ``` 82 | 83 | ```bash 84 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/nightly --upgrade nvidia-dali-tf-plugin-nightly-cuda110 85 | ``` 86 | 87 | 88 | #### Weekly Builds 89 | 90 | Also, there is a weekly release channel with more thorough testing. To access most recent weekly builds please use the following release channel (available only for CUDA 11): 91 | 92 | ```bash 93 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/weekly --upgrade nvidia-dali-weekly-cuda110 94 | ``` 95 | 96 | ```bash 97 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/weekly --upgrade nvidia-dali-tf-plugin-week 98 | ``` 99 | 100 | 101 | --- 102 | 103 | ### For more information about Dali and installation, please refer to [DALI documentation](https://docs.nvidia.com/deeplearning/dali/user-guide/docs/installation.html). 104 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/data/base_dataset.py: -------------------------------------------------------------------------------- 1 | """This module implements an abstract base class (ABC) 'BaseDataset' for datasets. 2 | 3 | It also includes common transformation functions (e.g., get_transform, __scale_width), which can be later used in subclasses. 4 | """ 5 | import random 6 | import numpy as np 7 | import torch.utils.data as data 8 | from PIL import Image 9 | import torchvision.transforms as transforms 10 | from abc import ABC, abstractmethod 11 | 12 | 13 | class BaseDataset(data.Dataset, ABC): 14 | """This class is an abstract base class (ABC) for datasets. 15 | 16 | To create a subclass, you need to implement the following four functions: 17 | -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). 18 | -- <__len__>: return the size of dataset. 19 | -- <__getitem__>: get a data point. 20 | -- : (optionally) add dataset-specific options and set default options. 21 | """ 22 | 23 | def __init__(self, opt): 24 | """Initialize the class; save the options in the class 25 | 26 | Parameters: 27 | opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions 28 | """ 29 | self.opt = opt 30 | # self.root = opt.dataroot 31 | self.current_epoch = 0 32 | 33 | @staticmethod 34 | def modify_commandline_options(parser, is_train): 35 | """Add new dataset-specific options, and rewrite default values for existing options. 36 | 37 | Parameters: 38 | parser -- original option parser 39 | is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. 40 | 41 | Returns: 42 | the modified parser. 
43 | """ 44 | return parser 45 | 46 | @abstractmethod 47 | def __len__(self): 48 | """Return the total number of images in the dataset.""" 49 | return 0 50 | 51 | @abstractmethod 52 | def __getitem__(self, index): 53 | """Return a data point and its metadata information. 54 | 55 | Parameters: 56 | index - - a random integer for data indexing 57 | 58 | Returns: 59 | a dictionary of data with their names. It ususally contains the data itself and its metadata information. 60 | """ 61 | pass 62 | 63 | 64 | def get_transform(grayscale=False): 65 | transform_list = [] 66 | if grayscale: 67 | transform_list.append(transforms.Grayscale(1)) 68 | transform_list += [transforms.ToTensor()] 69 | return transforms.Compose(transform_list) 70 | 71 | def get_affine_mat(opt, size): 72 | shift_x, shift_y, scale, rot_angle, flip = 0., 0., 1., 0., False 73 | w, h = size 74 | 75 | if 'shift' in opt.preprocess: 76 | shift_pixs = int(opt.shift_pixs) 77 | shift_x = random.randint(-shift_pixs, shift_pixs) 78 | shift_y = random.randint(-shift_pixs, shift_pixs) 79 | if 'scale' in opt.preprocess: 80 | scale = 1 + opt.scale_delta * (2 * random.random() - 1) 81 | if 'rot' in opt.preprocess: 82 | rot_angle = opt.rot_angle * (2 * random.random() - 1) 83 | rot_rad = -rot_angle * np.pi/180 84 | if 'flip' in opt.preprocess: 85 | flip = random.random() > 0.5 86 | 87 | shift_to_origin = np.array([1, 0, -w//2, 0, 1, -h//2, 0, 0, 1]).reshape([3, 3]) 88 | flip_mat = np.array([-1 if flip else 1, 0, 0, 0, 1, 0, 0, 0, 1]).reshape([3, 3]) 89 | shift_mat = np.array([1, 0, shift_x, 0, 1, shift_y, 0, 0, 1]).reshape([3, 3]) 90 | rot_mat = np.array([np.cos(rot_rad), np.sin(rot_rad), 0, -np.sin(rot_rad), np.cos(rot_rad), 0, 0, 0, 1]).reshape([3, 3]) 91 | scale_mat = np.array([scale, 0, 0, 0, scale, 0, 0, 0, 1]).reshape([3, 3]) 92 | shift_to_center = np.array([1, 0, w//2, 0, 1, h//2, 0, 0, 1]).reshape([3, 3]) 93 | 94 | affine = shift_to_center @ scale_mat @ rot_mat @ shift_mat @ flip_mat @ shift_to_origin 95 | affine_inv = np.linalg.inv(affine) 96 | return affine, affine_inv, flip 97 | 98 | def apply_img_affine(img, affine_inv, method=Image.BICUBIC): 99 | return img.transform(img.size, Image.AFFINE, data=affine_inv.flatten()[:6], resample=Image.BICUBIC) 100 | 101 | def apply_lm_affine(landmark, affine, flip, size): 102 | _, h = size 103 | lm = landmark.copy() 104 | lm[:, 1] = h - 1 - lm[:, 1] 105 | lm = np.concatenate((lm, np.ones([lm.shape[0], 1])), -1) 106 | lm = lm @ np.transpose(affine) 107 | lm[:, :2] = lm[:, :2] / lm[:, 2:] 108 | lm = lm[:, :2] 109 | lm[:, 1] = h - 1 - lm[:, 1] 110 | if flip: 111 | lm_ = lm.copy() 112 | lm_[:17] = lm[16::-1] 113 | lm_[17:22] = lm[26:21:-1] 114 | lm_[22:27] = lm[21:16:-1] 115 | lm_[31:36] = lm[35:30:-1] 116 | lm_[36:40] = lm[45:41:-1] 117 | lm_[40:42] = lm[47:45:-1] 118 | lm_[42:46] = lm[39:35:-1] 119 | lm_[46:48] = lm[41:39:-1] 120 | lm_[48:55] = lm[54:47:-1] 121 | lm_[55:60] = lm[59:54:-1] 122 | lm_[60:65] = lm[64:59:-1] 123 | lm_[65:68] = lm[67:64:-1] 124 | lm = lm_ 125 | return lm 126 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/util/load_mats.py: -------------------------------------------------------------------------------- 1 | """This script is to load 3D face model for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | import numpy as np 5 | from PIL import Image 6 | from scipy.io import loadmat, savemat 7 | from array import array 8 | import os.path as osp 9 | 10 | 11 | # load expression basis 12 | def LoadExpBasis(bfm_folder="BFM"): 13 
| n_vertex = 53215 14 | bfm_folder = "deep_3drecon/" + bfm_folder 15 | 16 | Expbin = open(osp.join(bfm_folder, "Exp_Pca.bin"), "rb") 17 | exp_dim = array("i") 18 | exp_dim.fromfile(Expbin, 1) 19 | expMU = array("f") 20 | expPC = array("f") 21 | expMU.fromfile(Expbin, 3 * n_vertex) 22 | expPC.fromfile(Expbin, 3 * exp_dim[0] * n_vertex) 23 | Expbin.close() 24 | 25 | expPC = np.array(expPC) 26 | expPC = np.reshape(expPC, [exp_dim[0], -1]) 27 | expPC = np.transpose(expPC) 28 | 29 | expEV = np.loadtxt(osp.join(bfm_folder, "std_exp.txt")) 30 | 31 | return expPC, expEV 32 | 33 | 34 | # transfer original BFM09 to our face model 35 | def transferBFM09(bfm_folder="BFM"): 36 | print("Transfer BFM09 to BFM_model_front......") 37 | # seyeon 38 | # if "aux_models/aux_models/" in bfm_folder: 39 | # bfm_folder = bfm_folder.replace("aux_models/aux_models/", "aux_models/") 40 | original_BFM = loadmat(osp.join(bfm_folder, "01_MorphableModel.mat")) 41 | 42 | shapePC = original_BFM["shapePC"] # shape basis 43 | shapeEV = original_BFM["shapeEV"] # corresponding eigen value 44 | shapeMU = original_BFM["shapeMU"] # mean face 45 | texPC = original_BFM["texPC"] # texture basis 46 | texEV = original_BFM["texEV"] # eigen value 47 | texMU = original_BFM["texMU"] # mean texture 48 | 49 | expPC, expEV = LoadExpBasis() 50 | 51 | # transfer BFM09 to our face model 52 | 53 | idBase = shapePC * np.reshape(shapeEV, [-1, 199]) 54 | idBase = idBase / 1e5 # unify the scale to decimeter 55 | idBase = idBase[:, :80] # use only first 80 basis 56 | 57 | exBase = expPC * np.reshape(expEV, [-1, 79]) 58 | exBase = exBase / 1e5 # unify the scale to decimeter 59 | exBase = exBase[:, :64] # use only first 64 basis 60 | 61 | texBase = texPC * np.reshape(texEV, [-1, 199]) 62 | texBase = texBase[:, :80] # use only first 80 basis 63 | 64 | # our face model is cropped along face landmarks and contains only 35709 vertex. 65 | # original BFM09 contains 53490 vertex, and expression basis provided by Guo et al. contains 53215 vertex. 66 | # thus we select corresponding vertex to get our face model. 67 | 68 | index_exp = loadmat(osp.join(bfm_folder, "BFM_front_idx.mat")) 69 | index_exp = index_exp["idx"].astype(np.int32) - 1 # starts from 0 (to 53215) 70 | 71 | index_shape = loadmat(osp.join(bfm_folder, "BFM_exp_idx.mat")) 72 | index_shape = index_shape["trimIndex"].astype(np.int32) - 1 # starts from 0 (to 53490) 73 | index_shape = index_shape[index_exp] 74 | 75 | idBase = np.reshape(idBase, [-1, 3, 80]) 76 | idBase = idBase[index_shape, :, :] 77 | idBase = np.reshape(idBase, [-1, 80]) 78 | 79 | texBase = np.reshape(texBase, [-1, 3, 80]) 80 | texBase = texBase[index_shape, :, :] 81 | texBase = np.reshape(texBase, [-1, 80]) 82 | 83 | exBase = np.reshape(exBase, [-1, 3, 64]) 84 | exBase = exBase[index_exp, :, :] 85 | exBase = np.reshape(exBase, [-1, 64]) 86 | 87 | meanshape = np.reshape(shapeMU, [-1, 3]) / 1e5 88 | meanshape = meanshape[index_shape, :] 89 | meanshape = np.reshape(meanshape, [1, -1]) 90 | 91 | meantex = np.reshape(texMU, [-1, 3]) 92 | meantex = meantex[index_shape, :] 93 | meantex = np.reshape(meantex, [1, -1]) 94 | 95 | # other info contains triangles, region used for computing photometric loss, 96 | # region used for skin texture regularization, and 68 landmarks index etc. 
97 | other_info = loadmat(osp.join(bfm_folder, "facemodel_info.mat")) 98 | frontmask2_idx = other_info["frontmask2_idx"] 99 | skinmask = other_info["skinmask"] 100 | keypoints = other_info["keypoints"] 101 | point_buf = other_info["point_buf"] 102 | tri = other_info["tri"] 103 | tri_mask2 = other_info["tri_mask2"] 104 | 105 | # save our face model 106 | savemat( 107 | osp.join(bfm_folder, "BFM_model_front.mat"), 108 | { 109 | "meanshape": meanshape, 110 | "meantex": meantex, 111 | "idBase": idBase, 112 | "exBase": exBase, 113 | "texBase": texBase, 114 | "tri": tri, 115 | "point_buf": point_buf, 116 | "tri_mask2": tri_mask2, 117 | "keypoints": keypoints, 118 | "frontmask2_idx": frontmask2_idx, 119 | "skinmask": skinmask, 120 | }, 121 | ) 122 | 123 | 124 | # load landmarks for standard face, which is used for image preprocessing 125 | def load_lm3d(bfm_folder): 126 | Lm3D = loadmat(osp.join(bfm_folder, "similarity_Lm3D_all.mat")) 127 | Lm3D = Lm3D["lm"] 128 | 129 | # calculate 5 facial landmarks using 68 landmarks 130 | lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1 131 | Lm3D = np.stack([Lm3D[lm_idx[0], :], np.mean(Lm3D[lm_idx[[1, 2]], :], 0), np.mean(Lm3D[lm_idx[[3, 4]], :], 0), Lm3D[lm_idx[5], :], Lm3D[lm_idx[6], :]], axis=0) 132 | Lm3D = Lm3D[[1, 2, 0, 3, 4], :] 133 | 134 | return Lm3D 135 | -------------------------------------------------------------------------------- /MToV/exps/first_stage.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | 6 | from tools.trainer import first_stage_train, first_stage_x_l_train 7 | from tools.dataloader import get_loaders 8 | from models.autoencoder.autoencoder_vit import ViTAutoencoder 9 | from losses.perceptual import LPIPSWithDiscriminator 10 | 11 | from utils import file_name, Logger 12 | import pdb 13 | 14 | # ---------------------------------------------------------------------------- 15 | 16 | _num_moments = 3 # [num_scalars, sum_of_scalars, sum_of_squares] 17 | _reduce_dtype = torch.float32 # Data type to use for initial per-tensor reduction. 18 | _counter_dtype = torch.float64 # Data type to use for the internal counters. 19 | _rank = 0 # Rank of the current process. 20 | _sync_device = None # Device to use for multiprocess communication. None = single-process. 21 | _sync_called = False # Has _sync() been called yet? 22 | _counters = dict() # Running counters on each device, updated by report(): name => device => torch.Tensor 23 | _cumulative = dict() # Cumulative counters on the CPU, updated by _sync(): name => torch.Tensor 24 | 25 | # ---------------------------------------------------------------------------- 26 | 27 | 28 | def init_multiprocessing(rank, sync_device): 29 | r"""Initializes `torch_utils.training_stats` for collecting statistics 30 | across multiple processes. 31 | This function must be called after 32 | `torch.distributed.init_process_group()` and before `Collector.update()`. 33 | The call is not necessary if multi-process collection is not needed. 34 | Args: 35 | rank: Rank of the current process. 36 | sync_device: PyTorch device to use for inter-process 37 | communication, or None to disable multi-process 38 | collection. Typically `torch.device('cuda', rank)`. 
39 | """ 40 | global _rank, _sync_device 41 | assert not _sync_called 42 | _rank = rank 43 | _sync_device = sync_device 44 | 45 | 46 | # ---------------------------------------------------------------------------- 47 | 48 | 49 | def first_stage(rank, args): 50 | device = torch.device("cuda", rank) 51 | 52 | temp_dir = "./" 53 | if args.n_gpus > 1: 54 | init_file = os.path.abspath(os.path.join(temp_dir, ".torch_distributed_init")) 55 | if os.name == "nt": 56 | init_method = "file:///" + init_file.replace("\\", "/") 57 | torch.distributed.init_process_group(backend="gloo", init_method=init_method, rank=rank, world_size=args.n_gpus) 58 | else: 59 | init_method = f"file://{init_file}" 60 | torch.distributed.init_process_group(backend="nccl", init_method=init_method, rank=rank, world_size=args.n_gpus) 61 | 62 | # Init torch_utils. 63 | sync_device = torch.device("cuda", rank) if args.n_gpus > 1 else None 64 | init_multiprocessing(rank=rank, sync_device=sync_device) 65 | 66 | """ ROOT DIRECTORY """ 67 | if rank == 0: 68 | fn = file_name(args) 69 | logger = Logger(fn) 70 | logger.log(args) 71 | logger.log(f"Log path: {logger.logdir}") 72 | rootdir = logger.logdir 73 | else: 74 | logger = None 75 | 76 | if logger is None: 77 | log_ = print 78 | else: 79 | log_ = logger.log 80 | 81 | """ Get Image """ 82 | if rank == 0: 83 | log_(f"Loading dataset {args.data} with resolution {args.res}") 84 | train_loader, test_loader, total_vid = get_loaders( 85 | rank, args.data, args.res, args.timesteps, args.skip, args.batch_size, args.n_gpus, args.seed, cond=False 86 | ) 87 | 88 | """ Get Model """ 89 | if rank == 0: 90 | log_(f"Generating model") 91 | 92 | torch.cuda.set_device(rank) 93 | model = ViTAutoencoder(args.embed_dim, args.ddconfig) 94 | model = model.to(device) 95 | criterion = LPIPSWithDiscriminator(disc_start=args.lossconfig.params.disc_start, timesteps=args.ddconfig.timesteps).to(device) 96 | 97 | opt = torch.optim.AdamW(model.parameters(), lr=args.lr, betas=(0.5, 0.9)) 98 | 99 | d_opt = torch.optim.AdamW( 100 | list(criterion.discriminator_2d.parameters()) + list(criterion.discriminator_3d.parameters()), lr=args.lr, betas=(0.5, 0.9) 101 | ) 102 | 103 | if args.resume and rank == 0: 104 | model_ckpt = torch.load(args.first_model) 105 | model.load_state_dict(model_ckpt) 106 | del model_ckpt 107 | 108 | if rank == 0: 109 | torch.save(model.state_dict(), rootdir + f"net_init.pth") 110 | 111 | if args.n_gpus > 1: 112 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[device], broadcast_buffers=False, find_unused_parameters=False) 113 | criterion = torch.nn.parallel.DistributedDataParallel(criterion, device_ids=[device], broadcast_buffers=False, find_unused_parameters=False) 114 | 115 | fp = args.amp 116 | if args.typetype == "x": 117 | print("x") 118 | first_stage_train(rank, model, opt, d_opt, criterion, train_loader, test_loader, args.first_model, fp, logger) 119 | else: 120 | print("ldmk") 121 | first_stage_x_l_train(rank, model, opt, d_opt, criterion, train_loader, test_loader, args.first_model, fp, logger) 122 | 123 | if rank == 0: 124 | torch.save(model.state_dict(), rootdir + f"net_meta.pth") 125 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/util/mesh_renderer.py: -------------------------------------------------------------------------------- 1 | """This script is the differentiable renderer for Deep3DFaceRecon_pytorch 2 | Attention, antialiasing step is missing in current version. 
3 | """ 4 | # sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 5 | # print(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) 6 | # sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) 7 | import torch 8 | import pytorch3d.ops 9 | import torch 10 | import torch.nn.functional as F 11 | import kornia 12 | from kornia.geometry.camera import pixel2cam 13 | import numpy as np 14 | from typing import List 15 | from scipy.io import loadmat 16 | from torch import nn 17 | 18 | from pytorch3d.structures import Meshes 19 | from pytorch3d.renderer import ( 20 | look_at_view_transform, 21 | FoVPerspectiveCameras, 22 | DirectionalLights, 23 | RasterizationSettings, 24 | MeshRenderer, 25 | MeshRasterizer, 26 | SoftPhongShader, 27 | TexturesUV, 28 | ) 29 | 30 | # def ndc_projection(x=0.1, n=1.0, f=50.0): 31 | # return np.array([[n/x, 0, 0, 0], 32 | # [ 0, n/-x, 0, 0], 33 | # [ 0, 0, -(f+n)/(f-n), -(2*f*n)/(f-n)], 34 | # [ 0, 0, -1, 0]]).astype(np.float32) 35 | 36 | class MeshRenderer(nn.Module): 37 | def __init__(self, 38 | rasterize_fov, 39 | znear=0.1, 40 | zfar=10, 41 | rasterize_size=224,**args): 42 | super(MeshRenderer, self).__init__() 43 | 44 | # x = np.tan(np.deg2rad(rasterize_fov * 0.5)) * znear 45 | # self.ndc_proj = torch.tensor(ndc_projection(x=x, n=znear, f=zfar)).matmul( 46 | # torch.diag(torch.tensor([1., -1, -1, 1]))) 47 | self.rasterize_size = rasterize_size 48 | self.fov = rasterize_fov 49 | self.znear = znear 50 | self.zfar = zfar 51 | 52 | self.rasterizer = None 53 | 54 | def forward(self, vertex, tri, feat=None): 55 | """ 56 | Return: 57 | mask -- torch.tensor, size (B, 1, H, W) 58 | depth -- torch.tensor, size (B, 1, H, W) 59 | features(optional) -- torch.tensor, size (B, C, H, W) if feat is not None 60 | 61 | Parameters: 62 | vertex -- torch.tensor, size (B, N, 3) 63 | tri -- torch.tensor, size (B, M, 3) or (M, 3), triangles 64 | feat(optional) -- torch.tensor, size (B, N ,C), features 65 | """ 66 | device = vertex.device 67 | rsize = int(self.rasterize_size) 68 | # ndc_proj = self.ndc_proj.to(device) 69 | # trans to homogeneous coordinates of 3d vertices, the direction of y is the same as v 70 | if vertex.shape[-1] == 3: 71 | vertex = torch.cat([vertex, torch.ones([*vertex.shape[:2], 1]).to(device)], dim=-1) 72 | vertex[..., 0] = -vertex[..., 0] 73 | 74 | 75 | # vertex_ndc = vertex @ ndc_proj.t() 76 | if self.rasterizer is None: 77 | self.rasterizer = MeshRasterizer() 78 | print("create rasterizer on device cuda:%d"%device.index) 79 | 80 | # ranges = None 81 | # if isinstance(tri, List) or len(tri.shape) == 3: 82 | # vum = vertex_ndc.shape[1] 83 | # fnum = torch.tensor([f.shape[0] for f in tri]).unsqueeze(1).to(device) 84 | # fstartidx = torch.cumsum(fnum, dim=0) - fnum 85 | # ranges = torch.cat([fstartidx, fnum], axis=1).type(torch.int32).cpu() 86 | # for i in range(tri.shape[0]): 87 | # tri[i] = tri[i] + i*vum 88 | # vertex_ndc = torch.cat(vertex_ndc, dim=0) 89 | # tri = torch.cat(tri, dim=0) 90 | 91 | # for range_mode vetex: [B*N, 4], tri: [B*M, 3], for instance_mode vetex: [B, N, 4], tri: [M, 3] 92 | tri = tri.type(torch.int32).contiguous() 93 | 94 | # rasterize 95 | cameras = FoVPerspectiveCameras( 96 | device=device, 97 | fov=self.fov, 98 | znear=self.znear, 99 | zfar=self.zfar, 100 | ) 101 | 102 | raster_settings = RasterizationSettings( 103 | image_size=rsize 104 | ) 105 | 106 | # print(vertex.shape, tri.shape) 107 | mesh = Meshes(vertex.contiguous()[...,:3], tri.unsqueeze(0)) 108 | 109 | 
fragments = self.rasterizer(mesh, cameras = cameras, raster_settings = raster_settings) 110 | rast_out = fragments.pix_to_face.squeeze(-1) 111 | depth = fragments.zbuf 112 | 113 | # render depth 114 | depth = depth.permute(0, 3, 1, 2) 115 | mask = (rast_out > 0).float().unsqueeze(1) 116 | depth = mask * depth 117 | 118 | 119 | image = None 120 | if feat is not None: 121 | attributes = feat.reshape(-1,3)[mesh.faces_packed()] 122 | image = pytorch3d.ops.interpolate_face_attributes(fragments.pix_to_face, 123 | fragments.bary_coords, 124 | attributes) 125 | # print(image.shape) 126 | image = image.squeeze(-2).permute(0, 3, 1, 2) 127 | image = mask * image 128 | 129 | return mask, depth, image 130 | 131 | -------------------------------------------------------------------------------- /MToV/exps/first_stage_ldmk.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | 6 | from tools.trainer import first_stage_train, first_stage_x_l_train 7 | from tools.dataloader import get_loaders 8 | from models.autoencoder.autoencoder_vit import ViTAutoencoder 9 | from losses.perceptual import LPIPSWithDiscriminator 10 | 11 | from utils import file_name, Logger 12 | import pdb 13 | 14 | # ---------------------------------------------------------------------------- 15 | 16 | _num_moments = 3 # [num_scalars, sum_of_scalars, sum_of_squares] 17 | _reduce_dtype = torch.float32 # Data type to use for initial per-tensor reduction. 18 | _counter_dtype = torch.float64 # Data type to use for the internal counters. 19 | _rank = 0 # Rank of the current process. 20 | _sync_device = None # Device to use for multiprocess communication. None = single-process. 21 | _sync_called = False # Has _sync() been called yet? 22 | _counters = dict() # Running counters on each device, updated by report(): name => device => torch.Tensor 23 | _cumulative = dict() # Cumulative counters on the CPU, updated by _sync(): name => torch.Tensor 24 | 25 | # ---------------------------------------------------------------------------- 26 | 27 | 28 | def init_multiprocessing(rank, sync_device): 29 | r"""Initializes `torch_utils.training_stats` for collecting statistics 30 | across multiple processes. 31 | This function must be called after 32 | `torch.distributed.init_process_group()` and before `Collector.update()`. 33 | The call is not necessary if multi-process collection is not needed. 34 | Args: 35 | rank: Rank of the current process. 36 | sync_device: PyTorch device to use for inter-process 37 | communication, or None to disable multi-process 38 | collection. Typically `torch.device('cuda', rank)`. 39 | """ 40 | global _rank, _sync_device 41 | assert not _sync_called 42 | _rank = rank 43 | _sync_device = sync_device 44 | 45 | 46 | # ---------------------------------------------------------------------------- 47 | 48 | 49 | def first_stage_ldmk(rank, args): 50 | device = torch.device("cuda", rank) 51 | 52 | temp_dir = "./" 53 | if args.n_gpus > 1: 54 | init_file = os.path.abspath(os.path.join(temp_dir, ".torch_distributed_init")) 55 | if os.name == "nt": 56 | init_method = "file:///" + init_file.replace("\\", "/") 57 | torch.distributed.init_process_group(backend="gloo", init_method=init_method, rank=rank, world_size=args.n_gpus) 58 | else: 59 | init_method = f"file://{init_file}" 60 | torch.distributed.init_process_group(backend="nccl", init_method=init_method, rank=rank, world_size=args.n_gpus) 61 | 62 | # Init torch_utils. 
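    # training_stats needs the process rank and, on multi-GPU runs, a CUDA device for
    # collective reduction; with a single GPU, sync_device stays None and no
    # inter-process communication is set up.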
63 | sync_device = torch.device("cuda", rank) if args.n_gpus > 1 else None 64 | init_multiprocessing(rank=rank, sync_device=sync_device) 65 | 66 | """ ROOT DIRECTORY """ 67 | if rank == 0: 68 | fn = file_name(args) 69 | logger = Logger(fn, logdir=args.log_dir) 70 | logger.log(args) 71 | logger.log(f"Log path: {logger.logdir}") 72 | rootdir = logger.logdir 73 | else: 74 | logger = None 75 | 76 | if logger is None: 77 | log_ = print 78 | else: 79 | log_ = logger.log 80 | 81 | """ Get Image """ 82 | if rank == 0: 83 | log_(f"Loading dataset {args.data} with resolution {args.res}") 84 | 85 | contain_contour = args.typetype != "ldmk_wo_contour" 86 | train_loader, test_loader, total_vid = get_loaders( 87 | rank, 88 | args.data, 89 | args.res, 90 | args.timesteps, 91 | args.skip, 92 | args.batch_size, 93 | args.n_gpus, 94 | args.seed, 95 | cond=False, 96 | ) 97 | 98 | """ Get Model """ 99 | if rank == 0: 100 | log_(f"Generating model") 101 | 102 | torch.cuda.set_device(rank) 103 | model = ViTAutoencoder(args.embed_dim, args.ddconfig).to(device) 104 | criterion = LPIPSWithDiscriminator(disc_start=args.lossconfig.params.disc_start, timesteps=args.ddconfig.timesteps).to(device) 105 | 106 | opt = torch.optim.AdamW(model.parameters(), lr=args.lr, betas=(0.5, 0.9)) 107 | d_opt = torch.optim.AdamW( 108 | list(criterion.discriminator_2d.parameters()) + list(criterion.discriminator_3d.parameters()), lr=args.lr, betas=(0.5, 0.9) 109 | ) 110 | 111 | if args.resume and rank == 0: 112 | model_ckpt = torch.load(args.first_model) 113 | model.load_state_dict(model_ckpt) 114 | del model_ckpt 115 | 116 | if rank == 0: 117 | torch.save(model.state_dict(), rootdir + f"net_init.pth") 118 | 119 | if args.n_gpus > 1: 120 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[device], broadcast_buffers=False, find_unused_parameters=False) 121 | criterion = torch.nn.parallel.DistributedDataParallel(criterion, device_ids=[device], broadcast_buffers=False, find_unused_parameters=False) 122 | 123 | fp = args.amp 124 | if args.typetype == "x": 125 | print("x") 126 | first_stage_train(rank, model, opt, d_opt, criterion, train_loader, test_loader, args.first_model, fp, logger) 127 | else: 128 | print("x_l") 129 | first_stage_x_l_train(rank, model, opt, d_opt, criterion, train_loader, test_loader, args.first_model, fp, logger) 130 | 131 | if rank == 0: 132 | torch.save(model.state_dict(), rootdir + f"net_meta.pth") 133 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/speed_benchmark.md: -------------------------------------------------------------------------------- 1 | ## Test Training Speed 2 | 3 | - Test Commands 4 | 5 | You need to use the following two commands to test the Partial FC training performance. 6 | The number of identites is **3 millions** (synthetic data), turn mixed precision training on, backbone is resnet50, 7 | batch size is 1024. 
8 | ```shell 9 | # Model Parallel 10 | python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/3millions 11 | # Partial FC 0.1 12 | python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/3millions_pfc 13 | ``` 14 | 15 | - GPU Memory 16 | 17 | ``` 18 | # (Model Parallel) gpustat -i 19 | [0] Tesla V100-SXM2-32GB | 64'C, 94 % | 30338 / 32510 MB 20 | [1] Tesla V100-SXM2-32GB | 60'C, 99 % | 28876 / 32510 MB 21 | [2] Tesla V100-SXM2-32GB | 60'C, 99 % | 28872 / 32510 MB 22 | [3] Tesla V100-SXM2-32GB | 69'C, 99 % | 28872 / 32510 MB 23 | [4] Tesla V100-SXM2-32GB | 66'C, 99 % | 28888 / 32510 MB 24 | [5] Tesla V100-SXM2-32GB | 60'C, 99 % | 28932 / 32510 MB 25 | [6] Tesla V100-SXM2-32GB | 68'C, 100 % | 28916 / 32510 MB 26 | [7] Tesla V100-SXM2-32GB | 65'C, 99 % | 28860 / 32510 MB 27 | 28 | # (Partial FC 0.1) gpustat -i 29 | [0] Tesla V100-SXM2-32GB | 60'C, 95 % | 10488 / 32510 MB │······················· 30 | [1] Tesla V100-SXM2-32GB | 60'C, 97 % | 10344 / 32510 MB │······················· 31 | [2] Tesla V100-SXM2-32GB | 61'C, 95 % | 10340 / 32510 MB │······················· 32 | [3] Tesla V100-SXM2-32GB | 66'C, 95 % | 10340 / 32510 MB │······················· 33 | [4] Tesla V100-SXM2-32GB | 65'C, 94 % | 10356 / 32510 MB │······················· 34 | [5] Tesla V100-SXM2-32GB | 61'C, 95 % | 10400 / 32510 MB │······················· 35 | [6] Tesla V100-SXM2-32GB | 68'C, 96 % | 10384 / 32510 MB │······················· 36 | [7] Tesla V100-SXM2-32GB | 64'C, 95 % | 10328 / 32510 MB │······················· 37 | ``` 38 | 39 | - Training Speed 40 | 41 | ```python 42 | # (Model Parallel) trainging.log 43 | Training: Speed 2271.33 samples/sec Loss 1.1624 LearningRate 0.2000 Epoch: 0 Global Step: 100 44 | Training: Speed 2269.94 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 150 45 | Training: Speed 2272.67 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 200 46 | Training: Speed 2266.55 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 250 47 | Training: Speed 2272.54 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 300 48 | 49 | # (Partial FC 0.1) trainging.log 50 | Training: Speed 5299.56 samples/sec Loss 1.0965 LearningRate 0.2000 Epoch: 0 Global Step: 100 51 | Training: Speed 5296.37 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 150 52 | Training: Speed 5304.37 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 200 53 | Training: Speed 5274.43 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 250 54 | Training: Speed 5300.10 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 300 55 | ``` 56 | 57 | In this test case, Partial FC 0.1 only use1 1/3 of the GPU memory of the model parallel, 58 | and the training speed is 2.5 times faster than the model parallel. 59 | 60 | 61 | ## Speed Benchmark 62 | 63 | 1. Training speed of different parallel methods (samples/second), Tesla V100 32GB * 8. 
(Larger is better) 64 | 65 | | Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | 66 | | :--- | :--- | :--- | :--- | 67 | |125000 | 4681 | 4824 | 5004 | 68 | |250000 | 4047 | 4521 | 4976 | 69 | |500000 | 3087 | 4013 | 4900 | 70 | |1000000 | 2090 | 3449 | 4803 | 71 | |1400000 | 1672 | 3043 | 4738 | 72 | |2000000 | - | 2593 | 4626 | 73 | |4000000 | - | 1748 | 4208 | 74 | |5500000 | - | 1389 | 3975 | 75 | |8000000 | - | - | 3565 | 76 | |16000000 | - | - | 2679 | 77 | |29000000 | - | - | 1855 | 78 | 79 | 2. GPU memory cost of different parallel methods (GB per GPU), Tesla V100 32GB * 8. (Smaller is better) 80 | 81 | | Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | 82 | | :--- | :--- | :--- | :--- | 83 | |125000 | 7358 | 5306 | 4868 | 84 | |250000 | 9940 | 5826 | 5004 | 85 | |500000 | 14220 | 7114 | 5202 | 86 | |1000000 | 23708 | 9966 | 5620 | 87 | |1400000 | 32252 | 11178 | 6056 | 88 | |2000000 | - | 13978 | 6472 | 89 | |4000000 | - | 23238 | 8284 | 90 | |5500000 | - | 32188 | 9854 | 91 | |8000000 | - | - | 12310 | 92 | |16000000 | - | - | 19950 | 93 | |29000000 | - | - | 32324 | 94 | -------------------------------------------------------------------------------- /data/data_utils/preprocess/process_audio.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from transformers import Wav2Vec2Processor, HubertModel 3 | import soundfile as sf 4 | import numpy as np 5 | import torch 6 | import pdb 7 | import glob, os, tqdm 8 | 9 | @torch.no_grad() 10 | def get_hubert_from_speech(args, speech): 11 | device = args.device 12 | print(torch.cuda.is_available()) 13 | print("Loading the Wav2Vec2 Processor...") 14 | wav2vec2_processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft") 15 | print("Loading the HuBERT Model...") 16 | hubert_model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft") 17 | 18 | # global hubert_model 19 | hubert_model = hubert_model.to(device) 20 | if speech.ndim == 2: 21 | speech = speech[:, 0] # [T, 2] ==> [T,] 22 | input_values_all = wav2vec2_processor(speech, return_tensors="pt", sampling_rate=16000).input_values # [1, T] 23 | input_values_all = input_values_all.to(device) 24 | kernel = 400 25 | stride = 320 26 | clip_length = stride * 1000 27 | num_iter = input_values_all.shape[1] // clip_length 28 | expected_T = (input_values_all.shape[1] - (kernel - stride)) // stride 29 | res_lst = [] 30 | for i in range(num_iter): 31 | if i == 0: 32 | start_idx = 0 33 | end_idx = clip_length - stride + kernel 34 | else: 35 | start_idx = clip_length * i 36 | end_idx = start_idx + (clip_length - stride + kernel) 37 | input_values = input_values_all[:, start_idx:end_idx] 38 | hidden_states = hubert_model.forward(input_values).last_hidden_state # [B=1, T=pts//320, hid=1024] 39 | res_lst.append(hidden_states[0]) 40 | if num_iter > 0: 41 | input_values = input_values_all[:, clip_length * num_iter :] 42 | else: 43 | input_values = input_values_all 44 | # if input_values.shape[1] != 0: 45 | if input_values.shape[1] >= kernel: # if the last batch is shorter than kernel_size, skip it 46 | hidden_states = hubert_model(input_values).last_hidden_state # [B=1, T=pts//320, hid=1024] 47 | res_lst.append(hidden_states[0]) 48 | ret = torch.cat(res_lst, dim=0).cpu() # [T, 1024] 49 | # assert ret.shape[0] == expected_T 50 | assert abs(ret.shape[0] - expected_T) <= 1 51 | if ret.shape[0] < expected_T: 52 | ret = torch.nn.functional.pad(ret, (0, 0, 0, 
expected_T - ret.shape[0])) 53 | else: 54 | ret = ret[:expected_T] 55 | return ret 56 | 57 | def convert_wav_sampling_rate(args): 58 | save_root = args.save_sample_dir 59 | source_wav_name = args.audio.split("/")[-1].split(".")[0] 60 | supported_types = (".wav", ".mp3", ".mp4", ".avi") 61 | os.makedirs(os.path.join(save_root, str(args.sampling_rate)), exist_ok = True) 62 | new_wav_name = os.path.join(save_root, str(args.sampling_rate), f"{source_wav_name}") 63 | command = f"ffmpeg -i {args.audio} -f wav -ar {args.sampling_rate} {new_wav_name}.wav -y" 64 | os.system(command) 65 | 66 | def load_idlist(path): 67 | with open(path, "r") as f: 68 | lines = f.readlines() 69 | id_list = [line.replace("\n", "").replace(".mp4", "").strip() for line in lines] 70 | return id_list 71 | 72 | if __name__ == "__main__": 73 | args = argparse.ArgumentParser(description="audio sampling match") 74 | ### for sampling audio 75 | args.add_argument("--audio", type=str, 76 | default="../inference/audio/LetItGo1.wav", help="path to the audio") 77 | args.add_argument("--save_sample_dir", type=str, 78 | default="../inference/sampled_audio", help="save path to the directory of sampled_audio") 79 | args.add_argument("--ref_dir", type=str, 80 | default="../inference/ref/25fps", help="path to the directory of reference images") 81 | args.add_argument("--ref_id_list", type=str, 82 | default=None, 83 | help="if ref_id_list is None, then the whole id in the ref_dir will be included") 84 | args.add_argument("--sampling_rate", type=int, 85 | default=16000) 86 | args.add_argument("--device", type=str, 87 | default="cuda:5") 88 | 89 | ### for extracting hubert 90 | args.add_argument("--wav2vec_proc", type=str, 91 | default="facebook/hubert-large-ls960-ft", 92 | help="the pretrained wav2vec2 processor") 93 | args.add_argument("--hubert_model", type=str, 94 | default="facebook/hubert-large-ls960-ft", 95 | help="the pretrained hubert model") 96 | args.add_argument("--save_hubert_dir", type=str, 97 | default="../inference/hubert", help="save path to the directory of converted hubert") 98 | 99 | args = args.parse_args() 100 | 101 | # load id list 102 | if args.ref_id_list is None: 103 | ref_list = os.listdir(args.ref_dir) 104 | else : 105 | ref_list = load_idlist(args.ref_id_list) 106 | 107 | # convert sampling rate 108 | convert_wav_sampling_rate(args) 109 | 110 | # extract hubert 111 | # confirm the sampled audio 112 | audioname = args.audio.split("/")[-1].split(".")[0] # LetItGo 113 | 114 | sampled_audio = os.path.join(args.save_sample_dir, str(args.sampling_rate), f"{audioname}.wav") 115 | # AToM/data/sampled_audio/19200/LetItGo.wav 116 | if not os.path.exists(sampled_audio): 117 | pass # RunTimeError 118 | hubert_dir = os.path.join(args.save_hubert_dir, str(args.sampling_rate)) 119 | os.makedirs(hubert_dir, exist_ok=True) 120 | hubert_name = os.path.join(hubert_dir, f"{audioname}.npy") 121 | speech_, _ = sf.read(sampled_audio) 122 | hubert_ = get_hubert_from_speech(args, speech_) 123 | np.save(hubert_name, hubert_.detach().numpy()) 124 | print("Finished preprocessing audio.\n") -------------------------------------------------------------------------------- /MToV/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.extend(["."]) 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | from omegaconf import OmegaConf 10 | 11 | from exps.diffusion import diffusion 12 | from exps.first_stage import first_stage 13 | from exps.first_stage_ldmk import 
first_stage_ldmk 14 | 15 | import pdb 16 | from utils import set_random_seed 17 | 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--exp", type=str, required=True, help="experiment name to run") 21 | parser.add_argument("--seed", type=int, default=42, help="random seed") 22 | parser.add_argument("--id", type=str, default="main", help="experiment identifier") 23 | parser.add_argument("--log_dir", type=str, default=None) 24 | 25 | """ Args about Data """ 26 | parser.add_argument("--data", type=str, default="UCF101") 27 | parser.add_argument("--batch_size", type=int, default=24) 28 | parser.add_argument("--timesteps", type=int, default=4) 29 | parser.add_argument("--ds", type=int, default=4) 30 | parser.add_argument("--typetype", type=str, default="x") 31 | 32 | """ Args about Model """ 33 | parser.add_argument("--pretrain_config", type=str, default="configs/autoencoder/autoencoder_kl_f4d6_res128.yaml") 34 | parser.add_argument("--diffusion_config", type=str, default="configs/latent-diffusion/ucf101-ldm-kl-3_res128.yaml") 35 | parser.add_argument("--train_id_txt", type=str, default=None) 36 | 37 | # for GAN resume 38 | parser.add_argument( 39 | "--first_stage_folder", 40 | type=str, 41 | default="", 42 | help="the folder of first stage experiment before GAN", 43 | ) 44 | 45 | # for diffusion model path specification 46 | parser.add_argument("--first_model", type=str, default="", help="the path of pretrained model") 47 | parser.add_argument("--first_model_ldmk", type=str, default="", help="the path of pretrained model") 48 | parser.add_argument("--second_model", type=str, default="", help="the path of pretrained model") 49 | parser.add_argument("--scale_lr", action="store_true") 50 | 51 | 52 | def main(): 53 | """Additional args ends here.""" 54 | args = parser.parse_args() 55 | """ FIX THE RANDOMNESS """ 56 | set_random_seed(args.seed) 57 | torch.backends.cudnn.deterministic = True 58 | torch.backends.cudnn.benchmark = False 59 | 60 | args.n_gpus = torch.cuda.device_count() 61 | 62 | # init and save configs 63 | 64 | """ RUN THE EXP """ 65 | if args.exp == "ddpm": 66 | config = OmegaConf.load(args.diffusion_config) 67 | first_stage_config = OmegaConf.load(args.pretrain_config) 68 | 69 | args.unetconfig = config.model.params.unet_config 70 | args.lr = config.model.base_learning_rate 71 | args.scheduler = config.model.params.scheduler_config 72 | args.res = first_stage_config.model.params.ddconfig.resolution 73 | args.timesteps = first_stage_config.model.params.ddconfig.timesteps 74 | args.skip = first_stage_config.model.params.ddconfig.skip 75 | args.ddconfig = first_stage_config.model.params.ddconfig 76 | args.embed_dim = first_stage_config.model.params.embed_dim 77 | args.ddpmconfig = config.model.params 78 | args.cond_model = config.model.cond_model 79 | 80 | if args.n_gpus == 1: 81 | diffusion(rank=0, args=args) 82 | else: 83 | torch.multiprocessing.spawn(fn=diffusion, args=(args,), nprocs=args.n_gpus) 84 | 85 | elif args.exp == "first_stage": 86 | config = OmegaConf.load(args.pretrain_config) 87 | args.ddconfig = config.model.params.ddconfig 88 | args.embed_dim = config.model.params.embed_dim 89 | args.lossconfig = config.model.params.lossconfig 90 | args.lr = config.model.base_learning_rate 91 | args.res = config.model.params.ddconfig.resolution 92 | args.timesteps = config.model.params.ddconfig.timesteps 93 | args.skip = config.model.params.ddconfig.skip 94 | args.resume = config.model.resume 95 | args.amp = config.model.amp 96 | if args.n_gpus == 1: 97 | 
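            # Single-GPU run: call the first-stage trainer directly in this process;
            # the else-branch below spawns one worker process per GPU.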
            first_stage(rank=0, args=args)
 98 |         else:
 99 |             torch.multiprocessing.spawn(fn=first_stage, args=(args,), nprocs=args.n_gpus)
100 | 
101 |     elif args.exp == "first_stage_ldmk":
102 |         config = OmegaConf.load(args.pretrain_config)
103 |         args.ddconfig = config.model.params.ddconfig
104 |         args.embed_dim = config.model.params.embed_dim
105 |         args.lossconfig = config.model.params.lossconfig
106 |         args.lr = config.model.base_learning_rate
107 |         args.res = config.model.params.ddconfig.resolution
108 |         args.timesteps = config.model.params.ddconfig.timesteps
109 |         args.skip = config.model.params.ddconfig.skip
110 |         args.resume = True
111 |         args.amp = config.model.amp
112 |         if args.n_gpus == 1:
113 |             first_stage_ldmk(rank=0, args=args)
114 |         else:
115 |             torch.multiprocessing.spawn(fn=first_stage_ldmk, args=(args,), nprocs=args.n_gpus)
116 | 
117 |     elif args.exp == "visu_first_stage":
118 |         config = OmegaConf.load(args.pretrain_config)
119 |         args.ddconfig = config.model.params.ddconfig
120 |         args.embed_dim = config.model.params.embed_dim
121 |         args.lossconfig = config.model.params.lossconfig
122 |         args.lr = config.model.base_learning_rate
123 |         args.res = config.model.params.ddconfig.resolution
124 |         args.timesteps = config.model.params.ddconfig.timesteps
125 |         args.skip = config.model.params.ddconfig.skip
126 |         args.resume = config.model.resume
127 |         args.amp = config.model.amp
128 |         if args.n_gpus == 1:
129 |             visu_first_stage(rank=0, args=args)
130 |         else:
131 |             torch.multiprocessing.spawn(fn=first_stage, args=(args,), nprocs=args.n_gpus)
132 | 
133 |     else:
134 |         raise ValueError("Unknown experiment.")
135 | 
136 | 
137 | if __name__ == "__main__":
138 |     main()
139 | 
--------------------------------------------------------------------------------
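For orientation, a minimal single-GPU launch sketch for the entry point above, assembled purely from the argparse defaults in `MToV/main.py`; the dataset name and config path are the parser defaults and are illustrative assumptions only, so any training scripts shipped with the repository should be preferred.

```bash
# Illustrative sketch only: flags mirror the argparse defaults in MToV/main.py;
# the YAML path and dataset name are assumptions, not verified repository files.
cd MToV
python main.py \
    --exp first_stage \
    --id main \
    --data UCF101 \
    --batch_size 24 \
    --pretrain_config configs/autoencoder/autoencoder_kl_f4d6_res128.yaml
```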