├── MToV
├── evals
│ ├── __init__.py
│ └── fvd
│ │ ├── __init__.py
│ │ ├── download.py
│ │ ├── convert_tf_pretrained.py
│ │ └── fvd.py
├── exps
│ ├── __init__.py
│ ├── first_stage.py
│ └── first_stage_ldmk.py
├── models
│ ├── __init__.py
│ ├── ddpm
│ │ └── __init__.py
│ ├── autoencoder
│ │ └── __init__.py
│ └── ema.py
├── tools
│ ├── __init__.py
│ └── scheduler.py
├── text_folders
│ ├── sample_cross_audio_hdtf.txt
│ ├── sample_cross_id_hdtf.txt
│ └── train_id.txt
├── .gitignore
├── scripts
│ ├── train
│ │ ├── first_stg.sh
│ │ ├── first_stg_ldmk.sh
│ │ └── second_stg.sh
│ └── inference
│ │ ├── sample_crossID.sh
│ │ └── sample.sh
├── configs
│ ├── autoencoder
│ │ ├── base.yaml
│ │ ├── base_gan.yaml
│ │ └── base_ldmk.yaml
│ └── latent-diffusion
│ │ ├── base.yaml
│ │ └── base_longvid.yaml
├── losses
│ └── diffaugment.py
└── main.py
├── AToM
├── dataset
│ ├── __init__.py
│ └── preprocess.py
├── .gitignore
├── train.py
├── scripts
│ ├── train.sh
│ └── inference.sh
├── data_util
│ ├── euler2quaterion.py
│ └── tensor_utils.py
├── args.py
└── model
│ ├── utils.py
│ ├── adan.py
│ └── rotary_embedding_torch.py
└── data
├── data_utils
├── preprocess
│ ├── __init__.py
│ ├── unify_fps.py
│ ├── video2frame_hdtf.py
│ └── process_audio.py
├── deep_3drecon
│ ├── __init__.py
│ ├── deep_3drecon_models
│ │ ├── arcface_torch
│ │ │ ├── configs
│ │ │ │ ├── __init__.py
│ │ │ │ ├── 3millions.py
│ │ │ │ ├── wf42m_pfc02_16gpus_mbf_bs8k.py
│ │ │ │ ├── ms1mv2_mbf.py
│ │ │ │ ├── ms1mv2_r50.py
│ │ │ │ ├── wf4m_mbf.py
│ │ │ │ ├── wf4m_r100.py
│ │ │ │ ├── wf4m_r50.py
│ │ │ │ ├── glint360k_mbf.py
│ │ │ │ ├── glint360k_r100.py
│ │ │ │ ├── glint360k_r50.py
│ │ │ │ ├── ms1mv2_r100.py
│ │ │ │ ├── ms1mv3_mbf.py
│ │ │ │ ├── ms1mv3_r100.py
│ │ │ │ ├── ms1mv3_r50.py
│ │ │ │ ├── wf42m_pfc02_r100.py
│ │ │ │ ├── ms1mv3_r50_onegpu.py
│ │ │ │ ├── wf42m_pfc02_16gpus_r100.py
│ │ │ │ ├── wf42m_pfc02_16gpus_r50_bs8k.py
│ │ │ │ ├── wf42m_pfc02_32gpus_r50_bs4k.py
│ │ │ │ ├── wf42m_pfc02_8gpus_r50_bs4k.py
│ │ │ │ ├── wf12m_mbf.py
│ │ │ │ ├── wf12m_r50.py
│ │ │ │ ├── wf12m_r100.py
│ │ │ │ ├── wf42m_pfc03_32gpu_r18.py
│ │ │ │ ├── wf42m_pfc03_32gpu_r50.py
│ │ │ │ ├── wf12m_pfc02_r100.py
│ │ │ │ ├── wf42m_pfc0008_32gpu_r100.py
│ │ │ │ ├── wf42m_pfc02_r100_16gpus.py
│ │ │ │ ├── wf42m_pfc02_r100_32gpus.py
│ │ │ │ ├── wf42m_pfc03_32gpu_r100.py
│ │ │ │ ├── wf42m_pfc03_32gpu_r200.py
│ │ │ │ ├── wf42m_pfc03_40epoch_64gpu_vit_b.py
│ │ │ │ ├── wf42m_pfc03_40epoch_64gpu_vit_l.py
│ │ │ │ ├── wf42m_pfc03_40epoch_64gpu_vit_s.py
│ │ │ │ ├── wf42m_pfc03_40epoch_64gpu_vit_t.py
│ │ │ │ ├── wf42m_pfc03_40epoch_8gpu_vit_t.py
│ │ │ │ ├── wf12m_flip_r50.py
│ │ │ │ ├── wf12m_conflict_r50.py
│ │ │ │ ├── wf12m_flip_pfc01_filter04_r50.py
│ │ │ │ ├── wf12m_conflict_r50_pfc03_filter04.py
│ │ │ │ ├── wf42m_pfc03_40epoch_8gpu_vit_b.py
│ │ │ │ └── base.py
│ │ │ ├── docs
│ │ │ │ ├── modelzoo.md
│ │ │ │ ├── install.md
│ │ │ │ ├── eval.md
│ │ │ │ ├── prepare_custom_dataset.md
│ │ │ │ ├── prepare_webface42m.md
│ │ │ │ ├── install_dali.md
│ │ │ │ └── speed_benchmark.md
│ │ │ ├── eval
│ │ │ │ └── __init__.py
│ │ │ ├── utils
│ │ │ │ ├── __init__.py
│ │ │ │ ├── utils_config.py
│ │ │ │ ├── utils_logging.py
│ │ │ │ ├── plot.py
│ │ │ │ └── utils_distributed_sampler.py
│ │ │ ├── requirement.txt
│ │ │ ├── run.sh
│ │ │ ├── dist.sh
│ │ │ ├── flops.py
│ │ │ ├── inference.py
│ │ │ ├── lr_scheduler.py
│ │ │ ├── torch2onnx.py
│ │ │ ├── scripts
│ │ │ │ └── shuffle_rec.py
│ │ │ ├── losses.py
│ │ │ └── backbones
│ │ │ │ └── __init__.py
│ │ ├── __init__.py
│ │ └── losses.py
│ ├── util
│ │ ├── __init__.py
│ │ ├── load_mats.py
│ │ └── mesh_renderer.py
│ ├── options
│ │ └── __init__.py
│ └── data
│ │ ├── image_folder.py
│ │ ├── template_dataset.py
│ │ ├── flist_dataset.py
│ │ ├── __init__.py
│ │ └── base_dataset.py
└── commons
│ ├── euler2rot.py
│ └── tensor_utils.py
├── .gitignore
└── README.md
/MToV/evals/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/exps/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/models/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/AToM/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/evals/fvd/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/models/ddpm/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/models/autoencoder/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data/data_utils/preprocess/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/MToV/text_folders/sample_cross_audio_hdtf.txt:
--------------------------------------------------------------------------------
1 | RD_Radio25_000
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/__init__.py:
--------------------------------------------------------------------------------
1 | from .reconstructor import *
2 | 
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/modelzoo.md:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/eval/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/requirement.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | easydict 3 | mxnet 4 | onnx 5 | sklearn 6 | opencv-python -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/run.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 train_v2.py $@ 2 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/util/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes a miscellaneous collection of useful helper functions.""" 2 | from .util import * 3 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | results/ 3 | *.gif 4 | *.mp4 5 | *.pkl 6 | *.pt 7 | *.pth 8 | *.npy 9 | *.zip 10 | BFM/ 11 | BFM copy/ 12 | inference/ 13 | train/ -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/options/__init__.py: -------------------------------------------------------------------------------- 1 | """This package options includes option modules: training options, test options, and basic options (used in both training and test).""" 2 | -------------------------------------------------------------------------------- /MToV/text_folders/sample_cross_id_hdtf.txt: -------------------------------------------------------------------------------- 1 | RD_Radio29_000 2 | WRA_DavidVitter_000 3 | WRA_JoePitts_000 4 | WDA_AmyKlobuchar1_002 5 | RD_Radio50_000 6 | WDA_JackReed0_000 7 | WDA_TerriSewell0_000 -------------------------------------------------------------------------------- /MToV/.gitignore: -------------------------------------------------------------------------------- 1 | losses/vgg.pth 2 | aux_models/ 3 | demo/ 4 | data/ 5 | __pycache__/ 6 | results/ 7 | runs/ 8 | *.pth 9 | *.pt 10 | *.jpg 11 | *.png 12 | *.pkl 13 | *.npy 14 | eval_videos/ -------------------------------------------------------------------------------- /AToM/.gitignore: -------------------------------------------------------------------------------- 1 | train/ 2 | test/ 3 | edge_aistpp/ 4 | dataset_backups/ 5 | runs/ 6 | wandb/ 7 | renders/ 8 | .ipynb_checkpoints/ 9 | __pycache__/ 10 | results/ 11 | *.gif 12 | *.mp4 13 | *.pkl 14 | *.pt 15 | *.npy 16 | *.zip 17 | cached_features/ 18 | custom_music/ 19 | -------------------------------------------------------------------------------- /AToM/train.py: -------------------------------------------------------------------------------- 1 | from args import parse_train_opt 2 | from AToM import AToM 3 | 4 | 5 | def train(opt): 6 | model = AToM(opt.feature_type, checkpoint_path = opt.checkpoint) 7 | model.train_loop(opt) 8 | 9 | if __name__ == "__main__": 10 | opt = parse_train_opt() 11 | train(opt) 12 | -------------------------------------------------------------------------------- /MToV/scripts/train/first_stg.sh: -------------------------------------------------------------------------------- 1 | EXP_NAME=main 2 | DATASET=HDTF 3 | BATCH_SIZE=1 4 | 5 | CUDA_VISIBLE_DEVICES=6 python main.py \ 6 | --exp first_stage \ 7 | --id main \ 8 | --log_dir ./runs \ 9 | 
--timesteps 4 \ 10 | --pretrain_config configs/autoencoder/base.yaml \ 11 | --data ${DATASET} \ 12 | --batch_size ${BATCH_SIZE} 13 | -------------------------------------------------------------------------------- /AToM/scripts/train.sh: -------------------------------------------------------------------------------- 1 | ############## 2 | BATCH_SIZE=64 3 | EPOCHS=2000 4 | FEAT=jukebox 5 | SAVE_INTERVAL=1 6 | DEVICE=6 7 | ############## 8 | 9 | CUDA_VISIBLE_DEVICES=$DEVICE python train.py \ 10 | --batch_size $BATCH_SIZE \ 11 | --epochs $EPOCHS \ 12 | --feature_type $FEAT \ 13 | --save_interval $SAVE_INTERVAL -------------------------------------------------------------------------------- /AToM/scripts/inference.sh: -------------------------------------------------------------------------------- 1 | ############### 2 | DATA_ROOT=../data/inference/ref/25fps 3 | HUBERT=../data/inference/hubert/16000/LetItGo1.npy 4 | SAVE_DIR=results/frontalized1 5 | CHECKPOINT=../checkpoints/atom.pt 6 | DEVICE=6 7 | ############### 8 | 9 | CUDA_VISIBLE_DEVICES=$DEVICE python inference.py \ 10 | --data_root $DATA_ROOT \ 11 | --hubert_path $HUBERT \ 12 | --save_dir $SAVE_DIR \ 13 | --checkpoint $CHECKPOINT -------------------------------------------------------------------------------- /MToV/configs/autoencoder/base.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | resume: False 3 | amp: True 4 | base_learning_rate: 1.0e-4 5 | params: 6 | embed_dim: 4 7 | lossconfig: 8 | params: 9 | disc_start: 100000000 10 | 11 | ddconfig: 12 | double_z: False 13 | channels: 384 14 | resolution: 256 15 | timesteps: 16 16 | skip: 1 17 | in_channels: 3 18 | out_ch: 3 19 | num_res_blocks: 2 20 | attn_resolutions: [] 21 | splits: 1 22 | -------------------------------------------------------------------------------- /MToV/configs/autoencoder/base_gan.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | resume: True 3 | amp: True 4 | base_learning_rate: 1.0e-4 5 | params: 6 | embed_dim: 4 7 | lossconfig: 8 | params: 9 | disc_start: -1 10 | 11 | ddconfig: 12 | double_z: False 13 | channels: 384 14 | resolution: 256 15 | timesteps: 16 16 | skip: 1 17 | in_channels: 3 18 | out_ch: 3 19 | num_res_blocks: 2 20 | attn_resolutions: [] 21 | splits: 1 22 | -------------------------------------------------------------------------------- /MToV/scripts/train/first_stg_ldmk.sh: -------------------------------------------------------------------------------- 1 | EXP_NAME=main 2 | DATASET=HDTF 3 | FIRST_STAGE_MODEL_DIRECTORY=../checkpoints/autoencoder_rgb.pth 4 | BATCH_SIZE=1 5 | 6 | CUDA_VISIBLE_DEVICES=0 python main.py \ 7 | --exp first_stage_ldmk \ 8 | --log_dir ./runs \ 9 | --id encoder_decoder_frz \ 10 | --typetype 'ldmk' \ 11 | --timesteps 16 \ 12 | --pretrain_config configs/autoencoder/base.yaml \ 13 | --data ${DATASET} \ 14 | --first_model ${FIRST_STAGE_MODEL_DIRECTORY} \ 15 | --batch_size ${BATCH_SIZE} 16 | -------------------------------------------------------------------------------- /MToV/configs/autoencoder/base_ldmk.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | resume: False 3 | amp: True 4 | base_learning_rate: 1.0e-4 5 | params: 6 | embed_dim: 4 7 | lossconfig: 8 | params: 9 | disc_start: 100000000 10 | 11 | ddconfig: 12 | double_z: False 13 | channels: 384 14 | resolution: 256 15 | timesteps: 16 16 | skip: 1 17 | in_channels: 3 18 | out_ch: 3 19 | num_res_blocks: 2 20 | 
attn_resolutions: [] 21 | splits: 1 22 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/dist.sh: -------------------------------------------------------------------------------- 1 | ip_list=("ip1" "ip2" "ip3" "ip4") 2 | 3 | config=wf42m_pfc03_32gpu_r100 4 | 5 | for((node_rank=0;node_rank<${#ip_list[*]};node_rank++)); 6 | do 7 | ssh ubuntu@${ip_list[node_rank]} "cd `pwd`;PATH=$PATH \ 8 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 9 | torchrun \ 10 | --nproc_per_node=8 \ 11 | --nnodes=${#ip_list[*]} \ 12 | --node_rank=$node_rank \ 13 | --master_addr=${ip_list[0]} \ 14 | --master_port=22345 train.py configs/$config" & 15 | done 16 | -------------------------------------------------------------------------------- /MToV/scripts/train/second_stg.sh: -------------------------------------------------------------------------------- 1 | EXP_NAME=main 2 | DATASET=HDTF 3 | FIRST_STAGE_MODEL_DIRECTORY=../checkpoints/autoencoder_rgb.pth 4 | FIRST_STAGE_MODEL_LDMK_DIRECTORY=../checkpoints/autoencoder_motion.pth 5 | BATCH_SIZE=10 6 | 7 | CUDA_VISIBLE_DEVICES=6 python main.py \ 8 | --exp ddpm \ 9 | --id ${EXP_NAME} \ 10 | --log_dir ./runs \ 11 | --data ${DATASET} \ 12 | --first_model ${FIRST_STAGE_MODEL_DIRECTORY} \ 13 | --first_model_ldmk ${FIRST_STAGE_MODEL_LDMK_DIRECTORY} \ 14 | --pretrain_config configs/autoencoder/base.yaml \ 15 | --diffusion_config configs/latent-diffusion/base.yaml \ 16 | --batch_size ${BATCH_SIZE} 17 | 18 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/utils/utils_config.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os.path as osp 3 | 4 | 5 | def get_config(config_file): 6 | assert config_file.startswith('configs/'), 'config file setting must start with configs/' 7 | temp_config_name = osp.basename(config_file) 8 | temp_module_name = osp.splitext(temp_config_name)[0] 9 | config = importlib.import_module("configs.base") 10 | cfg = config.config 11 | config = importlib.import_module("configs.%s" % temp_module_name) 12 | job_cfg = config.config 13 | cfg.update(job_cfg) 14 | if cfg.output is None: 15 | cfg.output = osp.join('work_dirs', temp_module_name) 16 | return cfg -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/3millions.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # configs for test speed 4 | 5 | config = edict() 6 | config.margin_list = (1.0, 0.0, 0.4) 7 | config.network = "mbf" 8 | config.resume = False 9 | config.output = None 10 | config.embedding_size = 512 11 | config.sample_rate = 0.1 12 | config.fp16 = True 13 | config.momentum = 0.9 14 | config.weight_decay = 5e-4 15 | config.batch_size = 512 # total_batch_size = batch_size * num_gpus 16 | config.lr = 0.1 # batch size is 512 17 | 18 | config.rec = "synthetic" 19 | config.num_classes = 30 * 10000 20 | config.num_image = 100000 21 | config.num_epoch = 30 22 | config.warmup_epoch = -1 23 | config.val_targets = [] 24 | -------------------------------------------------------------------------------- /MToV/scripts/inference/sample_crossID.sh: -------------------------------------------------------------------------------- 1 | NUM_FRAMES=144 # NUM_FRAMES=304 = 10초 16의 배수 2 | 
FIRST_STAGE_MODEL_DIRECTORY=../checkpoints/autoencoder_rgb.pth 3 | FIRST_STAGE_MODEL_LDMK_DIRECTORY=../checkpoints/autoencoder_motion.pth 4 | SECOND_STAGE_MODEL_DIRECTORY=../checkpoints/diffusion_model.pth 5 | EVAL_NAME=noisy_0.25 6 | 7 | CUDA_VISIBLE_DEVICES=6 python sample_crossID.py \ 8 | --including_ldmk_video \ 9 | --ratio_ 0.25 \ 10 | --fps 30 \ 11 | --seconds 5 \ 12 | --x_noisy_start \ 13 | --num_frames ${NUM_FRAMES} \ 14 | --batch_size 1 \ 15 | --first_model ${FIRST_STAGE_MODEL_DIRECTORY} \ 16 | --first_model_ldmk ${FIRST_STAGE_MODEL_LDMK_DIRECTORY} \ 17 | --second_model ${SECOND_STAGE_MODEL_DIRECTORY} \ 18 | --eval_folder results/Cross_Id_${EVAL_NAME} 19 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_16gpus_mbf_bs8k.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 512 18 | config.lr = 0.4 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 2 27 | config.val_targets = [] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/flops.py: -------------------------------------------------------------------------------- 1 | from ptflops import get_model_complexity_info 2 | from backbones import get_model 3 | import argparse 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser(description='') 7 | parser.add_argument('n', type=str, default="r100") 8 | args = parser.parse_args() 9 | net = get_model(args.n) 10 | macs, params = get_model_complexity_info( 11 | net, (3, 112, 112), as_strings=False, 12 | print_per_layer_stat=True, verbose=True) 13 | gmacs = macs / (1000**3) 14 | print("%.3f GFLOPs"%gmacs) 15 | print("%.3f Mparams"%(params/(1000**2))) 16 | 17 | if hasattr(net, "extra_gflops"): 18 | print("%.3f Extra-GFLOPs"%net.extra_gflops) 19 | print("%.3f Total-GFLOPs"%(gmacs+net.extra_gflops)) 20 | 21 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/faces_emore" 23 | config.num_classes = 85742 24 | config.num_image = 5822653 25 | 
config.num_epoch = 40 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/faces_emore" 23 | config.num_classes = 85742 24 | config.num_image = 5822653 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf4m_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace4M" 23 | config.num_classes = 205990 24 | config.num_image = 4235242 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf4m_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace4M" 23 | config.num_classes = 205990 24 | config.num_image = 4235242 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf4m_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs 
/train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace4M" 23 | config.num_classes = 205990 24 | config.num_image = 4235242 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/glint360k" 23 | config.num_classes = 360232 24 | config.num_image = 17091657 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/glint360k" 23 | config.num_classes = 360232 24 | config.num_image = 17091657 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/glint360k_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/glint360k" 23 | config.num_classes = 360232 24 
| config.num_image = 17091657 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv2_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/faces_emore" 23 | config.num_classes = 85742 24 | config.num_image = 5822653 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /MToV/scripts/inference/sample.sh: -------------------------------------------------------------------------------- 1 | NUM_FRAMES=144 # NUM_FRAMES=304 = 10초 16의 배수 2 | FIRST_STAGE_MODEL_DIRECTORY=../checkpoints/autoencoder_rgb.pth 3 | FIRST_STAGE_MODEL_LDMK_DIRECTORY=../checkpoints/autoencoder_motion.pth 4 | SECOND_STAGE_MODEL_DIRECTORY=../checkpoints/diffusion_model.pth 5 | EVAL_NAME=noisy_0.25 6 | 7 | 8 | CUDA_VISIBLE_DEVICES=6 python sample.py \ 9 | --including_ldmk_video \ 10 | --ratio_ 0.25 \ 11 | --fps 30 \ 12 | --seconds 5 \ 13 | --x_noisy_start \ 14 | --num_frames ${NUM_FRAMES} \ 15 | --batch_size 1 \ 16 | --first_model ${FIRST_STAGE_MODEL_DIRECTORY} \ 17 | --first_model_ldmk ${FIRST_STAGE_MODEL_LDMK_DIRECTORY} \ 18 | --second_model ${SECOND_STAGE_MODEL_DIRECTORY} \ 19 | --eval_folder results/${EVAL_NAME} \ 20 | --ldmk_owner_list WRA_JoePitts_000 WDA_BarbaraLee1_000 WDA_StenyHoyer_000 \ 21 | # --crossID WDA_BarackObama_001 \ -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/ms1m-retinaface-t1" 23 | config.num_classes = 93431 24 | config.num_image = 5179510 25 | config.num_epoch = 40 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # 
our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/ms1m-retinaface-t1" 23 | config.num_classes = 93431 24 | config.num_image = 5179510 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/ms1m-retinaface-t1" 23 | config.num_classes = 93431 24 | config.num_image = 5179510 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/ms1mv3_r50_onegpu.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.5, 0.0) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.02 19 | config.verbose = 2000 20 | config.dali = False 21 
| 22 | config.rec = "/train_tmp/ms1m-retinaface-t1" 23 | config.num_classes = 93431 24 | config.num_image = 5179510 25 | config.num_epoch = 20 26 | config.warmup_epoch = 0 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_16gpus_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 256 18 | config.lr = 0.3 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 1 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_16gpus_r50_bs8k.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 512 18 | config.lr = 0.6 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 4 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_32gpus_r50_bs4k.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 2 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_8gpus_r50_bs4k.py: 
-------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 512 18 | config.lr = 0.4 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = 2 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.interclass_filtering_threshold = 0 15 | config.fp16 = True 16 | config.weight_decay = 1e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M" 24 | config.num_classes = 617970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = 0 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.interclass_filtering_threshold = 0 15 | config.fp16 = True 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M" 24 | config.num_classes = 617970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = 0 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_r100.py: -------------------------------------------------------------------------------- 1 | 2 | from easydict import EasyDict as edict 3 | 4 | # make training faster 5 | # our RAM is 256G 6 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 7 | 8 | config = edict() 9 | config.margin_list = (1.0, 0.0, 0.4) 10 | config.network = "r100" 11 | config.resume = False 12 | config.output = None 13 | config.embedding_size = 512 14 | config.sample_rate = 1.0 15 | 
config.interclass_filtering_threshold = 0 16 | config.fp16 = True 17 | config.weight_decay = 5e-4 18 | config.batch_size = 128 19 | config.optimizer = "sgd" 20 | config.lr = 0.1 21 | config.verbose = 2000 22 | config.dali = False 23 | 24 | config.rec = "/train_tmp/WebFace12M" 25 | config.num_classes = 617970 26 | config.num_image = 12720066 27 | config.num_epoch = 20 28 | config.warmup_epoch = 0 29 | config.val_targets = [] 30 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_32gpu_r18.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r18" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_32gpu_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_pfc02_r100.py: -------------------------------------------------------------------------------- 1 | 2 | from easydict import EasyDict as edict 3 | 4 | # make training faster 5 | # our RAM is 256G 6 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 7 | 8 | config = edict() 9 | config.margin_list = (1.0, 0.0, 0.4) 10 | config.network = "r100" 11 | config.resume = False 12 | config.output = None 13 | config.embedding_size = 512 14 | config.sample_rate = 0.2 15 | config.interclass_filtering_threshold = 0 16 | config.fp16 = True 17 | config.weight_decay = 5e-4 18 | config.batch_size = 128 19 | config.optimizer = "sgd" 20 | config.lr = 0.1 21 | config.verbose = 2000 22 | config.dali = False 23 | 24 | config.rec = "/train_tmp/WebFace12M" 25 | config.num_classes = 617970 26 | config.num_image = 12720066 27 | config.num_epoch = 20 28 | 
config.warmup_epoch = 0 29 | config.val_targets = [] 30 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc0008_32gpu_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 512 18 | config.lr = 0.4 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_r100_16gpus.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.2 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc02_r100_32gpus.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.2 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 10000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ['lfw', 'cfp_fp', "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_32gpu_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training 
faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_32gpu_r200.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r200" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.4 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 20 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_64gpu_vit_b.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_b_dp005_mask_005" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | config.batch_size = 384 17 | config.optimizer = "adamw" 18 | config.lr = 0.001 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 40 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = [] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_64gpu_vit_l.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_l_dp005_mask_005" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | 
config.batch_size = 384 17 | config.optimizer = "adamw" 18 | config.lr = 0.001 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 40 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = [] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_64gpu_vit_s.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_s_dp005_mask_0" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | config.batch_size = 384 17 | config.optimizer = "adamw" 18 | config.lr = 0.001 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 40 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = [] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_64gpu_vit_t.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_t_dp005_mask0" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | config.batch_size = 384 17 | config.optimizer = "adamw" 18 | config.lr = 0.001 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 40 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = [] 28 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_8gpu_vit_t.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_t_dp005_mask0" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | config.batch_size = 512 17 | config.optimizer = "adamw" 18 | config.lr = 0.001 19 | config.verbose = 2000 20 | config.dali = False 21 | 22 | config.rec = "/train_tmp/WebFace42M" 23 | config.num_classes = 2059906 24 | config.num_image = 42474557 25 | config.num_epoch = 40 26 | config.warmup_epoch = config.num_epoch // 10 27 | config.val_targets = [] 28 | 
-------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_flip_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.interclass_filtering_threshold = 0 15 | config.fp16 = True 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M_FLIP40" 24 | config.num_classes = 617970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = config.num_epoch // 10 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_conflict_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.interclass_filtering_threshold = 0 15 | config.fp16 = True 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M_Conflict" 24 | config.num_classes = 1017970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = config.num_epoch // 10 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_flip_pfc01_filter04_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.1 14 | config.interclass_filtering_threshold = 0.4 15 | config.fp16 = True 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M_FLIP40" 24 | config.num_classes = 617970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = config.num_epoch // 10 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf12m_conflict_r50_pfc03_filter04.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict 
as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.interclass_filtering_threshold = 0.4 15 | config.fp16 = True 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.optimizer = "sgd" 19 | config.lr = 0.1 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace12M_Conflict" 24 | config.num_classes = 1017970 25 | config.num_image = 12720066 26 | config.num_epoch = 20 27 | config.warmup_epoch = config.num_epoch // 10 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/wf42m_pfc03_40epoch_8gpu_vit_b.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.margin_list = (1.0, 0.0, 0.4) 9 | config.network = "vit_b_dp005_mask_005" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.3 14 | config.fp16 = True 15 | config.weight_decay = 0.1 16 | config.batch_size = 256 17 | config.gradient_acc = 12 # total batchsize is 256 * 12 18 | config.optimizer = "adamw" 19 | config.lr = 0.001 20 | config.verbose = 2000 21 | config.dali = False 22 | 23 | config.rec = "/train_tmp/WebFace42M" 24 | config.num_classes = 2059906 25 | config.num_image = 42474557 26 | config.num_epoch = 40 27 | config.warmup_epoch = config.num_epoch // 10 28 | config.val_targets = [] 29 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/install.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ### [Torch v1.11.0](https://pytorch.org/get-started/previous-versions/#v1110) 4 | #### Linux and Windows 5 | - CUDA 11.3 6 | ```shell 7 | 8 | pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113 9 | ``` 10 | 11 | - CUDA 10.2 12 | ```shell 13 | pip install torch==1.11.0+cu102 torchvision==0.12.0+cu102 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu102 14 | ``` 15 | 16 | ### [Torch v1.9.0](https://pytorch.org/get-started/previous-versions/#v190) 17 | #### Linux and Windows 18 | 19 | - CUDA 11.1 20 | ```shell 21 | pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html 22 | ``` 23 | 24 | - CUDA 10.2 25 | ```shell 26 | pip install torch==1.9.0+cu102 torchvision==0.10.0+cu102 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html 27 | ``` 28 | -------------------------------------------------------------------------------- /AToM/data_util/euler2quaterion.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import math 4 | import numba 5 | from scipy.spatial.transform import Rotation as R 6 | 7 | def euler2quaterion(euler, use_radian=True): 8 | """ 9 | euler: np.array, [batch, 3] 10 | 
return: the quaterion, np.array, [batch, 4] 11 | """ 12 | r = R.from_euler('xyz',euler, degrees=not use_radian) 13 | return r.as_quat() 14 | 15 | def quaterion2euler(quat, use_radian=True): 16 | """ 17 | quat: np.array, [batch, 4] 18 | return: the euler, np.array, [batch, 3] 19 | """ 20 | r = R.from_quat(quat) 21 | return r.as_euler('xyz', degrees=not use_radian) 22 | 23 | def rot2quaterion(rot): 24 | r = R.from_matrix(rot) 25 | return r.as_quat() 26 | 27 | def quaterion2rot(quat): 28 | r = R.from_quat(quat) 29 | return r.as_matrix() 30 | 31 | if __name__ == '__main__': 32 | euler = np.array([89.999,89.999,89.999] * 100).reshape([100,3]) 33 | q = euler2quaterion(euler, use_radian=False) 34 | e = quaterion2euler(q, use_radian=False) 35 | print(" ") 36 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import numpy as np 5 | import torch 6 | 7 | from backbones import get_model 8 | 9 | 10 | @torch.no_grad() 11 | def inference(weight, name, img): 12 | if img is None: 13 | img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.uint8) 14 | else: 15 | img = cv2.imread(img) 16 | img = cv2.resize(img, (112, 112)) 17 | 18 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 19 | img = np.transpose(img, (2, 0, 1)) 20 | img = torch.from_numpy(img).unsqueeze(0).float() 21 | img.div_(255).sub_(0.5).div_(0.5) 22 | net = get_model(name, fp16=False) 23 | net.load_state_dict(torch.load(weight)) 24 | net.eval() 25 | feat = net(img).numpy() 26 | print(feat) 27 | 28 | 29 | if __name__ == "__main__": 30 | parser = argparse.ArgumentParser(description='PyTorch ArcFace Training') 31 | parser.add_argument('--network', type=str, default='r50', help='backbone network') 32 | parser.add_argument('--weight', type=str, default='') 33 | parser.add_argument('--img', type=str, default=None) 34 | args = parser.parse_args() 35 | inference(args.weight, args.network, args.img) 36 | -------------------------------------------------------------------------------- /MToV/configs/latent-diffusion/base.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 # set to target_lr by starting main.py with '--scale_lr False' 3 | cond_model: False 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | loss_type: l1 11 | first_stage_key: "image" 12 | cond_stage_key: "image" 13 | image_size: 32 14 | channels: 4 15 | cond_stage_trainable: False 16 | concat_mode: False 17 | scale_by_std: True 18 | w: 0. 19 | 20 | scheduler_config: # 10000 warmup steps 21 | warm_up_steps: [10000] 22 | cycle_lengths: [10000000000000] 23 | f_start: [1.e-6] 24 | f_max: [1.] 25 | f_min: [ 1.] 
26 | 27 | unet_config: 28 | image_size: 32 29 | in_channels: 4 30 | out_channels: 4 31 | model_channels: 128 32 | attention_resolutions: [4,2,1] # 32, 16, 8, 4 33 | num_res_blocks: 2 34 | channel_mult: [1,2,4,4] # 32, 16, 8, 4, 2 35 | num_heads: 8 36 | use_scale_shift_norm: True 37 | resblock_updown: True 38 | cond_model: False 39 | 40 | # use_spatial_transformer: True # 230402 enable cross-attention 41 | # transformer_depth: 1 42 | # context_dim: 512 43 | -------------------------------------------------------------------------------- /MToV/configs/latent-diffusion/base_longvid.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 # set to target_lr by starting main.py with '--scale_lr False' 3 | cond_model: False 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | loss_type: l1 11 | first_stage_key: "image" 12 | cond_stage_key: "image" 13 | image_size: 32 14 | channels: 4 15 | cond_stage_trainable: False 16 | concat_mode: False 17 | scale_by_std: True 18 | w: 0. 19 | 20 | scheduler_config: # 10000 warmup steps 21 | warm_up_steps: [10000] 22 | cycle_lengths: [10000000000000] 23 | f_start: [1.e-6] 24 | f_max: [1.] 25 | f_min: [ 1.] 26 | 27 | unet_config: 28 | image_size: 32 29 | in_channels: 4 30 | out_channels: 4 31 | model_channels: 256 32 | attention_resolutions: [4,2,1] # 32, 16, 8, 4 33 | num_res_blocks: 2 34 | channel_mult: [1,2,4,4] # 32, 16, 8, 4, 2 35 | num_heads: 8 36 | use_scale_shift_norm: True 37 | resblock_updown: True 38 | cond_model: True 39 | 40 | # use_spatial_transformer: True # 230402 enable cross-attention 41 | # transformer_depth: 1 42 | # context_dim: 512 43 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from torch.optim.lr_scheduler import _LRScheduler 2 | 3 | 4 | class PolyScheduler(_LRScheduler): 5 | def __init__(self, optimizer, base_lr, max_steps, warmup_steps, last_epoch=-1): 6 | self.base_lr = base_lr 7 | self.warmup_lr_init = 0.0001 8 | self.max_steps: int = max_steps 9 | self.warmup_steps: int = warmup_steps 10 | self.power = 2 11 | super(PolyScheduler, self).__init__(optimizer, -1, False) 12 | self.last_epoch = last_epoch 13 | 14 | def get_warmup_lr(self): 15 | alpha = float(self.last_epoch) / float(self.warmup_steps) 16 | return [self.base_lr * alpha for _ in self.optimizer.param_groups] 17 | 18 | def get_lr(self): 19 | if self.last_epoch == -1: 20 | return [self.warmup_lr_init for _ in self.optimizer.param_groups] 21 | if self.last_epoch < self.warmup_steps: 22 | return self.get_warmup_lr() 23 | else: 24 | alpha = pow( 25 | 1 26 | - float(self.last_epoch - self.warmup_steps) 27 | / float(self.max_steps - self.warmup_steps), 28 | self.power, 29 | ) 30 | return [self.base_lr * alpha for _ in self.optimizer.param_groups] 31 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/utils/utils_logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | 6 | class AverageMeter(object): 7 | """Computes and stores the average and current value 8 | """ 9 | 10 | def __init__(self): 11 | self.val = None 12 | self.avg = None 13 | self.sum = 
None 14 | self.count = None 15 | self.reset() 16 | 17 | def reset(self): 18 | self.val = 0 19 | self.avg = 0 20 | self.sum = 0 21 | self.count = 0 22 | 23 | def update(self, val, n=1): 24 | self.val = val 25 | self.sum += val * n 26 | self.count += n 27 | self.avg = self.sum / self.count 28 | 29 | 30 | def init_logging(rank, models_root): 31 | if rank == 0: 32 | log_root = logging.getLogger() 33 | log_root.setLevel(logging.INFO) 34 | formatter = logging.Formatter("Training: %(asctime)s-%(message)s") 35 | handler_file = logging.FileHandler(os.path.join(models_root, "training.log")) 36 | handler_stream = logging.StreamHandler(sys.stdout) 37 | handler_file.setFormatter(formatter) 38 | handler_stream.setFormatter(formatter) 39 | log_root.addHandler(handler_file) 40 | log_root.addHandler(handler_stream) 41 | log_root.info('rank_id: %d' % rank) 42 | -------------------------------------------------------------------------------- /data/data_utils/commons/euler2rot.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from scipy.spatial.transform import Rotation as R 3 | from utils.commons.tensor_utils import convert_to_tensor 4 | 5 | 6 | def rot2euler(rot, use_radian=True): 7 | r = R.from_matrix(rot) 8 | return r.as_euler('xyz', degrees=not use_radian) 9 | 10 | def euler2rot(euler, use_radian=True): 11 | r = R.from_euler('xyz',euler, degrees=not use_radian) 12 | return r.as_matrix() 13 | 14 | def c2w_to_euler_trans(c2w): 15 | if c2w.ndim == 3: 16 | e = rot2euler(c2w[:, :3, :3]) # [B, 3] 17 | t = c2w[:, :3, 3].reshape([-1, 3]) 18 | else: 19 | e = rot2euler(c2w[:3, :3]) # [B, 3] 20 | t = c2w[:3, 3].reshape([3]) 21 | return e, t # [3+3] 22 | 23 | def euler_trans_2_c2w(euler, trans): 24 | if euler.ndim == 2: 25 | rot = euler2rot(euler) # [b, 3, 3] 26 | bs = trans.shape[0] 27 | trans = trans.reshape([bs, 3, 1]) 28 | rot = convert_to_tensor(rot).float() 29 | trans = convert_to_tensor(trans).float() 30 | c2w = torch.cat([rot, trans], dim=-1) # [b, 3, 4] 31 | else: 32 | rot = euler2rot(euler) # [3, 3] 33 | trans = trans.reshape([3, 1]) 34 | rot = convert_to_tensor(rot).float() 35 | trans = convert_to_tensor(trans).float() 36 | c2w = torch.cat([rot, trans], dim=-1) # [3, 4] 37 | return c2w -------------------------------------------------------------------------------- /MToV/evals/fvd/download.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from tqdm import tqdm 3 | import os 4 | import torch 5 | 6 | from utils import download 7 | from os import path 8 | import sys 9 | sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) 10 | 11 | def get_confirm_token(response): 12 | for key, value in response.cookies.items(): 13 | if key.startswith('download_warning'): 14 | return value 15 | return None 16 | 17 | 18 | def save_response_content(response, destination): 19 | CHUNK_SIZE = 8192 20 | 21 | pbar = tqdm(total=0, unit='iB', unit_scale=True) 22 | with open(destination, 'wb') as f: 23 | for chunk in response.iter_content(CHUNK_SIZE): 24 | if chunk: 25 | f.write(chunk) 26 | pbar.update(len(chunk)) 27 | pbar.close() 28 | 29 | 30 | _I3D_PRETRAINED_ID = '1fBNl3TS0LA5FEhZv5nMGJs2_7qQmvTmh' 31 | 32 | def load_i3d_pretrained(device=torch.device('cpu')): 33 | from evals.fvd.pytorch_i3d import InceptionI3d 34 | i3d = InceptionI3d(400, in_channels=3).to(device) 35 | # filepath = download(_I3D_PRETRAINED_ID, 'i3d_pretrained_400.pt') 36 | # i3d.load_state_dict(torch.load(filepath, 
map_location=device)) 37 | i3d.load_state_dict(torch.load('i3d_pretrained_400.pt', map_location=device)) 38 | i3d.eval() 39 | return i3d 40 | 41 | # load_i3d_pretrained() -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/eval.md: -------------------------------------------------------------------------------- 1 | ## Eval on ICCV2021-MFR 2 | 3 | coming soon. 4 | 5 | 6 | ## Eval IJBC 7 | You can evaluate IJB-C with PyTorch or ONNX. 8 | 9 | 10 | 1. Eval IJBC With Onnx 11 | ```shell 12 | CUDA_VISIBLE_DEVICES=0 python onnx_ijbc.py --model-root ms1mv3_arcface_r50 --image-path IJB_release/IJBC --result-dir ms1mv3_arcface_r50 13 | ``` 14 | 15 | 2. Eval IJBC With Pytorch 16 | ```shell 17 | CUDA_VISIBLE_DEVICES=0,1 python eval_ijbc.py \ 18 | --model-prefix ms1mv3_arcface_r50/backbone.pth \ 19 | --image-path IJB_release/IJBC \ 20 | --result-dir ms1mv3_arcface_r50 \ 21 | --batch-size 128 \ 22 | --job ms1mv3_arcface_r50 \ 23 | --target IJBC \ 24 | --network iresnet50 25 | ``` 26 | 27 | 28 | ## Inference 29 | 30 | ```shell 31 | python inference.py --weight ms1mv3_arcface_r50/backbone.pth --network r50 32 | ``` 33 | 34 | 35 | ## Result 36 | 37 | | Datasets | Backbone | **MFR-ALL** | IJB-C(1E-4) | IJB-C(1E-5) | 38 | |:---------------|:--------------------|:------------|:------------|:------------| 39 | | WF12M-PFC-0.05 | r100 | 94.05 | 97.51 | 95.75 | 40 | | WF12M-PFC-0.1 | r100 | 94.49 | 97.56 | 95.92 | 41 | | WF12M-PFC-0.2 | r100 | 94.75 | 97.60 | 95.90 | 42 | | WF12M-PFC-0.3 | r100 | 94.71 | 97.64 | 96.01 | 43 | | WF12M | r100 | 94.69 | 97.59 | 95.97 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/prepare_custom_dataset.md: -------------------------------------------------------------------------------- 1 | First, your face images require detection and alignment so that they are properly prepared for processing. Additionally, each individual's face images (those sharing the same id) must be placed into a separate folder. 2 | 3 | 4 | ```shell 5 | # directories and files for your datasets 6 | /image_folder 7 | ├── 0_0_0000000 8 | │   ├── 0_0.jpg 9 | │   ├── 0_1.jpg 10 | │   ├── 0_2.jpg 11 | │   ├── 0_3.jpg 12 | │   └── 0_4.jpg 13 | ├── 0_0_0000001 14 | │   ├── 0_5.jpg 15 | │   ├── 0_6.jpg 16 | │   ├── 0_7.jpg 17 | │   ├── 0_8.jpg 18 | │   └── 0_9.jpg 19 | ├── 0_0_0000002 20 | │   ├── 0_10.jpg 21 | │   ├── 0_11.jpg 22 | │   ├── 0_12.jpg 23 | │   ├── 0_13.jpg 24 | │   ├── 0_14.jpg 25 | │   ├── 0_15.jpg 26 | │   ├── 0_16.jpg 27 | │   └── 0_17.jpg 28 | ├── 0_0_0000003 29 | │   ├── 0_18.jpg 30 | │   ├── 0_19.jpg 31 | │   └── 0_20.jpg 32 | ├── 0_0_0000004 33 | 34 | 35 | # 0) Dependencies installation 36 | pip install opencv-python 37 | apt-get update 38 | apt-get install ffmpeg libsm6 libxext6 -y 39 | 40 | 41 | # 1) create train.lst using the following command 42 | python -m mxnet.tools.im2rec --list --recursive train image_folder 43 | 44 | # 2) create train.rec and train.idx from train.lst using the following command 45 | python -m mxnet.tools.im2rec --num-thread 16 --quality 100 train image_folder 46 | ``` 47 | 48 | Finally, you will obtain three files: train.lst, train.rec, and train.idx, where train.idx and train.rec are utilized for training.
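As a quick sanity check of the pack produced by im2rec, you can read one record back with `mxnet.recordio` before starting training (a minimal sketch; the `train.idx`/`train.rec` paths are assumptions and `mxnet` must already be installed):

```python
import mxnet as mx

# Open the indexed record pack written by im2rec ("r" = read-only).
imgrec = mx.recordio.MXIndexedRecordIO("train.idx", "train.rec", "r")

# Take the first key listed in train.idx and unpack its record into
# an IRHeader (label / id) plus the raw, still-encoded JPEG bytes.
first_key = imgrec.keys[0]
header, jpeg_bytes = mx.recordio.unpack(imgrec.read_idx(first_key))
print(header.label, len(jpeg_bytes))

imgrec.close()
```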
49 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/configs/base.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | 9 | # Margin-Based Softmax 10 | config.margin_list = (1.0, 0.5, 0.0) 11 | config.network = "r50" 12 | config.resume = False 13 | config.save_all_states = False 14 | config.output = "ms1mv3_arcface_r50" 15 | 16 | config.embedding_size = 512 17 | 18 | # Partial FC 19 | config.sample_rate = 1 20 | config.interclass_filtering_threshold = 0 21 | 22 | config.fp16 = False 23 | config.batch_size = 128 24 | 25 | # For SGD 26 | config.optimizer = "sgd" 27 | config.lr = 0.1 28 | config.momentum = 0.9 29 | config.weight_decay = 5e-4 30 | 31 | # For AdamW 32 | # config.optimizer = "adamw" 33 | # config.lr = 0.001 34 | # config.weight_decay = 0.1 35 | 36 | config.verbose = 2000 37 | config.frequent = 10 38 | 39 | # For Large Scale Dataset, such as WebFace42M 40 | config.dali = False 41 | 42 | # Gradient ACC 43 | config.gradient_acc = 1 44 | 45 | # setup seed 46 | config.seed = 2048 47 | 48 | # dataloader num_workers 49 | config.num_workers = 2 50 | 51 | # WandB Logger 52 | config.wandb_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" 53 | config.suffix_run_name = None 54 | config.using_wandb = False 55 | config.wandb_entity = "entity" 56 | config.wandb_project = "project" 57 | config.wandb_log_all = True 58 | config.save_artifacts = False 59 | config.wandb_resume = False # resume wandb run: only if you want to resume the last run that was interrupted -------------------------------------------------------------------------------- /AToM/dataset/preprocess.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import re 4 | from pathlib import Path 5 | 6 | import torch 7 | 8 | def increment_path(path, exist_ok=False, sep="", mkdir=False): 9 | # Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc. 10 | path = Path(path) # os-agnostic 11 | if path.exists() and not exist_ok: 12 | suffix = path.suffix 13 | path = path.with_suffix("") 14 | dirs = glob.glob(f"{path}{sep}*") # similar paths 15 | matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs] 16 | i = [int(m.groups()[0]) for m in matches if m] # indices 17 | n = max(i) + 1 if i else 2 # increment number 18 | path = Path(f"{path}{sep}{n}{suffix}") # update path 19 | dir = path if path.suffix == "" else path.parent # directory 20 | if not dir.exists() and mkdir: 21 | dir.mkdir(parents=True, exist_ok=True) # make directory 22 | return path 23 | 24 | 25 | # class Normalizer: 26 | # def __init__(self, data): 27 | # flat = data.reshape(-1, data.shape[-1]) 28 | # self.scaler = MinMaxScaler((-1, 1), clip=True) 29 | # self.scaler.fit(flat) 30 | 31 | # def normalize(self, x): 32 | # batch, seq, ch = x.shape 33 | # x = x.reshape(-1, ch) 34 | # return self.scaler.transform(x).reshape((batch, seq, ch)) 35 | 36 | # def unnormalize(self, x): 37 | # batch, seq, ch = x.shape 38 | # x = x.reshape(-1, ch) 39 | # x = torch.clip(x, -1, 1) # clip to force compatibility 40 | # return self.scaler.inverse_transform(x).reshape((batch, seq, ch)) 41 | 42 | 43 | # def vectorize_many(data): 44 | # # given a list of batch x seqlen x joints?
x channels, flatten all to batch x seqlen x -1, concatenate 45 | # batch_size = data[0].shape[0] 46 | # seq_len = data[0].shape[1] 47 | 48 | # out = [x.reshape(batch_size, seq_len, -1).contiguous() for x in data] 49 | 50 | # global_pose_vec_gt = torch.cat(out, dim=2) 51 | # return global_pose_vec_gt 52 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/prepare_webface42m.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## 1. Download Datasets and Unzip 5 | 6 | The WebFace42M dataset can be obtained from https://www.face-benchmark.org/download.html. 7 | Upon extraction, the raw data of WebFace42M consists of 10 directories, denoted 0 to 9; the sub-dataset WebFace4M corresponds to 1 directory (0) and WebFace12M to 3 directories (0, 1, 2). 8 | 9 | ## 2. Create Shuffled Rec File for DALI 10 | 11 | Shuffled .rec files are crucial for DALI, and the absence of shuffling in .rec files can result in decreased performance. Original .rec files generated in the InsightFace style are not compatible with NVIDIA DALI, so it is necessary to use the [mxnet.tools.im2rec](https://github.com/apache/incubator-mxnet/blob/master/tools/im2rec.py) command to generate a shuffled .rec file. 12 | 13 | 14 | ```shell 15 | # directories and files for your datasets 16 | /WebFace42M_Root 17 | ├── 0_0_0000000 18 | │   ├── 0_0.jpg 19 | │   ├── 0_1.jpg 20 | │   ├── 0_2.jpg 21 | │   ├── 0_3.jpg 22 | │   └── 0_4.jpg 23 | ├── 0_0_0000001 24 | │   ├── 0_5.jpg 25 | │   ├── 0_6.jpg 26 | │   ├── 0_7.jpg 27 | │   ├── 0_8.jpg 28 | │   └── 0_9.jpg 29 | ├── 0_0_0000002 30 | │   ├── 0_10.jpg 31 | │   ├── 0_11.jpg 32 | │   ├── 0_12.jpg 33 | │   ├── 0_13.jpg 34 | │   ├── 0_14.jpg 35 | │   ├── 0_15.jpg 36 | │   ├── 0_16.jpg 37 | │   └── 0_17.jpg 38 | ├── 0_0_0000003 39 | │   ├── 0_18.jpg 40 | │   ├── 0_19.jpg 41 | │   └── 0_20.jpg 42 | ├── 0_0_0000004 43 | 44 | 45 | # 0) Dependencies installation 46 | pip install opencv-python 47 | apt-get update 48 | apt-get install ffmpeg libsm6 libxext6 -y 49 | 50 | 51 | # 1) create train.lst using the following command 52 | python -m mxnet.tools.im2rec --list --recursive train WebFace42M_Root 53 | 54 | # 2) create train.rec and train.idx from train.lst using the following command 55 | python -m mxnet.tools.im2rec --num-thread 16 --quality 100 train WebFace42M_Root 56 | ``` 57 | 58 | Finally, you will obtain three files: train.lst, train.rec, and train.idx, where train.idx and train.rec are utilized for training. 59 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/data/image_folder.py: -------------------------------------------------------------------------------- 1 | """A modified image folder class 2 | 3 | We modify the official PyTorch image folder (https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py) 4 | so that this class can load images from both current directory and its subdirectories.
5 | """ 6 | import numpy as np 7 | import torch.utils.data as data 8 | 9 | from PIL import Image 10 | import os 11 | import os.path 12 | 13 | IMG_EXTENSIONS = [ 14 | '.jpg', '.JPG', '.jpeg', '.JPEG', 15 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 16 | '.tif', '.TIF', '.tiff', '.TIFF', 17 | ] 18 | 19 | 20 | def is_image_file(filename): 21 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 22 | 23 | 24 | def make_dataset(dir, max_dataset_size=float("inf")): 25 | images = [] 26 | assert os.path.isdir(dir) or os.path.islink(dir), '%s is not a valid directory' % dir 27 | 28 | for root, _, fnames in sorted(os.walk(dir, followlinks=True)): 29 | for fname in fnames: 30 | if is_image_file(fname): 31 | path = os.path.join(root, fname) 32 | images.append(path) 33 | return images[:min(max_dataset_size, len(images))] 34 | 35 | 36 | def default_loader(path): 37 | return Image.open(path).convert('RGB') 38 | 39 | 40 | class ImageFolder(data.Dataset): 41 | 42 | def __init__(self, root, transform=None, return_paths=False, 43 | loader=default_loader): 44 | imgs = make_dataset(root) 45 | if len(imgs) == 0: 46 | raise(RuntimeError("Found 0 images in: " + root + "\n" 47 | "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) 48 | 49 | self.root = root 50 | self.imgs = imgs 51 | self.transform = transform 52 | self.return_paths = return_paths 53 | self.loader = loader 54 | 55 | def __getitem__(self, index): 56 | path = self.imgs[index] 57 | img = self.loader(path) 58 | if self.transform is not None: 59 | img = self.transform(img) 60 | if self.return_paths: 61 | return img, path 62 | else: 63 | return img 64 | 65 | def __len__(self): 66 | return len(self.imgs) 67 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/torch2onnx.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import onnx 3 | import torch 4 | 5 | 6 | def convert_onnx(net, path_module, output, opset=11, simplify=False): 7 | assert isinstance(net, torch.nn.Module) 8 | img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.int32) 9 | img = img.astype(np.float) 10 | img = (img / 255. 
- 0.5) / 0.5 # torch style norm 11 | img = img.transpose((2, 0, 1)) 12 | img = torch.from_numpy(img).unsqueeze(0).float() 13 | 14 | weight = torch.load(path_module) 15 | net.load_state_dict(weight, strict=True) 16 | net.eval() 17 | torch.onnx.export(net, img, output, input_names=["data"], keep_initializers_as_inputs=False, verbose=False, opset_version=opset) 18 | model = onnx.load(output) 19 | graph = model.graph 20 | graph.input[0].type.tensor_type.shape.dim[0].dim_param = 'None' 21 | if simplify: 22 | from onnxsim import simplify 23 | model, check = simplify(model) 24 | assert check, "Simplified ONNX model could not be validated" 25 | onnx.save(model, output) 26 | 27 | 28 | if __name__ == '__main__': 29 | import os 30 | import argparse 31 | from backbones import get_model 32 | 33 | parser = argparse.ArgumentParser(description='ArcFace PyTorch to onnx') 34 | parser.add_argument('input', type=str, help='input backbone.pth file or path') 35 | parser.add_argument('--output', type=str, default=None, help='output onnx path') 36 | parser.add_argument('--network', type=str, default=None, help='backbone network') 37 | parser.add_argument('--simplify', type=bool, default=False, help='onnx simplify') 38 | args = parser.parse_args() 39 | input_file = args.input 40 | if os.path.isdir(input_file): 41 | input_file = os.path.join(input_file, "model.pt") 42 | assert os.path.exists(input_file) 43 | # model_name = os.path.basename(os.path.dirname(input_file)).lower() 44 | # params = model_name.split("_") 45 | # if len(params) >= 3 and params[1] in ('arcface', 'cosface'): 46 | # if args.network is None: 47 | # args.network = params[2] 48 | assert args.network is not None 49 | print(args) 50 | backbone_onnx = get_model(args.network, dropout=0.0, fp16=False, num_features=512) 51 | if args.output is None: 52 | args.output = os.path.join(os.path.dirname(args.input), "model.onnx") 53 | convert_onnx(backbone_onnx, input_file, args.output, simplify=args.simplify) 54 | -------------------------------------------------------------------------------- /MToV/text_folders/train_id.txt: -------------------------------------------------------------------------------- 1 | RD_Radio46_000.mp4 2 | RD_Radio43_000.mp4 3 | RD_Radio33_000.mp4 4 | RD_Radio34_003.mp4 5 | RD_Radio40_000.mp4 6 | RD_Radio35_000.mp4 7 | RD_Radio8_000.mp4 8 | RD_Radio47_000.mp4 9 | RD_Radio45_000.mp4 10 | RD_Radio7_000.mp4 11 | RD_Radio53_000.mp4 12 | RD_Radio34_000.mp4 13 | RD_Radio12_000.mp4 14 | RD_Radio34_004.mp4 15 | RD_Radio50_000.mp4 16 | RD_Radio20_000.mp4 17 | RD_Radio34_002.mp4 18 | RD_Radio41_000.mp4 19 | RD_Radio13_000.mp4 20 | RD_Radio26_000.mp4 21 | RD_Radio30_000.mp4 22 | RD_Radio42_000.mp4 23 | RD_Radio34_001.mp4 24 | RD_Radio16_000.mp4 25 | RD_Radio3_000.mp4 26 | RD_Radio17_000.mp4 27 | RD_Radio29_000.mp4 28 | RD_Radio38_000.mp4 29 | RD_Radio1_000.mp4 30 | RD_Radio39_000.mp4 31 | RD_Radio5_000.mp4 32 | RD_Radio44_000.mp4 33 | RD_Radio27_000.mp4 34 | RD_Radio28_000.mp4 35 | RD_Radio4_000.mp4 36 | RD_Radio54_000.mp4 37 | RD_Radio23_000.mp4 38 | RD_Radio34_006.mp4 39 | RD_Radio37_000.mp4 40 | RD_Radio21_000.mp4 41 | RD_Radio36_000.mp4 42 | RD_Radio22_000.mp4 43 | RD_Radio34_009.mp4 44 | RD_Radio49_000.mp4 45 | RD_Radio11_000.mp4 46 | RD_Radio34_007.mp4 47 | RD_Radio19_000.mp4 48 | RD_Radio34_005.mp4 49 | RD_Radio10_000.mp4 50 | RD_Radio25_000.mp4 51 | RD_Radio32_000.mp4 52 | RD_Radio52_000.mp4 53 | RD_Radio14_000.mp4 54 | RD_Radio51_000.mp4 55 | RD_Radio34_008.mp4 56 | RD_Radio9_000.mp4 57 | RD_Radio18_000.mp4 58 | RD_Radio31_000.mp4 59 | 
RD_Radio2_000.mp4 60 | RD_Radio11_001.mp4 61 | WRA_JebHensarling2_003.mp4 62 | WRA_JonKyl_000.mp4 63 | WDA_GerryConnolly_000.mp4 64 | WDA_FrankPallone1_000.mp4 65 | WDA_HillaryClinton_000.mp4 66 | WDA_BarbaraLee1_000.mp4 67 | WRA_SteveDaines0_000.mp4 68 | WDA_AmyKlobuchar1_002.mp4 69 | WRA_MarkwayneMullin_000.mp4 70 | WDA_StenyHoyer_000.mp4 71 | WDA_BarackObama_001.mp4 72 | WDA_SheldonWhitehouse0_000.mp4 73 | WRA_JohnKasich1_001.mp4 74 | WDA_JoeCrowley0_000.mp4 75 | WRA_SaxbyChambliss_000.mp4 76 | WDA_RichardBlumenthal_000.mp4 77 | WRA_GregWalden1_000.mp4 78 | WRA_JoniErnst1_000.mp4 79 | WRA_RandPaul1_000.mp4 80 | WRA_ErikPaulsen_003.mp4 81 | WRA_JohnKasich3_000.mp4 82 | WDA_HakeemJeffries_000.mp4 83 | WDA_JackReed0_000.mp4 84 | WRA_DianeBlack0_000.mp4 85 | WDA_NancyPelosi3_000.mp4 86 | WRA_MikeJohanns_000.mp4 87 | WDA_JoeManchin_000.mp4 88 | WDA_ChrisCoons1_000.mp4 89 | WRA_DavidVitter_000.mp4 90 | WDA_JackyRosen_000.mp4 91 | WDA_PatrickLeahy0_000.mp4 92 | WRA_ErikPaulsen_002.mp4 93 | WRA_JoePitts_000.mp4 94 | WDA_JoeCrowley1_001.mp4 95 | WDA_TerriSewell0_000.mp4 96 | WDA_MartinHeinrich_000.mp4 97 | WDA_JackieSpeier_000.mp4 98 | WRA_LisaMurkowski0_000.mp4 -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/utils/plot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import pandas as pd 7 | from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap 8 | from prettytable import PrettyTable 9 | from sklearn.metrics import roc_curve, auc 10 | 11 | with open(sys.argv[1], "r") as f: 12 | files = f.readlines() 13 | 14 | files = [x.strip() for x in files] 15 | image_path = "/train_tmp/IJB_release/IJBC" 16 | 17 | 18 | def read_template_pair_list(path): 19 | pairs = pd.read_csv(path, sep=' ', header=None).values 20 | t1 = pairs[:, 0].astype(np.int) 21 | t2 = pairs[:, 1].astype(np.int) 22 | label = pairs[:, 2].astype(np.int) 23 | return t1, t2, label 24 | 25 | 26 | p1, p2, label = read_template_pair_list( 27 | os.path.join('%s/meta' % image_path, 28 | '%s_template_pair_label.txt' % 'ijbc')) 29 | 30 | methods = [] 31 | scores = [] 32 | for file in files: 33 | methods.append(file) 34 | scores.append(np.load(file)) 35 | 36 | methods = np.array(methods) 37 | scores = dict(zip(methods, scores)) 38 | colours = dict( 39 | zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2'))) 40 | x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1] 41 | tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels]) 42 | fig = plt.figure() 43 | for method in methods: 44 | fpr, tpr, _ = roc_curve(label, scores[method]) 45 | roc_auc = auc(fpr, tpr) 46 | fpr = np.flipud(fpr) 47 | tpr = np.flipud(tpr) # select largest tpr at same fpr 48 | plt.plot(fpr, 49 | tpr, 50 | color=colours[method], 51 | lw=1, 52 | label=('[%s (AUC = %0.4f %%)]' % 53 | (method.split('-')[-1], roc_auc * 100))) 54 | tpr_fpr_row = [] 55 | tpr_fpr_row.append(method) 56 | for fpr_iter in np.arange(len(x_labels)): 57 | _, min_index = min( 58 | list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr))))) 59 | tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100)) 60 | tpr_fpr_table.add_row(tpr_fpr_row) 61 | plt.xlim([10 ** -6, 0.1]) 62 | plt.ylim([0.3, 1.0]) 63 | plt.grid(linestyle='--', linewidth=1) 64 | plt.xticks(x_labels) 65 | plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) 66 | 
plt.xscale('log') 67 | plt.xlabel('False Positive Rate') 68 | plt.ylabel('True Positive Rate') 69 | plt.title('ROC on IJB') 70 | plt.legend(loc="lower right") 71 | print(tpr_fpr_table) 72 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/scripts/shuffle_rec.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import multiprocessing 3 | import os 4 | import time 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | 10 | def read_worker(args, q_in): 11 | path_imgidx = os.path.join(args.input, "train.idx") 12 | path_imgrec = os.path.join(args.input, "train.rec") 13 | imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r") 14 | 15 | s = imgrec.read_idx(0) 16 | header, _ = mx.recordio.unpack(s) 17 | assert header.flag > 0 18 | 19 | imgidx = np.array(range(1, int(header.label[0]))) 20 | np.random.shuffle(imgidx) 21 | 22 | for idx in imgidx: 23 | item = imgrec.read_idx(idx) 24 | q_in.put(item) 25 | 26 | q_in.put(None) 27 | imgrec.close() 28 | 29 | 30 | def write_worker(args, q_out): 31 | pre_time = time.time() 32 | 33 | if args.input[-1] == '/': 34 | args.input = args.input[:-1] 35 | dirname = os.path.dirname(args.input) 36 | basename = os.path.basename(args.input) 37 | output = os.path.join(dirname, f"shuffled_{basename}") 38 | os.makedirs(output, exist_ok=True) 39 | 40 | path_imgidx = os.path.join(output, "train.idx") 41 | path_imgrec = os.path.join(output, "train.rec") 42 | save_record = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "w") 43 | more = True 44 | count = 0 45 | while more: 46 | deq = q_out.get() 47 | if deq is None: 48 | more = False 49 | else: 50 | header, jpeg = mx.recordio.unpack(deq) 51 | # TODO it is currently not fully developed 52 | if isinstance(header.label, float): 53 | label = header.label 54 | else: 55 | label = header.label[0] 56 | 57 | header = mx.recordio.IRHeader(flag=header.flag, label=label, id=header.id, id2=header.id2) 58 | save_record.write_idx(count, mx.recordio.pack(header, jpeg)) 59 | count += 1 60 | if count % 10000 == 0: 61 | cur_time = time.time() 62 | print('save time:', cur_time - pre_time, ' count:', count) 63 | pre_time = cur_time 64 | print(count) 65 | save_record.close() 66 | 67 | 68 | def main(args): 69 | queue = multiprocessing.Queue(10240) 70 | read_process = multiprocessing.Process(target=read_worker, args=(args, queue)) 71 | read_process.daemon = True 72 | read_process.start() 73 | write_process = multiprocessing.Process(target=write_worker, args=(args, queue)) 74 | write_process.start() 75 | write_process.join() 76 | 77 | 78 | if __name__ == '__main__': 79 | parser = argparse.ArgumentParser() 80 | parser.add_argument('input', help='path to source rec.') 81 | main(parser.parse_args()) 82 | -------------------------------------------------------------------------------- /AToM/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def parse_train_opt(): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--project", default="runs/train", help="project/name") 7 | parser.add_argument("--exp_name", default="exp", help="save to project/name") 8 | parser.add_argument( 9 | "--data_path", type=str, default="", help="raw data path" 10 | ) 11 | parser.add_argument( 12 | "--processed_data_dir", 13 | type=str, 14 | default="", 15 | help="Dataset backup path", 16 | ) 17 | 
parser.add_argument("--render_dir", type=str, default="renders/", help="Sample render path") 18 | parser.add_argument("--feature_type", type=str, default="jukebox") 19 | parser.add_argument("--wandb_pj_name", type=str, default="AToM", help="project name") 20 | parser.add_argument("--batch_size", type=int, default=64, help="batch size") 21 | parser.add_argument("--epochs", type=int, default=2000) 22 | parser.add_argument("--force_reload", action="store_true", help="force reloads the datasets") 23 | parser.add_argument( 24 | "--no_cache", action="store_true", help="don't reuse / cache loaded dataset" 25 | ) 26 | parser.add_argument( 27 | "--save_interval", 28 | type=int, 29 | default=100, 30 | help='Log model after every "save_period" epoch', 31 | ) 32 | parser.add_argument("--ema_interval", type=int, default=1, help="ema every x steps") 33 | parser.add_argument( 34 | "--checkpoint", type=str, default="", help="trained checkpoint path (optional)" 35 | ) 36 | opt = parser.parse_args() 37 | return opt 38 | 39 | 40 | def parse_test_opt(): 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument("--feature_type", type=str, default="jukebox") 43 | parser.add_argument( 44 | "--out_length", type=float, default=30, help="max. length of output, in seconds" 45 | ) 46 | parser.add_argument( 47 | "--processed_data_dir", 48 | type=str, 49 | default="", 50 | help="Dataset backup path", 51 | ) 52 | parser.add_argument("--render_dir", type=str, default="renders/", help="Sample render path") 53 | parser.add_argument("--checkpoint", type=str, default="checkpoint.pt", help="checkpoint") 54 | parser.add_argument( 55 | "--music_dir", 56 | type=str, 57 | default="", 58 | help="folder containing input music", 59 | ) 60 | parser.add_argument( 61 | "--cache_features", 62 | action="store_true", 63 | help="Save the jukebox features for later reuse", 64 | ) 65 | parser.add_argument( 66 | "--no_render", 67 | action="store_true", 68 | help="Don't render the video", 69 | ) 70 | parser.add_argument( 71 | "--use_cached_features", 72 | action="store_true", 73 | help="Use precomputed features instead of music folder", 74 | ) 75 | parser.add_argument( 76 | "--feature_cache_dir", 77 | type=str, 78 | default="cached_features/", 79 | help="Where to save/load the features", 80 | ) 81 | opt = parser.parse_args() 82 | return opt 83 | -------------------------------------------------------------------------------- /MToV/models/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0,dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1,dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | #remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.','') 20 | self.m_name2s_name.update({name:s_name}) 21 | self.register_buffer(s_name,p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def forward(self,model): 26 | decay = self.decay 27 | 28 | if self.num_updates >= 0: 29 | self.num_updates += 1 30 | decay = min(self.decay,(1 + self.num_updates) / (10 + self.num_updates)) 31 | 32 | 
one_minus_decay = 1.0 - decay 33 | 34 | with torch.no_grad(): 35 | m_param = dict(model.named_parameters()) 36 | shadow_params = dict(self.named_buffers()) 37 | 38 | for key in m_param: 39 | if m_param[key].requires_grad: 40 | sname = self.m_name2s_name[key] 41 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 42 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 43 | else: 44 | assert not key in self.m_name2s_name 45 | 46 | def copy_to(self, model): 47 | m_param = dict(model.named_parameters()) 48 | shadow_params = dict(self.named_buffers()) 49 | for key in m_param: 50 | if m_param[key].requires_grad: 51 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 52 | else: 53 | assert not key in self.m_name2s_name 54 | 55 | def store(self, parameters): 56 | """ 57 | Save the current parameters for restoring later. 58 | Args: 59 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 60 | temporarily stored. 61 | """ 62 | self.collected_params = [param.clone() for param in parameters] 63 | 64 | def restore(self, parameters): 65 | """ 66 | Restore the parameters stored with the `store` method. 67 | Useful to validate the model with EMA parameters without affecting the 68 | original optimization process. Store the parameters before the 69 | `copy_to` method. After validation (or model saving), use this to 70 | restore the former parameters. 71 | Args: 72 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 73 | updated with the stored parameters. 74 | """ 75 | for c_param, param in zip(self.collected_params, parameters): 76 | param.data.copy_(c_param.data) -------------------------------------------------------------------------------- /MToV/losses/diffaugment.py: -------------------------------------------------------------------------------- 1 | # Differentiable Augmentation for Data-Efficient GAN Training 2 | # Shengyu Zhao, Zhijian Liu, Ji Lin, Jun-Yan Zhu, and Song Han 3 | # https://arxiv.org/pdf/2006.10738 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | 9 | def DiffAugment(x, policy='color,translation,cutout', channels_first=True): 10 | if policy: 11 | if not channels_first: 12 | x = x.permute(0, 3, 1, 2) 13 | for p in policy.split(','): 14 | for f in AUGMENT_FNS[p]: 15 | x = f(x) 16 | if not channels_first: 17 | x = x.permute(0, 2, 3, 1) 18 | x = x.contiguous() 19 | return x 20 | 21 | 22 | def rand_brightness(x): 23 | x = x + (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) - 0.5) 24 | return x 25 | 26 | 27 | def rand_saturation(x): 28 | x_mean = x.mean(dim=1, keepdim=True) 29 | x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) * 2) + x_mean 30 | return x 31 | 32 | 33 | def rand_contrast(x): 34 | x_mean = x.mean(dim=[1, 2, 3], keepdim=True) 35 | x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) + 0.5) + x_mean 36 | return x 37 | 38 | 39 | def rand_translation(x, ratio=0.125): 40 | shift_x, shift_y = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) 41 | translation_x = torch.randint(-shift_x, shift_x + 1, size=[x.size(0), 1, 1], device=x.device) 42 | translation_y = torch.randint(-shift_y, shift_y + 1, size=[x.size(0), 1, 1], device=x.device) 43 | grid_batch, grid_x, grid_y = torch.meshgrid( 44 | torch.arange(x.size(0), dtype=torch.long, device=x.device), 45 | torch.arange(x.size(2), dtype=torch.long, device=x.device), 46 | torch.arange(x.size(3), dtype=torch.long, device=x.device), 47 | 
) 48 | grid_x = torch.clamp(grid_x + translation_x + 1, 0, x.size(2) + 1) 49 | grid_y = torch.clamp(grid_y + translation_y + 1, 0, x.size(3) + 1) 50 | x_pad = F.pad(x, [1, 1, 1, 1, 0, 0, 0, 0]) 51 | x = x_pad.permute(0, 2, 3, 1).contiguous()[grid_batch, grid_x, grid_y].permute(0, 3, 1, 2).contiguous() 52 | return x 53 | 54 | 55 | def rand_cutout(x, ratio=0.5): 56 | cutout_size = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) 57 | offset_x = torch.randint(0, x.size(2) + (1 - cutout_size[0] % 2), size=[x.size(0), 1, 1], device=x.device) 58 | offset_y = torch.randint(0, x.size(3) + (1 - cutout_size[1] % 2), size=[x.size(0), 1, 1], device=x.device) 59 | grid_batch, grid_x, grid_y = torch.meshgrid( 60 | torch.arange(x.size(0), dtype=torch.long, device=x.device), 61 | torch.arange(cutout_size[0], dtype=torch.long, device=x.device), 62 | torch.arange(cutout_size[1], dtype=torch.long, device=x.device), 63 | ) 64 | grid_x = torch.clamp(grid_x + offset_x - cutout_size[0] // 2, min=0, max=x.size(2) - 1) 65 | grid_y = torch.clamp(grid_y + offset_y - cutout_size[1] // 2, min=0, max=x.size(3) - 1) 66 | mask = torch.ones(x.size(0), x.size(2), x.size(3), dtype=x.dtype, device=x.device) 67 | mask[grid_batch, grid_x, grid_y] = 0 68 | x = x * mask.unsqueeze(1) 69 | return x 70 | 71 | 72 | AUGMENT_FNS = { 73 | 'color': [rand_brightness, rand_saturation, rand_contrast], 74 | 'translation': [rand_translation], 75 | 'cutout': [rand_cutout], 76 | } -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/__init__.py: -------------------------------------------------------------------------------- 1 | """This package contains modules related to objective functions, optimizations, and network architectures. 2 | 3 | To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel. 4 | You need to implement the following five functions: 5 | -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). 6 | -- : unpack data from dataset and apply preprocessing. 7 | -- : produce intermediate results. 8 | -- : calculate loss, gradients, and update network weights. 9 | -- : (optionally) add model-specific options and set default options. 10 | 11 | In the function <__init__>, you need to define four lists: 12 | -- self.loss_names (str list): specify the training losses that you want to plot and save. 13 | -- self.model_names (str list): define networks used in our training. 14 | -- self.visual_names (str list): specify the images that you want to display and save. 15 | -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an usage. 16 | 17 | Now you can use the model class by specifying flag '--model dummy'. 18 | See our template model class 'template_model.py' for more details. 19 | """ 20 | 21 | import importlib 22 | from .base_model import BaseModel 23 | 24 | 25 | def find_model_using_name(model_name): 26 | """Import the module "models/[model_name]_model.py". 27 | 28 | In the file, the class called DatasetNameModel() will 29 | be instantiated. It has to be a subclass of BaseModel, 30 | and it is case-insensitive. 31 | """ 32 | model_filename = "deep_3drecon_models." 
+ model_name + "_model" 33 | modellib = importlib.import_module(model_filename) 34 | model = None 35 | target_model_name = model_name.replace('_', '') + 'model' 36 | for name, cls in modellib.__dict__.items(): 37 | if name.lower() == target_model_name.lower() \ 38 | and issubclass(cls, BaseModel): 39 | model = cls 40 | 41 | if model is None: 42 | print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name)) 43 | exit(0) 44 | 45 | return model 46 | 47 | 48 | def get_option_setter(model_name): 49 | """Return the static method of the model class.""" 50 | model_class = find_model_using_name(model_name) 51 | return model_class.modify_commandline_options 52 | 53 | 54 | def create_model(opt): 55 | """Create a model given the option. 56 | 57 | This function warps the class CustomDatasetDataLoader. 58 | This is the main interface between this package and 'train.py'/'test.py' 59 | 60 | Example: 61 | >>> from models import create_model 62 | >>> model = create_model(opt) 63 | """ 64 | model = find_model_using_name(opt.model) 65 | instance = model(opt) 66 | print("model [%s] was created" % type(instance).__name__) 67 | return instance 68 | -------------------------------------------------------------------------------- /AToM/model/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import torch 5 | from einops import rearrange, reduce, repeat 6 | from einops.layers.torch import Rearrange 7 | from torch import nn 8 | 9 | 10 | # absolute positional embedding used for vanilla transformer sequential data 11 | class PositionalEncoding(nn.Module): 12 | def __init__(self, d_model, dropout=0.1, max_len=500, batch_first=False): 13 | super().__init__() 14 | self.batch_first = batch_first 15 | 16 | self.dropout = nn.Dropout(p=dropout) 17 | 18 | pe = torch.zeros(max_len, d_model) 19 | position = torch.arange(0, max_len).unsqueeze(1) 20 | div_term = torch.exp(torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model)) 21 | pe[:, 0::2] = torch.sin(position * div_term) 22 | pe[:, 1::2] = torch.cos(position * div_term) 23 | pe = pe.unsqueeze(0).transpose(0, 1) 24 | 25 | self.register_buffer("pe", pe) 26 | 27 | def forward(self, x): 28 | if self.batch_first: 29 | x = x + self.pe.permute(1, 0, 2)[:, : x.shape[1], :] 30 | else: 31 | x = x + self.pe[: x.shape[0], :] 32 | return self.dropout(x) 33 | 34 | 35 | # very similar positional embedding used for diffusion timesteps 36 | class SinusoidalPosEmb(nn.Module): 37 | def __init__(self, dim): 38 | super().__init__() 39 | self.dim = dim 40 | 41 | def forward(self, x): 42 | device = x.device 43 | half_dim = self.dim // 2 44 | emb = math.log(10000) / (half_dim - 1) 45 | emb = torch.exp(torch.arange(half_dim, device=device) * -emb) 46 | emb = x[:, None] * emb[None, :] 47 | emb = torch.cat((emb.sin(), emb.cos()), dim=-1) 48 | return emb 49 | 50 | 51 | # dropout mask 52 | def prob_mask_like(shape, prob, device): 53 | if prob == 1: 54 | return torch.ones(shape, device=device, dtype=torch.bool) 55 | elif prob == 0: 56 | return torch.zeros(shape, device=device, dtype=torch.bool) 57 | else: 58 | return torch.zeros(shape, device=device).float().uniform_(0, 1) < prob 59 | 60 | 61 | def extract(a, t, x_shape): 62 | b, *_ = t.shape 63 | out = a.gather(-1, t) 64 | return out.reshape(b, *((1,) * (len(x_shape) - 1))) 65 | 66 | 67 | def make_beta_schedule( 68 | schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, 
cosine_s=8e-3 69 | ): 70 | if schedule == "linear": 71 | betas = ( 72 | torch.linspace( 73 | linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64 74 | ) 75 | ** 2 76 | ) 77 | 78 | elif schedule == "cosine": 79 | timesteps = ( 80 | torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s 81 | ) 82 | alphas = timesteps / (1 + cosine_s) * np.pi / 2 83 | alphas = torch.cos(alphas).pow(2) 84 | alphas = alphas / alphas[0] 85 | betas = 1 - alphas[1:] / alphas[:-1] 86 | betas = np.clip(betas, a_min=0, a_max=0.999) 87 | 88 | elif schedule == "sqrt_linear": 89 | betas = torch.linspace( 90 | linear_start, linear_end, n_timestep, dtype=torch.float64 91 | ) 92 | elif schedule == "sqrt": 93 | betas = ( 94 | torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) 95 | ** 0.5 96 | ) 97 | else: 98 | raise ValueError(f"schedule '{schedule}' unknown.") 99 | return betas.numpy() 100 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/data/template_dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class template 2 | 3 | This module provides a template for users to implement custom datasets. 4 | You can specify '--dataset_mode template' to use this dataset. 5 | The class name should be consistent with both the filename and its dataset_mode option. 6 | The filename should be _dataset.py 7 | The class name should be Dataset.py 8 | You need to implement the following functions: 9 | -- : Add dataset-specific options and rewrite default values for existing options. 10 | -- <__init__>: Initialize this dataset class. 11 | -- <__getitem__>: Return a data point and its metadata information. 12 | -- <__len__>: Return the number of images. 13 | """ 14 | from data.base_dataset import BaseDataset, get_transform 15 | # from data.image_folder import make_dataset 16 | # from PIL import Image 17 | 18 | 19 | class TemplateDataset(BaseDataset): 20 | """A template dataset class for you to implement custom datasets.""" 21 | @staticmethod 22 | def modify_commandline_options(parser, is_train): 23 | """Add new dataset-specific options, and rewrite default values for existing options. 24 | 25 | Parameters: 26 | parser -- original option parser 27 | is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. 28 | 29 | Returns: 30 | the modified parser. 31 | """ 32 | parser.add_argument('--new_dataset_option', type=float, default=1.0, help='new dataset option') 33 | parser.set_defaults(max_dataset_size=10, new_dataset_option=2.0) # specify dataset-specific default values 34 | return parser 35 | 36 | def __init__(self, opt): 37 | """Initialize this dataset class. 38 | 39 | Parameters: 40 | opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions 41 | 42 | A few things can be done here. 43 | - save the options (have been done in BaseDataset) 44 | - get image paths and meta information of the dataset. 45 | - define the image transformation. 46 | """ 47 | # save the option and dataset root 48 | BaseDataset.__init__(self, opt) 49 | # get the image paths of your dataset; 50 | self.image_paths = [] # You can call sorted(make_dataset(self.root, opt.max_dataset_size)) to get all the image paths under the directory self.root 51 | # define the default transform function. 
You can use ; You can also define your custom transform function 52 | self.transform = get_transform(opt) 53 | 54 | def __getitem__(self, index): 55 | """Return a data point and its metadata information. 56 | 57 | Parameters: 58 | index -- a random integer for data indexing 59 | 60 | Returns: 61 | a dictionary of data with their names. It usually contains the data itself and its metadata information. 62 | 63 | Step 1: get a random image path: e.g., path = self.image_paths[index] 64 | Step 2: load your data from the disk: e.g., image = Image.open(path).convert('RGB'). 65 | Step 3: convert your data to a PyTorch tensor. You can use helpder functions such as self.transform. e.g., data = self.transform(image) 66 | Step 4: return a data point as a dictionary. 67 | """ 68 | path = 'temp' # needs to be a string 69 | data_A = None # needs to be a tensor 70 | data_B = None # needs to be a tensor 71 | return {'data_A': data_A, 'data_B': data_B, 'path': path} 72 | 73 | def __len__(self): 74 | """Return the total number of images.""" 75 | return len(self.image_paths) 76 | -------------------------------------------------------------------------------- /AToM/data_util/tensor_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | import numpy as np 4 | 5 | 6 | def reduce_tensors(metrics): 7 | new_metrics = {} 8 | for k, v in metrics.items(): 9 | if isinstance(v, torch.Tensor): 10 | dist.all_reduce(v) 11 | v = v / dist.get_world_size() 12 | if type(v) is dict: 13 | v = reduce_tensors(v) 14 | new_metrics[k] = v 15 | return new_metrics 16 | 17 | 18 | def tensors_to_scalars(tensors): 19 | if isinstance(tensors, torch.Tensor): 20 | tensors = tensors.item() 21 | return tensors 22 | elif isinstance(tensors, dict): 23 | new_tensors = {} 24 | for k, v in tensors.items(): 25 | v = tensors_to_scalars(v) 26 | new_tensors[k] = v 27 | return new_tensors 28 | elif isinstance(tensors, list): 29 | return [tensors_to_scalars(v) for v in tensors] 30 | else: 31 | return tensors 32 | 33 | 34 | def convert_to_np(tensors): 35 | if isinstance(tensors, np.ndarray): 36 | return tensors 37 | elif isinstance(tensors, dict): 38 | new_np = {} 39 | for k, v in tensors.items(): 40 | if isinstance(v, torch.Tensor): 41 | v = v.cpu().numpy() 42 | if type(v) is dict: 43 | v = convert_to_np(v) 44 | new_np[k] = v 45 | elif isinstance(tensors, list): 46 | new_np = [] 47 | for v in tensors: 48 | if isinstance(v, torch.Tensor): 49 | v = v.cpu().numpy() 50 | if type(v) is dict: 51 | v = convert_to_np(v) 52 | new_np.append(v) 53 | elif isinstance(tensors, torch.Tensor): 54 | v = tensors 55 | if isinstance(v, torch.Tensor): 56 | v = v.cpu().numpy() 57 | if type(v) is dict: 58 | v = convert_to_np(v) 59 | new_np = v 60 | else: 61 | raise Exception(f'tensors_to_np does not support type {type(tensors)}.') 62 | return new_np 63 | 64 | 65 | def convert_to_tensor(arrays): 66 | if isinstance(arrays, np.ndarray): 67 | v = torch.from_numpy(arrays).float() 68 | ret = v 69 | elif isinstance(arrays, torch.Tensor): 70 | ret = arrays 71 | elif type(arrays) is dict: 72 | ret = {} 73 | for k, v in arrays.items(): 74 | if isinstance(v, np.ndarray): 75 | v = torch.from_numpy(v).float() 76 | if type(v) is dict: 77 | v = convert_to_tensor(v) 78 | ret[k] = v 79 | return ret 80 | 81 | def move_to_cpu(tensors): 82 | ret = {} 83 | for k, v in tensors.items(): 84 | if isinstance(v, torch.Tensor): 85 | v = v.cpu() 86 | if type(v) is dict: 87 | v = move_to_cpu(v) 88 | ret[k] = v 89 
| return ret 90 | 91 | 92 | def move_to_cuda(batch, gpu_id=0): 93 | # base case: object can be directly moved using `cuda` or `to` 94 | if callable(getattr(batch, 'cuda', None)): 95 | return batch.cuda(gpu_id, non_blocking=True) 96 | elif callable(getattr(batch, 'to', None)): 97 | return batch.to(torch.device('cuda', gpu_id), non_blocking=True) 98 | elif isinstance(batch, list): 99 | for i, x in enumerate(batch): 100 | batch[i] = move_to_cuda(x, gpu_id) 101 | return batch 102 | elif isinstance(batch, tuple): 103 | batch = list(batch) 104 | for i, x in enumerate(batch): 105 | batch[i] = move_to_cuda(x, gpu_id) 106 | return tuple(batch) 107 | elif isinstance(batch, dict): 108 | for k, v in batch.items(): 109 | batch[k] = move_to_cuda(v, gpu_id) 110 | return batch 111 | return batch 112 | -------------------------------------------------------------------------------- /data/data_utils/commons/tensor_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | import numpy as np 4 | 5 | 6 | def reduce_tensors(metrics): 7 | new_metrics = {} 8 | for k, v in metrics.items(): 9 | if isinstance(v, torch.Tensor): 10 | dist.all_reduce(v) 11 | v = v / dist.get_world_size() 12 | if type(v) is dict: 13 | v = reduce_tensors(v) 14 | new_metrics[k] = v 15 | return new_metrics 16 | 17 | 18 | def tensors_to_scalars(tensors): 19 | if isinstance(tensors, torch.Tensor): 20 | tensors = tensors.item() 21 | return tensors 22 | elif isinstance(tensors, dict): 23 | new_tensors = {} 24 | for k, v in tensors.items(): 25 | v = tensors_to_scalars(v) 26 | new_tensors[k] = v 27 | return new_tensors 28 | elif isinstance(tensors, list): 29 | return [tensors_to_scalars(v) for v in tensors] 30 | else: 31 | return tensors 32 | 33 | 34 | def convert_to_np(tensors): 35 | if isinstance(tensors, np.ndarray): 36 | return tensors 37 | elif isinstance(tensors, dict): 38 | new_np = {} 39 | for k, v in tensors.items(): 40 | if isinstance(v, torch.Tensor): 41 | v = v.cpu().numpy() 42 | if type(v) is dict: 43 | v = convert_to_np(v) 44 | new_np[k] = v 45 | elif isinstance(tensors, list): 46 | new_np = [] 47 | for v in tensors: 48 | if isinstance(v, torch.Tensor): 49 | v = v.cpu().numpy() 50 | if type(v) is dict: 51 | v = convert_to_np(v) 52 | new_np.append(v) 53 | elif isinstance(tensors, torch.Tensor): 54 | v = tensors 55 | if isinstance(v, torch.Tensor): 56 | v = v.cpu().numpy() 57 | if type(v) is dict: 58 | v = convert_to_np(v) 59 | new_np = v 60 | else: 61 | raise Exception(f'tensors_to_np does not support type {type(tensors)}.') 62 | return new_np 63 | 64 | 65 | def convert_to_tensor(arrays): 66 | if isinstance(arrays, np.ndarray): 67 | v = torch.from_numpy(arrays).float() 68 | ret = v 69 | elif isinstance(arrays, torch.Tensor): 70 | ret = arrays 71 | elif type(arrays) is dict: 72 | ret = {} 73 | for k, v in arrays.items(): 74 | if isinstance(v, np.ndarray): 75 | v = torch.from_numpy(v).float() 76 | if type(v) is dict: 77 | v = convert_to_tensor(v) 78 | ret[k] = v 79 | return ret 80 | 81 | def move_to_cpu(tensors): 82 | ret = {} 83 | for k, v in tensors.items(): 84 | if isinstance(v, torch.Tensor): 85 | v = v.cpu() 86 | if type(v) is dict: 87 | v = move_to_cpu(v) 88 | ret[k] = v 89 | return ret 90 | 91 | 92 | def move_to_cuda(batch, gpu_id=0): 93 | # base case: object can be directly moved using `cuda` or `to` 94 | if callable(getattr(batch, 'cuda', None)): 95 | return batch.cuda(gpu_id, non_blocking=True) 96 | elif callable(getattr(batch, 'to', 
None)): 97 | return batch.to(torch.device('cuda', gpu_id), non_blocking=True) 98 | elif isinstance(batch, list): 99 | for i, x in enumerate(batch): 100 | batch[i] = move_to_cuda(x, gpu_id) 101 | return batch 102 | elif isinstance(batch, tuple): 103 | batch = list(batch) 104 | for i, x in enumerate(batch): 105 | batch[i] = move_to_cuda(x, gpu_id) 106 | return tuple(batch) 107 | elif isinstance(batch, dict): 108 | for k, v in batch.items(): 109 | batch[k] = move_to_cuda(v, gpu_id) 110 | return batch 111 | return batch 112 | -------------------------------------------------------------------------------- /AToM/model/adan.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.optim import Optimizer 5 | 6 | 7 | def exists(val): 8 | return val is not None 9 | 10 | 11 | class Adan(Optimizer): 12 | def __init__( 13 | self, 14 | params, 15 | lr=1e-3, 16 | betas=(0.02, 0.08, 0.01), 17 | eps=1e-8, 18 | weight_decay=0, 19 | restart_cond: callable = None, 20 | ): 21 | assert len(betas) == 3 22 | 23 | defaults = dict( 24 | lr=lr, 25 | betas=betas, 26 | eps=eps, 27 | weight_decay=weight_decay, 28 | restart_cond=restart_cond, 29 | ) 30 | 31 | super().__init__(params, defaults) 32 | 33 | def step(self, closure=None): 34 | loss = None 35 | 36 | if exists(closure): 37 | loss = closure() 38 | 39 | for group in self.param_groups: 40 | 41 | lr = group["lr"] 42 | beta1, beta2, beta3 = group["betas"] 43 | weight_decay = group["weight_decay"] 44 | eps = group["eps"] 45 | restart_cond = group["restart_cond"] 46 | 47 | for p in group["params"]: 48 | if not exists(p.grad): 49 | continue 50 | 51 | data, grad = p.data, p.grad.data 52 | assert not grad.is_sparse 53 | 54 | state = self.state[p] 55 | 56 | if len(state) == 0: 57 | state["step"] = 0 58 | state["prev_grad"] = torch.zeros_like(grad) 59 | state["m"] = torch.zeros_like(grad) 60 | state["v"] = torch.zeros_like(grad) 61 | state["n"] = torch.zeros_like(grad) 62 | 63 | step, m, v, n, prev_grad = ( 64 | state["step"], 65 | state["m"], 66 | state["v"], 67 | state["n"], 68 | state["prev_grad"], 69 | ) 70 | 71 | if step > 0: 72 | prev_grad = state["prev_grad"] 73 | 74 | # main algorithm 75 | 76 | m.mul_(1 - beta1).add_(grad, alpha=beta1) 77 | 78 | grad_diff = grad - prev_grad 79 | 80 | v.mul_(1 - beta2).add_(grad_diff, alpha=beta2) 81 | 82 | next_n = (grad + (1 - beta2) * grad_diff) ** 2 83 | 84 | n.mul_(1 - beta3).add_(next_n, alpha=beta3) 85 | 86 | # bias correction terms 87 | 88 | step += 1 89 | 90 | correct_m, correct_v, correct_n = map( 91 | lambda n: 1 / (1 - (1 - n) ** step), (beta1, beta2, beta3) 92 | ) 93 | 94 | # gradient step 95 | 96 | def grad_step_(data, m, v, n): 97 | weighted_step_size = lr / (n * correct_n).sqrt().add_(eps) 98 | 99 | denom = 1 + weight_decay * lr 100 | 101 | data.addcmul_( 102 | weighted_step_size, 103 | (m * correct_m + (1 - beta2) * v * correct_v), 104 | value=-1.0, 105 | ).div_(denom) 106 | 107 | grad_step_(data, m, v, n) 108 | 109 | # restart condition 110 | 111 | if exists(restart_cond) and restart_cond(state): 112 | m.data.copy_(grad) 113 | v.zero_() 114 | n.data.copy_(grad ** 2) 115 | 116 | grad_step_(data, m, v, n) 117 | 118 | # set new incremented step 119 | 120 | prev_grad.copy_(grad) 121 | state["step"] = step 122 | 123 | return loss 124 | -------------------------------------------------------------------------------- /data/data_utils/preprocess/unify_fps.py: -------------------------------------------------------------------------------- 1 | 
import os 2 | import cv2 3 | import glob 4 | import time 5 | import torch 6 | import random 7 | import shutil 8 | from tqdm import tqdm 9 | import matplotlib 10 | import numpy as np 11 | from datetime import datetime 12 | from matplotlib import pyplot as plt 13 | from torch.utils.data import Dataset, DataLoader 14 | import argparse, os, cv2, traceback, subprocess 15 | import pdb 16 | import sys 17 | import subprocess 18 | 19 | def change_video_fps(input_path, output_path, fps): 20 | command = f'ffmpeg -i {input_path} -r {fps} {output_path}' 21 | subprocess.call(command, shell=True) 22 | 23 | class Dataset_(Dataset): 24 | def __init__(self,args): 25 | self.args=args 26 | self.device = torch.device('cuda') 27 | self.total_idx = 0 28 | self.start_time = time.time() 29 | 30 | self.path_to_mp4 = args.load_video_path 31 | self.path_to_frame = args.save_video_path 32 | 33 | os.makedirs(self.path_to_frame, exist_ok=True) 34 | 35 | self.video_path_list= [] 36 | self.frame_dir_path_list= [] 37 | 38 | self.total_num_list = [] 39 | self.initList() 40 | 41 | 42 | def initList(self): 43 | length = 0 44 | videos = glob.glob(os.path.join(self.path_to_mp4, '*', "*audio.mp4")) 45 | videos.sort() 46 | 47 | 48 | for video in videos: 49 | self.frame_dir_path_list.append(video.replace('videos/','videos_25/')) 50 | 51 | self.video_path_list = videos 52 | self.frame_dir_path_list = self.frame_dir_path_list 53 | self.length = len(self.video_path_list) 54 | 55 | def change_video_fps(self, fps, input_path, output_path): 56 | template = 'ffmpeg -y -i {} -c:v libx264 -r {} {}' 57 | command = template.format(input_path, fps, output_path) 58 | subprocess.call(command, shell=True) 59 | 60 | def generate_and_save_frame(self, idx): 61 | input_path = self.video_path_list[idx] 62 | output_path = self.frame_dir_path_list[idx] 63 | os.makedirs(os.path.dirname(os.path.dirname(output_path)), exist_ok=True) 64 | os.makedirs(os.path.dirname(output_path), exist_ok=True) 65 | 66 | self.change_video_fps(args.fps, input_path, output_path) 67 | 68 | return 0 69 | 70 | def __len__(self): 71 | return self.length 72 | 73 | def __getitem__(self, idx): 74 | start_time = time.time() 75 | self.generate_and_save_frame(idx) 76 | 77 | return [0] 78 | 79 | if __name__ == "__main__": 80 | parser = argparse.ArgumentParser() 81 | 82 | parser.add_argument('--load_video_path', type=str, default='/media/data1/HDTF/videos', # Source Video Roots 83 | help='path of the directory for loading videos') 84 | parser.add_argument('--save_video_path', type=str, default='/media/data1/HDTF/videos_25', # Saving Roots 85 | help='path of the directory for saving frames of videos') 86 | 87 | parser.add_argument('--fps', type=int, default=25, 88 | help='fps') 89 | parser.add_argument('--batch_size', type=int, default=1, 90 | help='audio sampling rate') 91 | parser.add_argument('--num_workers', type=int, default=6, 92 | help='audio sampling rate') 93 | args = parser.parse_args() 94 | count = 0 95 | 96 | dataset = Dataset_(args) 97 | data_loader = DataLoader(dataset=dataset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=False) 98 | start_time = time.time() 99 | for i, video_path in enumerate(tqdm(data_loader)): 100 | video_path = video_path 101 | dataset.generate_and_save_frame(video_path) 102 | print('done') -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/losses.py: -------------------------------------------------------------------------------- 1 | 
import torch 2 | import math 3 | 4 | 5 | class CombinedMarginLoss(torch.nn.Module): 6 | def __init__(self, 7 | s, 8 | m1, 9 | m2, 10 | m3, 11 | interclass_filtering_threshold=0): 12 | super().__init__() 13 | self.s = s 14 | self.m1 = m1 15 | self.m2 = m2 16 | self.m3 = m3 17 | self.interclass_filtering_threshold = interclass_filtering_threshold 18 | 19 | # For ArcFace 20 | self.cos_m = math.cos(self.m2) 21 | self.sin_m = math.sin(self.m2) 22 | self.theta = math.cos(math.pi - self.m2) 23 | self.sinmm = math.sin(math.pi - self.m2) * self.m2 24 | self.easy_margin = False 25 | 26 | 27 | def forward(self, logits, labels): 28 | index_positive = torch.where(labels != -1)[0] 29 | 30 | if self.interclass_filtering_threshold > 0: 31 | with torch.no_grad(): 32 | dirty = logits > self.interclass_filtering_threshold 33 | dirty = dirty.float() 34 | mask = torch.ones([index_positive.size(0), logits.size(1)], device=logits.device) 35 | mask.scatter_(1, labels[index_positive], 0) 36 | dirty[index_positive] *= mask 37 | tensor_mul = 1 - dirty 38 | logits = tensor_mul * logits 39 | 40 | target_logit = logits[index_positive, labels[index_positive].view(-1)] 41 | 42 | if self.m1 == 1.0 and self.m3 == 0.0: 43 | with torch.no_grad(): 44 | target_logit.arccos_() 45 | logits.arccos_() 46 | final_target_logit = target_logit + self.m2 47 | logits[index_positive, labels[index_positive].view(-1)] = final_target_logit 48 | logits.cos_() 49 | logits = logits * self.s 50 | 51 | elif self.m3 > 0: 52 | final_target_logit = target_logit - self.m3 53 | logits[index_positive, labels[index_positive].view(-1)] = final_target_logit 54 | logits = logits * self.s 55 | else: 56 | raise 57 | 58 | return logits 59 | 60 | class ArcFace(torch.nn.Module): 61 | """ ArcFace (https://arxiv.org/pdf/1801.07698v1.pdf): 62 | """ 63 | def __init__(self, s=64.0, margin=0.5): 64 | super(ArcFace, self).__init__() 65 | self.scale = s 66 | self.margin = margin 67 | self.cos_m = math.cos(margin) 68 | self.sin_m = math.sin(margin) 69 | self.theta = math.cos(math.pi - margin) 70 | self.sinmm = math.sin(math.pi - margin) * margin 71 | self.easy_margin = False 72 | 73 | 74 | def forward(self, logits: torch.Tensor, labels: torch.Tensor): 75 | index = torch.where(labels != -1)[0] 76 | target_logit = logits[index, labels[index].view(-1)] 77 | 78 | with torch.no_grad(): 79 | target_logit.arccos_() 80 | logits.arccos_() 81 | final_target_logit = target_logit + self.margin 82 | logits[index, labels[index].view(-1)] = final_target_logit 83 | logits.cos_() 84 | logits = logits * self.s 85 | return logits 86 | 87 | 88 | class CosFace(torch.nn.Module): 89 | def __init__(self, s=64.0, m=0.40): 90 | super(CosFace, self).__init__() 91 | self.s = s 92 | self.m = m 93 | 94 | def forward(self, logits: torch.Tensor, labels: torch.Tensor): 95 | index = torch.where(labels != -1)[0] 96 | target_logit = logits[index, labels[index].view(-1)] 97 | final_target_logit = target_logit - self.m 98 | logits[index, labels[index].view(-1)] = final_target_logit 99 | logits = logits * self.s 100 | return logits 101 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Dataset download & Preprocessing 2 | ## LRS3 3 | Follow [here](https://github.com/yerfor/GeneFace/blob/main/docs/process_data/process_lrs3.md) to preprocess LRS3. 
4 | 5 | ## HDTF 6 | 7 | ### Download 8 | Follow [here](https://github.com/universome/HDTF) to download HDTF and crop the videos to 256x256 resolution. 9 | 10 | ## Structure 11 | After following the steps, the directory structure should look like this: 12 | ``` 13 | data 14 | |-- train 15 | |-- lrs3 16 | |-- sizes_train.npy 17 | |-- sizes_val.npy 18 | |-- spk_id2spk_idx.npy 19 | |-- train.data 20 | |-- val.data 21 | 22 | 23 | |-- HDTF 24 | |-- frames 25 | |-- id1 26 | |-- 00000.jpg 27 | |-- 00001.jpg 28 | |-- ... 29 | |-- id2 30 | |-- 00000.jpg 31 | |-- 00001.jpg 32 | |-- ... 33 | |-- ... 34 | |-- keypoints 35 | |-- face-centric 36 | |-- posed 37 | |-- id1 38 | |-- 00000.npy 39 | |-- 00001.npy 40 | |-- ... 41 | |-- id2 42 | |-- 00000.npy 43 | |-- 00001.npy 44 | |-- ... 45 | |-- ... 46 | |-- unposed 47 | |-- id1 48 | |-- 00000.npy 49 | |-- 00001.npy 50 | |-- ... 51 | |-- id2 52 | |-- 00000.npy 53 | |-- 00001.npy 54 | |-- ... 55 | |-- ... 56 | |-- non-face-centric 57 | |-- posed 58 | |-- id1 59 | |-- 00000.npy 60 | |-- 00001.npy 61 | |-- ... 62 | |-- id2 63 | |-- 00000.npy 64 | |-- 00001.npy 65 | |-- ... 66 | |-- ... 67 | 68 | ``` 69 | 70 | ### Video 2 Frames 71 | Before you convert videos into frames, check that all videos are at 25 fps. 72 | If not, adjust them with `data/data_utils/preprocess/unify_fps.py`. 73 | Once preprocessing is complete and the videos are unified at 25 fps, you can convert them into frames with `data/data_utils/preprocess/video2frame_hdtf.py` (see the example invocation at the end of this README). 74 | 75 | ### Motion Extraction from frames, used in training MToV 76 | 77 | 78 | ### Environment 79 | 85 | ```bash 86 | conda create -n preprocess python=3.9.16 -y 87 | conda activate preprocess 88 | conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch 89 | conda install -c fvcore -c iopath -c conda-forge fvcore iopath -y 90 | conda install -c bottler nvidiacub -y 91 | conda install pytorch3d==0.7.4 -c pytorch3d -y 92 | conda install ffmpeg 93 | python -m pip install face_alignment einops trimesh natsort 94 | ``` 95 | 96 | ```bash 97 | conda activate preprocess 98 | cd data/data_utils 99 | python preprocess/process_video_3dmm_rollback_hdtf_batchify.py 100 | ``` 101 | After running the code above, you will obtain several types of keypoints in `HDTF/keypoints`. `face-centric` and `non-face-centric` indicate whether or not the keypoints are aligned in the center, and `unposed` and `posed` specify whether or not the pose of the landmarks is frontalized.
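For reference, the "Video 2 Frames" step above can be invoked as in the minimal sketch below. The paths are placeholders; the flags shown are the ones defined in `unify_fps.py`'s argument parser. Note that `unify_fps.py` looks for videos laid out as `<load_video_path>/<id>/*audio.mp4`, while `video2frame_hdtf.py` takes its input/output directories from the `vid_dir` / `saving_dir` variables in its `__main__` block, so edit those before running it.

```bash
# From data/data_utils: re-encode the source videos to 25 fps (example paths).
python preprocess/unify_fps.py \
    --load_video_path /path/to/HDTF/videos \
    --save_video_path /path/to/HDTF/videos_25 \
    --fps 25

# Then extract frames (set vid_dir / saving_dir inside the script first).
python preprocess/video2frame_hdtf.py
```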
102 | 103 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200 2 | from .mobilefacenet import get_mbf 3 | 4 | 5 | def get_model(name, **kwargs): 6 | # resnet 7 | if name == "r18": 8 | return iresnet18(False, **kwargs) 9 | elif name == "r34": 10 | return iresnet34(False, **kwargs) 11 | elif name == "r50": 12 | return iresnet50(False, **kwargs) 13 | elif name == "r100": 14 | return iresnet100(False, **kwargs) 15 | elif name == "r200": 16 | return iresnet200(False, **kwargs) 17 | elif name == "r2060": 18 | from .iresnet2060 import iresnet2060 19 | return iresnet2060(False, **kwargs) 20 | 21 | elif name == "mbf": 22 | fp16 = kwargs.get("fp16", False) 23 | num_features = kwargs.get("num_features", 512) 24 | return get_mbf(fp16=fp16, num_features=num_features) 25 | 26 | elif name == "mbf_large": 27 | from .mobilefacenet import get_mbf_large 28 | fp16 = kwargs.get("fp16", False) 29 | num_features = kwargs.get("num_features", 512) 30 | return get_mbf_large(fp16=fp16, num_features=num_features) 31 | 32 | elif name == "vit_t": 33 | num_features = kwargs.get("num_features", 512) 34 | from .vit import VisionTransformer 35 | return VisionTransformer( 36 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=256, depth=12, 37 | num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1) 38 | 39 | elif name == "vit_t_dp005_mask0": # For WebFace42M 40 | num_features = kwargs.get("num_features", 512) 41 | from .vit import VisionTransformer 42 | return VisionTransformer( 43 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=256, depth=12, 44 | num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.0) 45 | 46 | elif name == "vit_s": 47 | num_features = kwargs.get("num_features", 512) 48 | from .vit import VisionTransformer 49 | return VisionTransformer( 50 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=12, 51 | num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1) 52 | 53 | elif name == "vit_s_dp005_mask_0": # For WebFace42M 54 | num_features = kwargs.get("num_features", 512) 55 | from .vit import VisionTransformer 56 | return VisionTransformer( 57 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=12, 58 | num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.0) 59 | 60 | elif name == "vit_b": 61 | # this is a feature 62 | num_features = kwargs.get("num_features", 512) 63 | from .vit import VisionTransformer 64 | return VisionTransformer( 65 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=24, 66 | num_heads=8, drop_path_rate=0.1, norm_layer="ln", mask_ratio=0.1, using_checkpoint=True) 67 | 68 | elif name == "vit_b_dp005_mask_005": # For WebFace42M 69 | # this is a feature 70 | num_features = kwargs.get("num_features", 512) 71 | from .vit import VisionTransformer 72 | return VisionTransformer( 73 | img_size=112, patch_size=9, num_classes=num_features, embed_dim=512, depth=24, 74 | num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.05, using_checkpoint=True) 75 | 76 | elif name == "vit_l_dp005_mask_005": # For WebFace42M 77 | # this is a feature 78 | num_features = kwargs.get("num_features", 512) 79 | from .vit import VisionTransformer 80 | return VisionTransformer( 81 | 
img_size=112, patch_size=9, num_classes=num_features, embed_dim=768, depth=24, 82 | num_heads=8, drop_path_rate=0.05, norm_layer="ln", mask_ratio=0.05, using_checkpoint=True) 83 | 84 | else: 85 | raise ValueError() 86 | -------------------------------------------------------------------------------- /MToV/tools/scheduler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LambdaWarmUpCosineScheduler: 5 | """ 6 | note: use with a base_lr of 1.0 7 | """ 8 | def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_steps, verbosity_interval=0): 9 | self.lr_warm_up_steps = warm_up_steps 10 | self.lr_start = lr_start 11 | self.lr_min = lr_min 12 | self.lr_max = lr_max 13 | self.lr_max_decay_steps = max_decay_steps 14 | self.last_lr = 0. 15 | self.verbosity_interval = verbosity_interval 16 | 17 | def schedule(self, n, **kwargs): 18 | if self.verbosity_interval > 0: 19 | if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_lr}") 20 | if n < self.lr_warm_up_steps: 21 | lr = (self.lr_max - self.lr_start) / self.lr_warm_up_steps * n + self.lr_start 22 | self.last_lr = lr 23 | return lr 24 | else: 25 | t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps) 26 | t = min(t, 1.0) 27 | lr = self.lr_min + 0.5 * (self.lr_max - self.lr_min) * ( 28 | 1 + np.cos(t * np.pi)) 29 | self.last_lr = lr 30 | return lr 31 | 32 | def __call__(self, n, **kwargs): 33 | return self.schedule(n,**kwargs) 34 | 35 | 36 | class LambdaWarmUpCosineScheduler2: 37 | """ 38 | supports repeated iterations, configurable via lists 39 | note: use with a base_lr of 1.0. 40 | """ 41 | def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths, verbosity_interval=0): 42 | assert len(warm_up_steps) == len(f_min) == len(f_max) == len(f_start) == len(cycle_lengths) 43 | self.lr_warm_up_steps = warm_up_steps 44 | self.f_start = f_start 45 | self.f_min = f_min 46 | self.f_max = f_max 47 | self.cycle_lengths = cycle_lengths 48 | self.cum_cycles = np.cumsum([0] + list(self.cycle_lengths)) 49 | self.last_f = 0. 
50 | self.verbosity_interval = verbosity_interval 51 | 52 | def find_in_interval(self, n): 53 | interval = 0 54 | for cl in self.cum_cycles[1:]: 55 | if n <= cl: 56 | return interval 57 | interval += 1 58 | 59 | def schedule(self, n, **kwargs): 60 | cycle = self.find_in_interval(n) 61 | n = n - self.cum_cycles[cycle] 62 | if self.verbosity_interval > 0: 63 | if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " 64 | f"current cycle {cycle}") 65 | if n < self.lr_warm_up_steps[cycle]: 66 | f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] 67 | self.last_f = f 68 | return f 69 | else: 70 | t = (n - self.lr_warm_up_steps[cycle]) / (self.cycle_lengths[cycle] - self.lr_warm_up_steps[cycle]) 71 | t = min(t, 1.0) 72 | f = self.f_min[cycle] + 0.5 * (self.f_max[cycle] - self.f_min[cycle]) * ( 73 | 1 + np.cos(t * np.pi)) 74 | self.last_f = f 75 | return f 76 | 77 | def __call__(self, n, **kwargs): 78 | return self.schedule(n, **kwargs) 79 | 80 | 81 | class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2): 82 | 83 | def schedule(self, n, **kwargs): 84 | cycle = self.find_in_interval(n) 85 | n = n - self.cum_cycles[cycle] 86 | if self.verbosity_interval > 0: 87 | if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " 88 | f"current cycle {cycle}") 89 | 90 | if n < self.lr_warm_up_steps[cycle]: 91 | f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] 92 | self.last_f = f 93 | return f 94 | else: 95 | f = self.f_min[cycle] + (self.f_max[cycle] - self.f_min[cycle]) * (self.cycle_lengths[cycle] - n) / (self.cycle_lengths[cycle]) 96 | self.last_f = f 97 | return f -------------------------------------------------------------------------------- /MToV/evals/fvd/convert_tf_pretrained.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from collections import OrderedDict 3 | import tensorflow_hub as hub 4 | import torch 5 | 6 | from src_pytorch.fvd.pytorch_i3d import InceptionI3d 7 | 8 | 9 | def convert_name(name): 10 | mapping = { 11 | 'conv_3d': 'conv3d', 12 | 'batch_norm': 'bn', 13 | 'w:0': 'weight', 14 | 'b:0': 'bias', 15 | 'moving_mean:0': 'running_mean', 16 | 'moving_variance:0': 'running_var', 17 | 'beta:0': 'bias' 18 | } 19 | 20 | segs = name.split('/') 21 | new_segs = [] 22 | i = 0 23 | while i < len(segs): 24 | seg = segs[i] 25 | if 'Mixed' in seg: 26 | new_segs.append(seg) 27 | elif 'Conv' in seg and 'Mixed' not in name: 28 | new_segs.append(seg) 29 | elif 'Branch' in seg: 30 | branch_i = int(seg.split('_')[-1]) 31 | i += 1 32 | seg = segs[i] 33 | 34 | # special case due to typo in original code 35 | if 'Mixed_5b' in name and branch_i == 2: 36 | if '1x1' in seg: 37 | new_segs.append(f'b{branch_i}a') 38 | elif '3x3' in seg: 39 | new_segs.append(f'b{branch_i}b') 40 | else: 41 | raise Exception() 42 | # Either Conv3d_{i}a_... or Conv3d_{i}b_... 
43 | elif 'a' in seg: 44 | if branch_i == 0: 45 | new_segs.append('b0') 46 | else: 47 | new_segs.append(f'b{branch_i}a') 48 | elif 'b' in seg: 49 | new_segs.append(f'b{branch_i}b') 50 | else: 51 | raise Exception 52 | elif seg == 'Logits': 53 | new_segs.append('logits') 54 | i += 1 55 | elif seg in mapping: 56 | new_segs.append(mapping[seg]) 57 | else: 58 | raise Exception(f"No match found for seg {seg} in name {name}") 59 | 60 | i += 1 61 | return '.'.join(new_segs) 62 | 63 | def convert_tensor(tensor): 64 | tensor_dim = len(tensor.shape) 65 | if tensor_dim == 5: # conv or bn 66 | if all([t == 1 for t in tensor.shape[:-1]]): 67 | tensor = tensor.squeeze() 68 | else: 69 | tensor = tensor.permute(4, 3, 0, 1, 2).contiguous() 70 | elif tensor_dim == 1: # conv bias 71 | pass 72 | else: 73 | raise Exception(f"Invalid shape {tensor.shape}") 74 | return tensor 75 | 76 | n_class = int(sys.argv[1]) # 600 or 400 77 | assert n_class in [400, 600] 78 | 79 | # Converts model from https://github.com/google-research/google-research/tree/master/frechet_video_distance 80 | # to pytorch version for loading 81 | model_url = f"https://tfhub.dev/deepmind/i3d-kinetics-{n_class}/1" 82 | i3d = hub.load(model_url) 83 | name_prefix = 'RGB/inception_i3d/' 84 | 85 | print('Creating state_dict...') 86 | all_names = [] 87 | state_dict = OrderedDict() 88 | for var in i3d.variables: 89 | name = var.name[len(name_prefix):] 90 | new_name = convert_name(name) 91 | all_names.append(new_name) 92 | 93 | tensor = torch.FloatTensor(var.value().numpy()) 94 | new_tensor = convert_tensor(tensor) 95 | 96 | state_dict[new_name] = new_tensor 97 | 98 | if 'bn.bias' in new_name: 99 | new_name = new_name[:-4] + 'weight' # bn.weight 100 | new_tensor = torch.ones_like(new_tensor).float() 101 | state_dict[new_name] = new_tensor 102 | 103 | print(f'Complete state_dict with {len(state_dict)} entries') 104 | 105 | s = dict() 106 | for i, n in enumerate(all_names): 107 | s[n] = s.get(n, []) + [i] 108 | 109 | for k, v in s.items(): 110 | if len(v) > 1: 111 | print('dup', k) 112 | for i in v: 113 | print('\t', i3d.variables[i].name) 114 | 115 | print('Testing load_state_dict...') 116 | print('Creating model...') 117 | 118 | i3d = InceptionI3d(n_class, in_channels=3) 119 | 120 | print('Loading state_dict...') 121 | i3d.load_state_dict(state_dict) 122 | 123 | print(f'Saving state_dict as fvd/i3d_pretrained_{n_class}.pt') 124 | torch.save(state_dict, f'fvd/i3d_pretrained_{n_class}.pt') 125 | 126 | print('Done') 127 | 128 | -------------------------------------------------------------------------------- /data/data_utils/preprocess/video2frame_hdtf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import random 4 | from tqdm import tqdm 5 | import ffmpeg 6 | import pickle 7 | from multiprocessing import Pool 8 | import argparse, os, cv2, traceback, subprocess 9 | 10 | import matplotlib 11 | import numpy as np 12 | from datetime import datetime 13 | from matplotlib import pyplot as plt 14 | import sys 15 | from PIL import Image 16 | from glob import glob 17 | import PIL 18 | import PIL.Image 19 | import scipy 20 | import scipy.ndimage 21 | import skimage.io as io 22 | from scipy.ndimage import gaussian_filter1d 23 | from matplotlib import pyplot as plt 24 | import torch.nn.functional as F 25 | # import torchvision.utils as vutils 26 | import pdb 27 | 28 | import ffmpeg 29 | 30 | 31 | def get_video_info(input_file_path): 32 | 33 | probe = ffmpeg.probe(input_file_path) 34 | 
video_stream = next( 35 | (stream for stream in probe["streams"] if stream["codec_type"] == "video"), None 36 | ) 37 | frame_rate = float(video_stream["r_frame_rate"].split("/")[0]) 38 | frame_num = int(video_stream["nb_frames"]) 39 | return int(video_stream["height"]), int(video_stream["width"]), frame_rate, frame_num 40 | 41 | 42 | def multi_preprocess_video(x): 43 | (iden, output_folder, (height, width, frame_rate, total_frame_num)) = x 44 | 45 | input_file_path = os.path.join(vid_dir, iden, "video.mp4") 46 | os.makedirs(output_folder, exist_ok=True) 47 | 48 | video = cv2.VideoCapture(input_file_path) 49 | print(iden, frame_rate) 50 | 51 | count = 0 52 | while True: 53 | success, frame = video.read() 54 | if not success: 55 | break 56 | file_name = f"{count:0>5}" 57 | frame_path = os.path.join(output_folder, f"{file_name}.jpg") 58 | cv2.imwrite(frame_path, frame) 59 | count += 1 60 | 61 | 62 | return None 63 | 64 | 65 | def preprocess_video_folder( 66 | reprocessings, multi_processing, option=None, workers=32 67 | ): 68 | print("Preprocess start !!!") 69 | if option is None: 70 | option = {} 71 | 72 | reprocessings.sort() 73 | multi_output_frame_path_list = [ 74 | os.path.join(saving_dir, iden) for iden in reprocessings 75 | ] 76 | multi_vid_info_list = [ 77 | get_video_info(os.path.join(vid_dir, f"{iden}/video.mp4")) for iden in reprocessings 78 | ] 79 | 80 | def initializer(): 81 | sys.stdout = open(os.devnull, "w") 82 | 83 | if multi_processing: 84 | """ 85 | for real running 86 | """ 87 | pool = Pool(workers) 88 | total = len(reprocessings) 89 | 90 | with tqdm(total=total) as pbar: 91 | pool.imap(multi_preprocess_video, zip(reprocessings, multi_output_frame_path_list, multi_vid_info_list)) 92 | pbar.update() 93 | 94 | 95 | pool.close() 96 | pool.join() 97 | return 98 | 99 | 100 | def read_file(filepath: os.PathLike): 101 | """ 102 | Reads a file as a space-separated dataframe, where the first column is the index 103 | """ 104 | with open(filepath, "r") as f: 105 | lines = f.read().splitlines() 106 | lines = [l.split(":")[0] for l in lines] 107 | 108 | return lines 109 | 110 | 111 | if __name__ == "__main__": 112 | parser = argparse.ArgumentParser(description="Preprocessor") 113 | parser.add_argument("--multi_processing", type=bool, default=True) 114 | parser.add_argument( 115 | "--gpu", help="Number of GPUs across which to run in parallel", default=0, type=int 116 | ) 117 | 118 | vid_dir = "/media/data1/HDTF_preprocessed/25_fps/" 119 | saving_dir = "/media/data/HDTF_preprocessed/25_frame/HDTF" 120 | eval_list = os.listdir(vid_dir) 121 | 122 | process_id = [] 123 | for id_ in tqdm(eval_list): 124 | try: 125 | vid = f"{vid_dir}/{id_}/video.mp4" 126 | height, width, frame_rate, frame_num = get_video_info(vid) 127 | if frame_num != len(glob(os.path.join(saving_dir, id_, '*.jpg'))): 128 | process_id.append(id_) 129 | except: 130 | print(id_) 131 | 132 | print(len(process_id)) 133 | args = parser.parse_args() 134 | preprocess_video_folder( 135 | process_id, 136 | args.multi_processing, 137 | ) 138 | -------------------------------------------------------------------------------- /AToM/model/rotary_embedding_torch.py: -------------------------------------------------------------------------------- 1 | from inspect import isfunction 2 | from math import log, pi 3 | 4 | import torch 5 | from einops import rearrange, repeat 6 | from torch import einsum, nn 7 | 8 | # helper functions 9 | 10 | 11 | def exists(val): 12 | return val is not None 13 | 14 | 15 | def broadcat(tensors, dim=-1): 16 | 
num_tensors = len(tensors) 17 | shape_lens = set(list(map(lambda t: len(t.shape), tensors))) 18 | assert len(shape_lens) == 1, "tensors must all have the same number of dimensions" 19 | shape_len = list(shape_lens)[0] 20 | 21 | dim = (dim + shape_len) if dim < 0 else dim 22 | dims = list(zip(*map(lambda t: list(t.shape), tensors))) 23 | 24 | expandable_dims = [(i, val) for i, val in enumerate(dims) if i != dim] 25 | assert all( 26 | [*map(lambda t: len(set(t[1])) <= 2, expandable_dims)] 27 | ), "invalid dimensions for broadcastable concatentation" 28 | max_dims = list(map(lambda t: (t[0], max(t[1])), expandable_dims)) 29 | expanded_dims = list(map(lambda t: (t[0], (t[1],) * num_tensors), max_dims)) 30 | expanded_dims.insert(dim, (dim, dims[dim])) 31 | expandable_shapes = list(zip(*map(lambda t: t[1], expanded_dims))) 32 | tensors = list(map(lambda t: t[0].expand(*t[1]), zip(tensors, expandable_shapes))) 33 | return torch.cat(tensors, dim=dim) 34 | 35 | 36 | # rotary embedding helper functions 37 | 38 | 39 | def rotate_half(x): 40 | x = rearrange(x, "... (d r) -> ... d r", r=2) 41 | x1, x2 = x.unbind(dim=-1) 42 | x = torch.stack((-x2, x1), dim=-1) 43 | return rearrange(x, "... d r -> ... (d r)") 44 | 45 | 46 | def apply_rotary_emb(freqs, t, start_index=0): 47 | freqs = freqs.to(t) 48 | rot_dim = freqs.shape[-1] 49 | end_index = start_index + rot_dim 50 | assert ( 51 | rot_dim <= t.shape[-1] 52 | ), f"feature dimension {t.shape[-1]} is not of sufficient size to rotate in all the positions {rot_dim}" 53 | t_left, t, t_right = ( 54 | t[..., :start_index], 55 | t[..., start_index:end_index], 56 | t[..., end_index:], 57 | ) 58 | t = (t * freqs.cos()) + (rotate_half(t) * freqs.sin()) 59 | return torch.cat((t_left, t, t_right), dim=-1) 60 | 61 | 62 | # learned rotation helpers 63 | 64 | 65 | def apply_learned_rotations(rotations, t, start_index=0, freq_ranges=None): 66 | if exists(freq_ranges): 67 | rotations = einsum("..., f -> ... f", rotations, freq_ranges) 68 | rotations = rearrange(rotations, "... r f -> ... (r f)") 69 | 70 | rotations = repeat(rotations, "... n -> ... 
(n r)", r=2) 71 | return apply_rotary_emb(rotations, t, start_index=start_index) 72 | 73 | 74 | # classes 75 | 76 | 77 | class RotaryEmbedding(nn.Module): 78 | def __init__( 79 | self, 80 | dim, 81 | custom_freqs=None, 82 | freqs_for="lang", 83 | theta=10000, 84 | max_freq=10, 85 | num_freqs=1, 86 | learned_freq=False, 87 | ): 88 | super().__init__() 89 | if exists(custom_freqs): 90 | freqs = custom_freqs 91 | elif freqs_for == "lang": 92 | freqs = 1.0 / ( 93 | theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim) 94 | ) 95 | elif freqs_for == "pixel": 96 | freqs = torch.linspace(1.0, max_freq / 2, dim // 2) * pi 97 | elif freqs_for == "constant": 98 | freqs = torch.ones(num_freqs).float() 99 | else: 100 | raise ValueError(f"unknown modality {freqs_for}") 101 | 102 | self.cache = dict() 103 | 104 | if learned_freq: 105 | self.freqs = nn.Parameter(freqs) 106 | else: 107 | self.register_buffer("freqs", freqs) 108 | 109 | def rotate_queries_or_keys(self, t, seq_dim=-2): 110 | device = t.device 111 | seq_len = t.shape[seq_dim] 112 | freqs = self.forward( 113 | lambda: torch.arange(seq_len, device=device), cache_key=seq_len 114 | ) 115 | return apply_rotary_emb(freqs, t) 116 | 117 | def forward(self, t, cache_key=None): 118 | if exists(cache_key) and cache_key in self.cache: 119 | return self.cache[cache_key] 120 | 121 | if isfunction(t): 122 | t = t() 123 | 124 | freqs = self.freqs 125 | 126 | freqs = torch.einsum("..., f -> ... f", t.type(freqs.dtype), freqs) 127 | freqs = repeat(freqs, "... n -> ... (n r)", r=2) 128 | 129 | if exists(cache_key): 130 | self.cache[cache_key] = freqs 131 | 132 | return freqs 133 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/losses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from kornia.geometry import warp_affine 5 | import torch.nn.functional as F 6 | 7 | def resize_n_crop(image, M, dsize=112): 8 | # image: (b, c, h, w) 9 | # M : (b, 2, 3) 10 | return warp_affine(image, M, dsize=(dsize, dsize)) 11 | 12 | ### perceptual level loss 13 | class PerceptualLoss(nn.Module): 14 | def __init__(self, recog_net, input_size=112): 15 | super(PerceptualLoss, self).__init__() 16 | self.recog_net = recog_net 17 | self.preprocess = lambda x: 2 * x - 1 18 | self.input_size=input_size 19 | def forward(imageA, imageB, M): 20 | """ 21 | 1 - cosine distance 22 | Parameters: 23 | imageA --torch.tensor (B, 3, H, W), range (0, 1) , RGB order 24 | imageB --same as imageA 25 | """ 26 | 27 | imageA = self.preprocess(resize_n_crop(imageA, M, self.input_size)) 28 | imageB = self.preprocess(resize_n_crop(imageB, M, self.input_size)) 29 | 30 | # freeze bn 31 | self.recog_net.eval() 32 | 33 | id_featureA = F.normalize(self.recog_net(imageA), dim=-1, p=2) 34 | id_featureB = F.normalize(self.recog_net(imageB), dim=-1, p=2) 35 | cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) 36 | # assert torch.sum((cosine_d > 1).float()) == 0 37 | return torch.sum(1 - cosine_d) / cosine_d.shape[0] 38 | 39 | def perceptual_loss(id_featureA, id_featureB): 40 | cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) 41 | # assert torch.sum((cosine_d > 1).float()) == 0 42 | return torch.sum(1 - cosine_d) / cosine_d.shape[0] 43 | 44 | ### image level loss 45 | def photo_loss(imageA, imageB, mask, eps=1e-6): 46 | """ 47 | l2 norm (with sqrt, to ensure backward stabililty, use eps, otherwise Nan may occur) 
48 | Parameters: 49 | imageA --torch.tensor (B, 3, H, W), range (0, 1), RGB order 50 | imageB --same as imageA 51 | """ 52 | loss = torch.sqrt(eps + torch.sum((imageA - imageB) ** 2, dim=1, keepdims=True)) * mask 53 | loss = torch.sum(loss) / torch.max(torch.sum(mask), torch.tensor(1.0).to(mask.device)) 54 | return loss 55 | 56 | def landmark_loss(predict_lm, gt_lm, weight=None): 57 | """ 58 | weighted mse loss 59 | Parameters: 60 | predict_lm --torch.tensor (B, 68, 2) 61 | gt_lm --torch.tensor (B, 68, 2) 62 | weight --numpy.array (1, 68) 63 | """ 64 | if not weight: 65 | weight = np.ones([68]) 66 | weight[28:31] = 20 67 | weight[-8:] = 20 68 | weight = np.expand_dims(weight, 0) 69 | weight = torch.tensor(weight).to(predict_lm.device) 70 | loss = torch.sum((predict_lm - gt_lm)**2, dim=-1) * weight 71 | loss = torch.sum(loss) / (predict_lm.shape[0] * predict_lm.shape[1]) 72 | return loss 73 | 74 | 75 | ### regulization 76 | def reg_loss(coeffs_dict, opt=None): 77 | """ 78 | l2 norm without the sqrt, from yu's implementation (mse) 79 | tf.nn.l2_loss https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss 80 | Parameters: 81 | coeffs_dict -- a dict of torch.tensors , keys: id, exp, tex, angle, gamma, trans 82 | 83 | """ 84 | # coefficient regularization to ensure plausible 3d faces 85 | if opt: 86 | w_id, w_exp, w_tex = opt.w_id, opt.w_exp, opt.w_tex 87 | else: 88 | w_id, w_exp, w_tex = 1, 1, 1, 1 89 | creg_loss = w_id * torch.sum(coeffs_dict['id'] ** 2) + \ 90 | w_exp * torch.sum(coeffs_dict['exp'] ** 2) + \ 91 | w_tex * torch.sum(coeffs_dict['tex'] ** 2) 92 | creg_loss = creg_loss / coeffs_dict['id'].shape[0] 93 | 94 | # gamma regularization to ensure a nearly-monochromatic light 95 | gamma = coeffs_dict['gamma'].reshape([-1, 3, 9]) 96 | gamma_mean = torch.mean(gamma, dim=1, keepdims=True) 97 | gamma_loss = torch.mean((gamma - gamma_mean) ** 2) 98 | 99 | return creg_loss, gamma_loss 100 | 101 | def reflectance_loss(texture, mask): 102 | """ 103 | minimize texture variance (mse), albedo regularization to ensure an uniform skin albedo 104 | Parameters: 105 | texture --torch.tensor, (B, N, 3) 106 | mask --torch.tensor, (N), 1 or 0 107 | 108 | """ 109 | mask = mask.reshape([1, mask.shape[0], 1]) 110 | texture_mean = torch.sum(mask * texture, dim=1, keepdims=True) / torch.sum(mask) 111 | loss = torch.sum(((texture - texture_mean) * mask)**2) / (texture.shape[0] * torch.sum(mask)) 112 | return loss 113 | 114 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/data/flist_dataset.py: -------------------------------------------------------------------------------- 1 | """This script defines the custom dataset for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | import os.path 5 | from data.base_dataset import BaseDataset, get_transform, get_affine_mat, apply_img_affine, apply_lm_affine 6 | from data.image_folder import make_dataset 7 | from PIL import Image 8 | import random 9 | import util.util as util 10 | import numpy as np 11 | import json 12 | import torch 13 | from scipy.io import loadmat, savemat 14 | import pickle 15 | from util.preprocess import align_img, estimate_norm 16 | from util.load_mats import load_lm3d 17 | 18 | 19 | def default_flist_reader(flist): 20 | """ 21 | flist format: impath label\nimpath label\n ...(same to caffe's filelist) 22 | """ 23 | imlist = [] 24 | with open(flist, 'r') as rf: 25 | for line in rf.readlines(): 26 | impath = line.strip() 27 | imlist.append(impath) 28 | 29 | return imlist 30 | 31 | def 
jason_flist_reader(flist): 32 | with open(flist, 'r') as fp: 33 | info = json.load(fp) 34 | return info 35 | 36 | def parse_label(label): 37 | return torch.tensor(np.array(label).astype(np.float32)) 38 | 39 | 40 | class FlistDataset(BaseDataset): 41 | """ 42 | It requires one directories to host training images '/path/to/data/train' 43 | You can train the model with the dataset flag '--dataroot /path/to/data'. 44 | """ 45 | 46 | def __init__(self, opt): 47 | """Initialize this dataset class. 48 | 49 | Parameters: 50 | opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions 51 | """ 52 | BaseDataset.__init__(self, opt) 53 | 54 | self.lm3d_std = load_lm3d(opt.bfm_folder) 55 | 56 | msk_names = default_flist_reader(opt.flist) 57 | self.msk_paths = [os.path.join(opt.data_root, i) for i in msk_names] 58 | 59 | self.size = len(self.msk_paths) 60 | self.opt = opt 61 | 62 | self.name = 'train' if opt.isTrain else 'val' 63 | if '_' in opt.flist: 64 | self.name += '_' + opt.flist.split(os.sep)[-1].split('_')[0] 65 | 66 | 67 | def __getitem__(self, index): 68 | """Return a data point and its metadata information. 69 | 70 | Parameters: 71 | index (int) -- a random integer for data indexing 72 | 73 | Returns a dictionary that contains A, B, A_paths and B_paths 74 | img (tensor) -- an image in the input domain 75 | msk (tensor) -- its corresponding attention mask 76 | lm (tensor) -- its corresponding 3d landmarks 77 | im_paths (str) -- image paths 78 | aug_flag (bool) -- a flag used to tell whether its raw or augmented 79 | """ 80 | msk_path = self.msk_paths[index % self.size] # make sure index is within then range 81 | img_path = msk_path.replace('mask/', '') 82 | lm_path = '.'.join(msk_path.replace('mask', 'landmarks').split('.')[:-1]) + '.txt' 83 | 84 | raw_img = Image.open(img_path).convert('RGB') 85 | raw_msk = Image.open(msk_path).convert('RGB') 86 | raw_lm = np.loadtxt(lm_path).astype(np.float32) 87 | 88 | _, img, lm, msk = align_img(raw_img, raw_lm, self.lm3d_std, raw_msk) 89 | 90 | aug_flag = self.opt.use_aug and self.opt.isTrain 91 | if aug_flag: 92 | img, lm, msk = self._augmentation(img, lm, self.opt, msk) 93 | 94 | _, H = img.size 95 | M = estimate_norm(lm, H) 96 | transform = get_transform() 97 | img_tensor = transform(img) 98 | msk_tensor = transform(msk)[:1, ...] 99 | lm_tensor = parse_label(lm) 100 | M_tensor = parse_label(M) 101 | 102 | 103 | return {'imgs': img_tensor, 104 | 'lms': lm_tensor, 105 | 'msks': msk_tensor, 106 | 'M': M_tensor, 107 | 'im_paths': img_path, 108 | 'aug_flag': aug_flag, 109 | 'dataset': self.name} 110 | 111 | def _augmentation(self, img, lm, opt, msk=None): 112 | affine, affine_inv, flip = get_affine_mat(opt, img.size) 113 | img = apply_img_affine(img, affine_inv) 114 | lm = apply_lm_affine(lm, affine, flip, img.size) 115 | if msk is not None: 116 | msk = apply_img_affine(msk, affine_inv, method=Image.BILINEAR) 117 | return img, lm, msk 118 | 119 | 120 | 121 | 122 | def __len__(self): 123 | """Return the total number of images in the dataset. 124 | """ 125 | return self.size 126 | -------------------------------------------------------------------------------- /MToV/evals/fvd/fvd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import torch.nn.functional as F 4 | 5 | def preprocess_single(video, resolution, sequence_length=None): 6 | # video: THWC, {0, ..., 255} 7 | video = video.permute(0, 3, 1, 2).float() / 255. 
# TCHW 8 | t, c, h, w = video.shape 9 | 10 | # temporal crop 11 | if sequence_length is not None: 12 | assert sequence_length <= t 13 | video = video[:sequence_length] 14 | 15 | # scale shorter side to resolution 16 | scale = resolution / min(h, w) 17 | if h < w: 18 | target_size = (resolution, math.ceil(w * scale)) 19 | else: 20 | target_size = (math.ceil(h * scale), resolution) 21 | video = F.interpolate(video, size=target_size, mode='bilinear', 22 | align_corners=False) 23 | 24 | # center crop 25 | t, c, h, w = video.shape 26 | w_start = (w - resolution) // 2 27 | h_start = (h - resolution) // 2 28 | video = video[:, :, h_start:h_start + resolution, w_start:w_start + resolution] 29 | video = video.permute(1, 0, 2, 3).contiguous() # CTHW 30 | 31 | video -= 0.5 32 | 33 | return video 34 | 35 | def preprocess(videos, target_resolution=224): 36 | # videos in {0, ..., 255} as np.uint8 array 37 | b, t, h, w, c = videos.shape 38 | videos = torch.from_numpy(videos) 39 | videos = torch.stack([preprocess_single(video, target_resolution) for video in videos]) 40 | return videos * 2 # [-0.5, 0.5] -> [-1, 1] 41 | 42 | def get_fvd_logits(videos, i3d, device): 43 | videos = preprocess(videos) 44 | embeddings = get_logits(i3d, videos, device) 45 | return embeddings 46 | 47 | # https://github.com/tensorflow/gan/blob/de4b8da3853058ea380a6152bd3bd454013bf619/tensorflow_gan/python/eval/classifier_metrics.py#L161 48 | def _symmetric_matrix_square_root(mat, eps=1e-10): 49 | u, s, v = torch.svd(mat) 50 | si = torch.where(s < eps, s, torch.sqrt(s)) 51 | return torch.matmul(torch.matmul(u, torch.diag(si)), v.t()) 52 | 53 | # https://github.com/tensorflow/gan/blob/de4b8da3853058ea380a6152bd3bd454013bf619/tensorflow_gan/python/eval/classifier_metrics.py#L400 54 | def trace_sqrt_product(sigma, sigma_v): 55 | sqrt_sigma = _symmetric_matrix_square_root(sigma) 56 | sqrt_a_sigmav_a = torch.matmul(sqrt_sigma, torch.matmul(sigma_v, sqrt_sigma)) 57 | return torch.trace(_symmetric_matrix_square_root(sqrt_a_sigmav_a)) 58 | 59 | # https://discuss.pytorch.org/t/covariance-and-gradient-support/16217/2 60 | def cov(m, rowvar=False): 61 | '''Estimate a covariance matrix given data. 62 | 63 | Covariance indicates the level to which two variables vary together. 64 | If we examine N-dimensional samples, `X = [x_1, x_2, ... x_N]^T`, 65 | then the covariance matrix element `C_{ij}` is the covariance of 66 | `x_i` and `x_j`. The element `C_{ii}` is the variance of `x_i`. 67 | 68 | Args: 69 | m: A 1-D or 2-D array containing multiple variables and observations. 70 | Each row of `m` represents a variable, and each column a single 71 | observation of all those variables. 72 | rowvar: If `rowvar` is True, then each row represents a 73 | variable, with observations in the columns. Otherwise, the 74 | relationship is transposed: each column represents a variable, 75 | while the rows contain observations. 76 | 77 | Returns: 78 | The covariance matrix of the variables. 
79 | ''' 80 | if m.dim() > 2: 81 | raise ValueError('m has more than 2 dimensions') 82 | if m.dim() < 2: 83 | m = m.view(1, -1) 84 | if not rowvar and m.size(0) != 1: 85 | m = m.t() 86 | 87 | fact = 1.0 / (m.size(1) - 1) # unbiased estimate 88 | m -= torch.mean(m, dim=1, keepdim=True) 89 | mt = m.t() # if complex: mt = m.t().conj() 90 | return fact * m.matmul(mt).squeeze() 91 | 92 | 93 | def frechet_distance(x1, x2): 94 | x1 = x1.flatten(start_dim=1) 95 | x2 = x2.flatten(start_dim=1) 96 | m, m_w = x1.mean(dim=0), x2.mean(dim=0) 97 | sigma, sigma_w = cov(x1, rowvar=False), cov(x2, rowvar=False) 98 | 99 | sqrt_trace_component = trace_sqrt_product(sigma, sigma_w) 100 | trace = torch.trace(sigma + sigma_w) - 2.0 * sqrt_trace_component 101 | 102 | mean = torch.sum((m - m_w) ** 2) 103 | fd = trace + mean 104 | return fd 105 | 106 | 107 | def get_logits(i3d, videos, device): 108 | """ 109 | assert videos.shape[0] % 16 == 0 110 | with torch.no_grad(): 111 | logits = [] 112 | for i in range(0, videos.shape[0], 16): 113 | batch = videos[i:i + 16].to(device) 114 | logits.append(i3d(batch)) 115 | logits = torch.cat(logits, dim=0) 116 | return logits 117 | """ 118 | 119 | with torch.no_grad(): 120 | logits = i3d(videos.to(device)) 121 | return logits 122 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/utils/utils_distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import torch 7 | import torch.distributed as dist 8 | from torch.utils.data import DistributedSampler as _DistributedSampler 9 | 10 | 11 | def setup_seed(seed, cuda_deterministic=True): 12 | torch.manual_seed(seed) 13 | torch.cuda.manual_seed_all(seed) 14 | np.random.seed(seed) 15 | random.seed(seed) 16 | os.environ["PYTHONHASHSEED"] = str(seed) 17 | if cuda_deterministic: # slower, more reproducible 18 | torch.backends.cudnn.deterministic = True 19 | torch.backends.cudnn.benchmark = False 20 | else: # faster, less reproducible 21 | torch.backends.cudnn.deterministic = False 22 | torch.backends.cudnn.benchmark = True 23 | 24 | 25 | def worker_init_fn(worker_id, num_workers, rank, seed): 26 | # The seed of each worker equals to 27 | # num_worker * rank + worker_id + user_seed 28 | worker_seed = num_workers * rank + worker_id + seed 29 | np.random.seed(worker_seed) 30 | random.seed(worker_seed) 31 | torch.manual_seed(worker_seed) 32 | 33 | 34 | def get_dist_info(): 35 | if dist.is_available() and dist.is_initialized(): 36 | rank = dist.get_rank() 37 | world_size = dist.get_world_size() 38 | else: 39 | rank = 0 40 | world_size = 1 41 | 42 | return rank, world_size 43 | 44 | 45 | def sync_random_seed(seed=None, device="cuda"): 46 | """Make sure different ranks share the same seed. 47 | All workers must call this function, otherwise it will deadlock. 48 | This method is generally used in `DistributedSampler`, 49 | because the seed should be identical across all processes 50 | in the distributed group. 51 | In distributed sampling, different ranks should sample non-overlapped 52 | data in the dataset. Therefore, this function is used to make sure that 53 | each rank shuffles the data indices in the same order based 54 | on the same seed. Then different ranks could use different indices 55 | to select non-overlapped data from the same data list. 56 | Args: 57 | seed (int, Optional): The seed. Default to None. 
58 | device (str): The device where the seed will be put on. 59 | Default to 'cuda'. 60 | Returns: 61 | int: Seed to be used. 62 | """ 63 | if seed is None: 64 | seed = np.random.randint(2**31) 65 | assert isinstance(seed, int) 66 | 67 | rank, world_size = get_dist_info() 68 | 69 | if world_size == 1: 70 | return seed 71 | 72 | if rank == 0: 73 | random_num = torch.tensor(seed, dtype=torch.int32, device=device) 74 | else: 75 | random_num = torch.tensor(0, dtype=torch.int32, device=device) 76 | 77 | dist.broadcast(random_num, src=0) 78 | 79 | return random_num.item() 80 | 81 | 82 | class DistributedSampler(_DistributedSampler): 83 | def __init__( 84 | self, 85 | dataset, 86 | num_replicas=None, # world_size 87 | rank=None, # local_rank 88 | shuffle=True, 89 | seed=0, 90 | ): 91 | 92 | super().__init__(dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) 93 | 94 | # In distributed sampling, different ranks should sample 95 | # non-overlapped data in the dataset. Therefore, this function 96 | # is used to make sure that each rank shuffles the data indices 97 | # in the same order based on the same seed. Then different ranks 98 | # could use different indices to select non-overlapped data from the 99 | # same data list. 100 | self.seed = sync_random_seed(seed) 101 | 102 | def __iter__(self): 103 | # deterministically shuffle based on epoch 104 | if self.shuffle: 105 | g = torch.Generator() 106 | # When :attr:`shuffle=True`, this ensures all replicas 107 | # use a different random ordering for each epoch. 108 | # Otherwise, the next iteration of this sampler will 109 | # yield the same ordering. 110 | g.manual_seed(self.epoch + self.seed) 111 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 112 | else: 113 | indices = torch.arange(len(self.dataset)).tolist() 114 | 115 | # add extra samples to make it evenly divisible 116 | # in case that indices is shorter than half of total_size 117 | indices = (indices * math.ceil(self.total_size / len(indices)))[ 118 | : self.total_size 119 | ] 120 | assert len(indices) == self.total_size 121 | 122 | # subsample 123 | indices = indices[self.rank : self.total_size : self.num_replicas] 124 | assert len(indices) == self.num_samples 125 | 126 | return iter(indices) 127 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/data/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes all the modules related to data loading and preprocessing 2 | 3 | To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset. 4 | You need to implement four functions: 5 | -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). 6 | -- <__len__>: return the size of dataset. 7 | -- <__getitem__>: get a data point from data loader. 8 | -- : (optionally) add dataset-specific options and set default options. 9 | 10 | Now you can use the dataset class by specifying flag '--dataset_mode dummy'. 11 | See our template dataset class 'template_dataset.py' for more details. 12 | """ 13 | import numpy as np 14 | import importlib 15 | import torch.utils.data 16 | from data.base_dataset import BaseDataset 17 | 18 | 19 | def find_dataset_using_name(dataset_name): 20 | """Import the module "data/[dataset_name]_dataset.py". 21 | 22 | In the file, the class called DatasetNameDataset() will 23 | be instantiated. 
It has to be a subclass of BaseDataset, 24 | and it is case-insensitive. 25 | """ 26 | dataset_filename = "data." + dataset_name + "_dataset" 27 | datasetlib = importlib.import_module(dataset_filename) 28 | 29 | dataset = None 30 | target_dataset_name = dataset_name.replace('_', '') + 'dataset' 31 | for name, cls in datasetlib.__dict__.items(): 32 | if name.lower() == target_dataset_name.lower() \ 33 | and issubclass(cls, BaseDataset): 34 | dataset = cls 35 | 36 | if dataset is None: 37 | raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name)) 38 | 39 | return dataset 40 | 41 | 42 | def get_option_setter(dataset_name): 43 | """Return the static method of the dataset class.""" 44 | dataset_class = find_dataset_using_name(dataset_name) 45 | return dataset_class.modify_commandline_options 46 | 47 | 48 | def create_dataset(opt, rank=0): 49 | """Create a dataset given the option. 50 | 51 | This function wraps the class CustomDatasetDataLoader. 52 | This is the main interface between this package and 'train.py'/'test.py' 53 | 54 | Example: 55 | >>> from data import create_dataset 56 | >>> dataset = create_dataset(opt) 57 | """ 58 | data_loader = CustomDatasetDataLoader(opt, rank=rank) 59 | dataset = data_loader.load_data() 60 | return dataset 61 | 62 | class CustomDatasetDataLoader(): 63 | """Wrapper class of Dataset class that performs multi-threaded data loading""" 64 | 65 | def __init__(self, opt, rank=0): 66 | """Initialize this class 67 | 68 | Step 1: create a dataset instance given the name [dataset_mode] 69 | Step 2: create a multi-threaded data loader. 70 | """ 71 | self.opt = opt 72 | dataset_class = find_dataset_using_name(opt.dataset_mode) 73 | self.dataset = dataset_class(opt) 74 | self.sampler = None 75 | print("rank %d %s dataset [%s] was created" % (rank, self.dataset.name, type(self.dataset).__name__)) 76 | if opt.use_ddp and opt.isTrain: 77 | world_size = opt.world_size 78 | self.sampler = torch.utils.data.distributed.DistributedSampler( 79 | self.dataset, 80 | num_replicas=world_size, 81 | rank=rank, 82 | shuffle=not opt.serial_batches 83 | ) 84 | self.dataloader = torch.utils.data.DataLoader( 85 | self.dataset, 86 | sampler=self.sampler, 87 | num_workers=int(opt.num_threads / world_size), 88 | batch_size=int(opt.batch_size / world_size), 89 | drop_last=True) 90 | else: 91 | self.dataloader = torch.utils.data.DataLoader( 92 | self.dataset, 93 | batch_size=opt.batch_size, 94 | shuffle=(not opt.serial_batches) and opt.isTrain, 95 | num_workers=int(opt.num_threads), 96 | drop_last=True 97 | ) 98 | 99 | def set_epoch(self, epoch): 100 | self.dataset.current_epoch = epoch 101 | if self.sampler is not None: 102 | self.sampler.set_epoch(epoch) 103 | 104 | def load_data(self): 105 | return self 106 | 107 | def __len__(self): 108 | """Return the number of data in the dataset""" 109 | return min(len(self.dataset), self.opt.max_dataset_size) 110 | 111 | def __iter__(self): 112 | """Return a batch of data""" 113 | for i, data in enumerate(self.dataloader): 114 | if i * self.opt.batch_size >= self.opt.max_dataset_size: 115 | break 116 | yield data 117 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/install_dali.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | ## Prerequisites 3 | 4 | 1. Linux x64. 5 | 2. 
NVIDIA Driver supporting CUDA 10.0 or later (i.e., 410.48 or later driver releases). 6 | 3. (Optional) One or more of the following deep learning frameworks: 7 | 8 | * [MXNet 1.3](http://mxnet.incubator.apache.org/) `mxnet-cu100` or later. 9 | * [PyTorch 0.4](https://pytorch.org/) or later. 10 | * [TensorFlow 1.7](https://www.tensorflow.org/) or later. 11 | 12 | ## DALI in NGC Containers 13 | DALI is preinstalled in the TensorFlow, PyTorch, and MXNet containers in versions 18.07 and later on NVIDIA GPU Cloud. 14 | 15 | ## pip - Official Releases 16 | 17 | ### nvidia-dali 18 | 19 | Execute the following command to install the latest DALI for specified CUDA version (please check support matrix to see if your platform is supported): 20 | 21 | * For CUDA 10.2: 22 | 23 | ```bash 24 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-cuda102 25 | ``` 26 | 27 | * For CUDA 11.0: 28 | 29 | ```bash 30 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-cuda110 31 | ``` 32 | 33 | 34 | > Note: CUDA 11.0 build uses CUDA toolkit enhanced compatibility. It is built with the latest CUDA 11.x toolkit while it can run on the latest, stable CUDA 11.0 capable drivers (450.80 or later). Using the latest driver may enable additional functionality. More details can be found in [enhanced CUDA compatibility guide](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#enhanced-compat-minor-releases). 35 | 36 | > Note: Please always use the latest version of pip available (at least >= 19.3) and update when possible by issuing pip install –upgrade pip 37 | 38 | ### nvidia-dali-tf-plugin 39 | 40 | DALI doesn’t contain prebuilt versions of the DALI TensorFlow plugin. It needs to be installed as a separate package which will be built against the currently installed version of TensorFlow: 41 | 42 | * For CUDA 10.2: 43 | 44 | ```bash 45 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-tf-plugin-cuda102 46 | ``` 47 | 48 | * For CUDA 11.0: 49 | 50 | ```bash 51 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-tf-plugin-cuda110 52 | ``` 53 | 54 | Installing this package will install `nvidia-dali-cudaXXX` and its dependencies, if they are not already installed. The package `tensorflow-gpu` must be installed before attempting to install `nvidia-dali-tf-plugin-cudaXXX`. 55 | 56 | > Note: The packages `nvidia-dali-tf-plugin-cudaXXX` and `nvidia-dali-cudaXXX` should be in exactly the same version. Therefore, installing the latest `nvidia-dali-tf-plugin-cudaXXX`, will replace any older `nvidia-dali-cudaXXX` version already installed. To work with older versions of DALI, provide the version explicitly to the `pip install` command. 57 | 58 | ### pip - Nightly and Weekly Releases¶ 59 | 60 | > Note: While binaries available to download from nightly and weekly builds include most recent changes available in the GitHub some functionalities may not work or provide inferior performance comparing to the official releases. Those builds are meant for the early adopters seeking for the most recent version available and being ready to boldly go where no man has gone before. 
61 | 62 | > Note: It is recommended to uninstall regular DALI and TensorFlow plugin before installing nightly or weekly builds as they are installed in the same path 63 | 64 | #### Nightly Builds 65 | To access most recent nightly builds please use flowing release channel: 66 | 67 | * For CUDA 10.2: 68 | 69 | ```bash 70 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/nightly --upgrade nvidia-dali-nightly-cuda102 71 | ``` 72 | 73 | ``` 74 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/nightly --upgrade nvidia-dali-tf-plugin-nightly-cuda102 75 | ``` 76 | 77 | * For CUDA 11.0: 78 | 79 | ```bash 80 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/nightly --upgrade nvidia-dali-nightly-cuda110 81 | ``` 82 | 83 | ```bash 84 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/nightly --upgrade nvidia-dali-tf-plugin-nightly-cuda110 85 | ``` 86 | 87 | 88 | #### Weekly Builds 89 | 90 | Also, there is a weekly release channel with more thorough testing. To access most recent weekly builds please use the following release channel (available only for CUDA 11): 91 | 92 | ```bash 93 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/weekly --upgrade nvidia-dali-weekly-cuda110 94 | ``` 95 | 96 | ```bash 97 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/weekly --upgrade nvidia-dali-tf-plugin-week 98 | ``` 99 | 100 | 101 | --- 102 | 103 | ### For more information about Dali and installation, please refer to [DALI documentation](https://docs.nvidia.com/deeplearning/dali/user-guide/docs/installation.html). 104 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/data/base_dataset.py: -------------------------------------------------------------------------------- 1 | """This module implements an abstract base class (ABC) 'BaseDataset' for datasets. 2 | 3 | It also includes common transformation functions (e.g., get_transform, __scale_width), which can be later used in subclasses. 4 | """ 5 | import random 6 | import numpy as np 7 | import torch.utils.data as data 8 | from PIL import Image 9 | import torchvision.transforms as transforms 10 | from abc import ABC, abstractmethod 11 | 12 | 13 | class BaseDataset(data.Dataset, ABC): 14 | """This class is an abstract base class (ABC) for datasets. 15 | 16 | To create a subclass, you need to implement the following four functions: 17 | -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). 18 | -- <__len__>: return the size of dataset. 19 | -- <__getitem__>: get a data point. 20 | -- : (optionally) add dataset-specific options and set default options. 21 | """ 22 | 23 | def __init__(self, opt): 24 | """Initialize the class; save the options in the class 25 | 26 | Parameters: 27 | opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions 28 | """ 29 | self.opt = opt 30 | # self.root = opt.dataroot 31 | self.current_epoch = 0 32 | 33 | @staticmethod 34 | def modify_commandline_options(parser, is_train): 35 | """Add new dataset-specific options, and rewrite default values for existing options. 36 | 37 | Parameters: 38 | parser -- original option parser 39 | is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. 40 | 41 | Returns: 42 | the modified parser. 
43 | """ 44 | return parser 45 | 46 | @abstractmethod 47 | def __len__(self): 48 | """Return the total number of images in the dataset.""" 49 | return 0 50 | 51 | @abstractmethod 52 | def __getitem__(self, index): 53 | """Return a data point and its metadata information. 54 | 55 | Parameters: 56 | index - - a random integer for data indexing 57 | 58 | Returns: 59 | a dictionary of data with their names. It ususally contains the data itself and its metadata information. 60 | """ 61 | pass 62 | 63 | 64 | def get_transform(grayscale=False): 65 | transform_list = [] 66 | if grayscale: 67 | transform_list.append(transforms.Grayscale(1)) 68 | transform_list += [transforms.ToTensor()] 69 | return transforms.Compose(transform_list) 70 | 71 | def get_affine_mat(opt, size): 72 | shift_x, shift_y, scale, rot_angle, flip = 0., 0., 1., 0., False 73 | w, h = size 74 | 75 | if 'shift' in opt.preprocess: 76 | shift_pixs = int(opt.shift_pixs) 77 | shift_x = random.randint(-shift_pixs, shift_pixs) 78 | shift_y = random.randint(-shift_pixs, shift_pixs) 79 | if 'scale' in opt.preprocess: 80 | scale = 1 + opt.scale_delta * (2 * random.random() - 1) 81 | if 'rot' in opt.preprocess: 82 | rot_angle = opt.rot_angle * (2 * random.random() - 1) 83 | rot_rad = -rot_angle * np.pi/180 84 | if 'flip' in opt.preprocess: 85 | flip = random.random() > 0.5 86 | 87 | shift_to_origin = np.array([1, 0, -w//2, 0, 1, -h//2, 0, 0, 1]).reshape([3, 3]) 88 | flip_mat = np.array([-1 if flip else 1, 0, 0, 0, 1, 0, 0, 0, 1]).reshape([3, 3]) 89 | shift_mat = np.array([1, 0, shift_x, 0, 1, shift_y, 0, 0, 1]).reshape([3, 3]) 90 | rot_mat = np.array([np.cos(rot_rad), np.sin(rot_rad), 0, -np.sin(rot_rad), np.cos(rot_rad), 0, 0, 0, 1]).reshape([3, 3]) 91 | scale_mat = np.array([scale, 0, 0, 0, scale, 0, 0, 0, 1]).reshape([3, 3]) 92 | shift_to_center = np.array([1, 0, w//2, 0, 1, h//2, 0, 0, 1]).reshape([3, 3]) 93 | 94 | affine = shift_to_center @ scale_mat @ rot_mat @ shift_mat @ flip_mat @ shift_to_origin 95 | affine_inv = np.linalg.inv(affine) 96 | return affine, affine_inv, flip 97 | 98 | def apply_img_affine(img, affine_inv, method=Image.BICUBIC): 99 | return img.transform(img.size, Image.AFFINE, data=affine_inv.flatten()[:6], resample=Image.BICUBIC) 100 | 101 | def apply_lm_affine(landmark, affine, flip, size): 102 | _, h = size 103 | lm = landmark.copy() 104 | lm[:, 1] = h - 1 - lm[:, 1] 105 | lm = np.concatenate((lm, np.ones([lm.shape[0], 1])), -1) 106 | lm = lm @ np.transpose(affine) 107 | lm[:, :2] = lm[:, :2] / lm[:, 2:] 108 | lm = lm[:, :2] 109 | lm[:, 1] = h - 1 - lm[:, 1] 110 | if flip: 111 | lm_ = lm.copy() 112 | lm_[:17] = lm[16::-1] 113 | lm_[17:22] = lm[26:21:-1] 114 | lm_[22:27] = lm[21:16:-1] 115 | lm_[31:36] = lm[35:30:-1] 116 | lm_[36:40] = lm[45:41:-1] 117 | lm_[40:42] = lm[47:45:-1] 118 | lm_[42:46] = lm[39:35:-1] 119 | lm_[46:48] = lm[41:39:-1] 120 | lm_[48:55] = lm[54:47:-1] 121 | lm_[55:60] = lm[59:54:-1] 122 | lm_[60:65] = lm[64:59:-1] 123 | lm_[65:68] = lm[67:64:-1] 124 | lm = lm_ 125 | return lm 126 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/util/load_mats.py: -------------------------------------------------------------------------------- 1 | """This script is to load 3D face model for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | import numpy as np 5 | from PIL import Image 6 | from scipy.io import loadmat, savemat 7 | from array import array 8 | import os.path as osp 9 | 10 | 11 | # load expression basis 12 | def LoadExpBasis(bfm_folder="BFM"): 13 
| n_vertex = 53215 14 | bfm_folder = "deep_3drecon/" + bfm_folder 15 | 16 | Expbin = open(osp.join(bfm_folder, "Exp_Pca.bin"), "rb") 17 | exp_dim = array("i") 18 | exp_dim.fromfile(Expbin, 1) 19 | expMU = array("f") 20 | expPC = array("f") 21 | expMU.fromfile(Expbin, 3 * n_vertex) 22 | expPC.fromfile(Expbin, 3 * exp_dim[0] * n_vertex) 23 | Expbin.close() 24 | 25 | expPC = np.array(expPC) 26 | expPC = np.reshape(expPC, [exp_dim[0], -1]) 27 | expPC = np.transpose(expPC) 28 | 29 | expEV = np.loadtxt(osp.join(bfm_folder, "std_exp.txt")) 30 | 31 | return expPC, expEV 32 | 33 | 34 | # transfer original BFM09 to our face model 35 | def transferBFM09(bfm_folder="BFM"): 36 | print("Transfer BFM09 to BFM_model_front......") 37 | # seyeon 38 | # if "aux_models/aux_models/" in bfm_folder: 39 | # bfm_folder = bfm_folder.replace("aux_models/aux_models/", "aux_models/") 40 | original_BFM = loadmat(osp.join(bfm_folder, "01_MorphableModel.mat")) 41 | 42 | shapePC = original_BFM["shapePC"] # shape basis 43 | shapeEV = original_BFM["shapeEV"] # corresponding eigen value 44 | shapeMU = original_BFM["shapeMU"] # mean face 45 | texPC = original_BFM["texPC"] # texture basis 46 | texEV = original_BFM["texEV"] # eigen value 47 | texMU = original_BFM["texMU"] # mean texture 48 | 49 | expPC, expEV = LoadExpBasis() 50 | 51 | # transfer BFM09 to our face model 52 | 53 | idBase = shapePC * np.reshape(shapeEV, [-1, 199]) 54 | idBase = idBase / 1e5 # unify the scale to decimeter 55 | idBase = idBase[:, :80] # use only first 80 basis 56 | 57 | exBase = expPC * np.reshape(expEV, [-1, 79]) 58 | exBase = exBase / 1e5 # unify the scale to decimeter 59 | exBase = exBase[:, :64] # use only first 64 basis 60 | 61 | texBase = texPC * np.reshape(texEV, [-1, 199]) 62 | texBase = texBase[:, :80] # use only first 80 basis 63 | 64 | # our face model is cropped along face landmarks and contains only 35709 vertex. 65 | # original BFM09 contains 53490 vertex, and expression basis provided by Guo et al. contains 53215 vertex. 66 | # thus we select corresponding vertex to get our face model. 67 | 68 | index_exp = loadmat(osp.join(bfm_folder, "BFM_front_idx.mat")) 69 | index_exp = index_exp["idx"].astype(np.int32) - 1 # starts from 0 (to 53215) 70 | 71 | index_shape = loadmat(osp.join(bfm_folder, "BFM_exp_idx.mat")) 72 | index_shape = index_shape["trimIndex"].astype(np.int32) - 1 # starts from 0 (to 53490) 73 | index_shape = index_shape[index_exp] 74 | 75 | idBase = np.reshape(idBase, [-1, 3, 80]) 76 | idBase = idBase[index_shape, :, :] 77 | idBase = np.reshape(idBase, [-1, 80]) 78 | 79 | texBase = np.reshape(texBase, [-1, 3, 80]) 80 | texBase = texBase[index_shape, :, :] 81 | texBase = np.reshape(texBase, [-1, 80]) 82 | 83 | exBase = np.reshape(exBase, [-1, 3, 64]) 84 | exBase = exBase[index_exp, :, :] 85 | exBase = np.reshape(exBase, [-1, 64]) 86 | 87 | meanshape = np.reshape(shapeMU, [-1, 3]) / 1e5 88 | meanshape = meanshape[index_shape, :] 89 | meanshape = np.reshape(meanshape, [1, -1]) 90 | 91 | meantex = np.reshape(texMU, [-1, 3]) 92 | meantex = meantex[index_shape, :] 93 | meantex = np.reshape(meantex, [1, -1]) 94 | 95 | # other info contains triangles, region used for computing photometric loss, 96 | # region used for skin texture regularization, and 68 landmarks index etc. 
97 | other_info = loadmat(osp.join(bfm_folder, "facemodel_info.mat")) 98 | frontmask2_idx = other_info["frontmask2_idx"] 99 | skinmask = other_info["skinmask"] 100 | keypoints = other_info["keypoints"] 101 | point_buf = other_info["point_buf"] 102 | tri = other_info["tri"] 103 | tri_mask2 = other_info["tri_mask2"] 104 | 105 | # save our face model 106 | savemat( 107 | osp.join(bfm_folder, "BFM_model_front.mat"), 108 | { 109 | "meanshape": meanshape, 110 | "meantex": meantex, 111 | "idBase": idBase, 112 | "exBase": exBase, 113 | "texBase": texBase, 114 | "tri": tri, 115 | "point_buf": point_buf, 116 | "tri_mask2": tri_mask2, 117 | "keypoints": keypoints, 118 | "frontmask2_idx": frontmask2_idx, 119 | "skinmask": skinmask, 120 | }, 121 | ) 122 | 123 | 124 | # load landmarks for standard face, which is used for image preprocessing 125 | def load_lm3d(bfm_folder): 126 | Lm3D = loadmat(osp.join(bfm_folder, "similarity_Lm3D_all.mat")) 127 | Lm3D = Lm3D["lm"] 128 | 129 | # calculate 5 facial landmarks using 68 landmarks 130 | lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1 131 | Lm3D = np.stack([Lm3D[lm_idx[0], :], np.mean(Lm3D[lm_idx[[1, 2]], :], 0), np.mean(Lm3D[lm_idx[[3, 4]], :], 0), Lm3D[lm_idx[5], :], Lm3D[lm_idx[6], :]], axis=0) 132 | Lm3D = Lm3D[[1, 2, 0, 3, 4], :] 133 | 134 | return Lm3D 135 | -------------------------------------------------------------------------------- /MToV/exps/first_stage.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | 6 | from tools.trainer import first_stage_train, first_stage_x_l_train 7 | from tools.dataloader import get_loaders 8 | from models.autoencoder.autoencoder_vit import ViTAutoencoder 9 | from losses.perceptual import LPIPSWithDiscriminator 10 | 11 | from utils import file_name, Logger 12 | import pdb 13 | 14 | # ---------------------------------------------------------------------------- 15 | 16 | _num_moments = 3 # [num_scalars, sum_of_scalars, sum_of_squares] 17 | _reduce_dtype = torch.float32 # Data type to use for initial per-tensor reduction. 18 | _counter_dtype = torch.float64 # Data type to use for the internal counters. 19 | _rank = 0 # Rank of the current process. 20 | _sync_device = None # Device to use for multiprocess communication. None = single-process. 21 | _sync_called = False # Has _sync() been called yet? 22 | _counters = dict() # Running counters on each device, updated by report(): name => device => torch.Tensor 23 | _cumulative = dict() # Cumulative counters on the CPU, updated by _sync(): name => torch.Tensor 24 | 25 | # ---------------------------------------------------------------------------- 26 | 27 | 28 | def init_multiprocessing(rank, sync_device): 29 | r"""Initializes `torch_utils.training_stats` for collecting statistics 30 | across multiple processes. 31 | This function must be called after 32 | `torch.distributed.init_process_group()` and before `Collector.update()`. 33 | The call is not necessary if multi-process collection is not needed. 34 | Args: 35 | rank: Rank of the current process. 36 | sync_device: PyTorch device to use for inter-process 37 | communication, or None to disable multi-process 38 | collection. Typically `torch.device('cuda', rank)`. 
39 | """ 40 | global _rank, _sync_device 41 | assert not _sync_called 42 | _rank = rank 43 | _sync_device = sync_device 44 | 45 | 46 | # ---------------------------------------------------------------------------- 47 | 48 | 49 | def first_stage(rank, args): 50 | device = torch.device("cuda", rank) 51 | 52 | temp_dir = "./" 53 | if args.n_gpus > 1: 54 | init_file = os.path.abspath(os.path.join(temp_dir, ".torch_distributed_init")) 55 | if os.name == "nt": 56 | init_method = "file:///" + init_file.replace("\\", "/") 57 | torch.distributed.init_process_group(backend="gloo", init_method=init_method, rank=rank, world_size=args.n_gpus) 58 | else: 59 | init_method = f"file://{init_file}" 60 | torch.distributed.init_process_group(backend="nccl", init_method=init_method, rank=rank, world_size=args.n_gpus) 61 | 62 | # Init torch_utils. 63 | sync_device = torch.device("cuda", rank) if args.n_gpus > 1 else None 64 | init_multiprocessing(rank=rank, sync_device=sync_device) 65 | 66 | """ ROOT DIRECTORY """ 67 | if rank == 0: 68 | fn = file_name(args) 69 | logger = Logger(fn) 70 | logger.log(args) 71 | logger.log(f"Log path: {logger.logdir}") 72 | rootdir = logger.logdir 73 | else: 74 | logger = None 75 | 76 | if logger is None: 77 | log_ = print 78 | else: 79 | log_ = logger.log 80 | 81 | """ Get Image """ 82 | if rank == 0: 83 | log_(f"Loading dataset {args.data} with resolution {args.res}") 84 | train_loader, test_loader, total_vid = get_loaders( 85 | rank, args.data, args.res, args.timesteps, args.skip, args.batch_size, args.n_gpus, args.seed, cond=False 86 | ) 87 | 88 | """ Get Model """ 89 | if rank == 0: 90 | log_(f"Generating model") 91 | 92 | torch.cuda.set_device(rank) 93 | model = ViTAutoencoder(args.embed_dim, args.ddconfig) 94 | model = model.to(device) 95 | criterion = LPIPSWithDiscriminator(disc_start=args.lossconfig.params.disc_start, timesteps=args.ddconfig.timesteps).to(device) 96 | 97 | opt = torch.optim.AdamW(model.parameters(), lr=args.lr, betas=(0.5, 0.9)) 98 | 99 | d_opt = torch.optim.AdamW( 100 | list(criterion.discriminator_2d.parameters()) + list(criterion.discriminator_3d.parameters()), lr=args.lr, betas=(0.5, 0.9) 101 | ) 102 | 103 | if args.resume and rank == 0: 104 | model_ckpt = torch.load(args.first_model) 105 | model.load_state_dict(model_ckpt) 106 | del model_ckpt 107 | 108 | if rank == 0: 109 | torch.save(model.state_dict(), rootdir + f"net_init.pth") 110 | 111 | if args.n_gpus > 1: 112 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[device], broadcast_buffers=False, find_unused_parameters=False) 113 | criterion = torch.nn.parallel.DistributedDataParallel(criterion, device_ids=[device], broadcast_buffers=False, find_unused_parameters=False) 114 | 115 | fp = args.amp 116 | if args.typetype == "x": 117 | print("x") 118 | first_stage_train(rank, model, opt, d_opt, criterion, train_loader, test_loader, args.first_model, fp, logger) 119 | else: 120 | print("ldmk") 121 | first_stage_x_l_train(rank, model, opt, d_opt, criterion, train_loader, test_loader, args.first_model, fp, logger) 122 | 123 | if rank == 0: 124 | torch.save(model.state_dict(), rootdir + f"net_meta.pth") 125 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/util/mesh_renderer.py: -------------------------------------------------------------------------------- 1 | """This script is the differentiable renderer for Deep3DFaceRecon_pytorch 2 | Attention, antialiasing step is missing in current version. 
3 | """ 4 | # sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 5 | # print(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) 6 | # sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) 7 | import torch 8 | import pytorch3d.ops 9 | import torch 10 | import torch.nn.functional as F 11 | import kornia 12 | from kornia.geometry.camera import pixel2cam 13 | import numpy as np 14 | from typing import List 15 | from scipy.io import loadmat 16 | from torch import nn 17 | 18 | from pytorch3d.structures import Meshes 19 | from pytorch3d.renderer import ( 20 | look_at_view_transform, 21 | FoVPerspectiveCameras, 22 | DirectionalLights, 23 | RasterizationSettings, 24 | MeshRenderer, 25 | MeshRasterizer, 26 | SoftPhongShader, 27 | TexturesUV, 28 | ) 29 | 30 | # def ndc_projection(x=0.1, n=1.0, f=50.0): 31 | # return np.array([[n/x, 0, 0, 0], 32 | # [ 0, n/-x, 0, 0], 33 | # [ 0, 0, -(f+n)/(f-n), -(2*f*n)/(f-n)], 34 | # [ 0, 0, -1, 0]]).astype(np.float32) 35 | 36 | class MeshRenderer(nn.Module): 37 | def __init__(self, 38 | rasterize_fov, 39 | znear=0.1, 40 | zfar=10, 41 | rasterize_size=224,**args): 42 | super(MeshRenderer, self).__init__() 43 | 44 | # x = np.tan(np.deg2rad(rasterize_fov * 0.5)) * znear 45 | # self.ndc_proj = torch.tensor(ndc_projection(x=x, n=znear, f=zfar)).matmul( 46 | # torch.diag(torch.tensor([1., -1, -1, 1]))) 47 | self.rasterize_size = rasterize_size 48 | self.fov = rasterize_fov 49 | self.znear = znear 50 | self.zfar = zfar 51 | 52 | self.rasterizer = None 53 | 54 | def forward(self, vertex, tri, feat=None): 55 | """ 56 | Return: 57 | mask -- torch.tensor, size (B, 1, H, W) 58 | depth -- torch.tensor, size (B, 1, H, W) 59 | features(optional) -- torch.tensor, size (B, C, H, W) if feat is not None 60 | 61 | Parameters: 62 | vertex -- torch.tensor, size (B, N, 3) 63 | tri -- torch.tensor, size (B, M, 3) or (M, 3), triangles 64 | feat(optional) -- torch.tensor, size (B, N ,C), features 65 | """ 66 | device = vertex.device 67 | rsize = int(self.rasterize_size) 68 | # ndc_proj = self.ndc_proj.to(device) 69 | # trans to homogeneous coordinates of 3d vertices, the direction of y is the same as v 70 | if vertex.shape[-1] == 3: 71 | vertex = torch.cat([vertex, torch.ones([*vertex.shape[:2], 1]).to(device)], dim=-1) 72 | vertex[..., 0] = -vertex[..., 0] 73 | 74 | 75 | # vertex_ndc = vertex @ ndc_proj.t() 76 | if self.rasterizer is None: 77 | self.rasterizer = MeshRasterizer() 78 | print("create rasterizer on device cuda:%d"%device.index) 79 | 80 | # ranges = None 81 | # if isinstance(tri, List) or len(tri.shape) == 3: 82 | # vum = vertex_ndc.shape[1] 83 | # fnum = torch.tensor([f.shape[0] for f in tri]).unsqueeze(1).to(device) 84 | # fstartidx = torch.cumsum(fnum, dim=0) - fnum 85 | # ranges = torch.cat([fstartidx, fnum], axis=1).type(torch.int32).cpu() 86 | # for i in range(tri.shape[0]): 87 | # tri[i] = tri[i] + i*vum 88 | # vertex_ndc = torch.cat(vertex_ndc, dim=0) 89 | # tri = torch.cat(tri, dim=0) 90 | 91 | # for range_mode vetex: [B*N, 4], tri: [B*M, 3], for instance_mode vetex: [B, N, 4], tri: [M, 3] 92 | tri = tri.type(torch.int32).contiguous() 93 | 94 | # rasterize 95 | cameras = FoVPerspectiveCameras( 96 | device=device, 97 | fov=self.fov, 98 | znear=self.znear, 99 | zfar=self.zfar, 100 | ) 101 | 102 | raster_settings = RasterizationSettings( 103 | image_size=rsize 104 | ) 105 | 106 | # print(vertex.shape, tri.shape) 107 | mesh = Meshes(vertex.contiguous()[...,:3], tri.unsqueeze(0)) 108 | 109 | 
fragments = self.rasterizer(mesh, cameras = cameras, raster_settings = raster_settings) 110 | rast_out = fragments.pix_to_face.squeeze(-1) 111 | depth = fragments.zbuf 112 | 113 | # render depth 114 | depth = depth.permute(0, 3, 1, 2) 115 | mask = (rast_out > 0).float().unsqueeze(1) 116 | depth = mask * depth 117 | 118 | 119 | image = None 120 | if feat is not None: 121 | attributes = feat.reshape(-1,3)[mesh.faces_packed()] 122 | image = pytorch3d.ops.interpolate_face_attributes(fragments.pix_to_face, 123 | fragments.bary_coords, 124 | attributes) 125 | # print(image.shape) 126 | image = image.squeeze(-2).permute(0, 3, 1, 2) 127 | image = mask * image 128 | 129 | return mask, depth, image 130 | 131 | -------------------------------------------------------------------------------- /MToV/exps/first_stage_ldmk.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | 6 | from tools.trainer import first_stage_train, first_stage_x_l_train 7 | from tools.dataloader import get_loaders 8 | from models.autoencoder.autoencoder_vit import ViTAutoencoder 9 | from losses.perceptual import LPIPSWithDiscriminator 10 | 11 | from utils import file_name, Logger 12 | import pdb 13 | 14 | # ---------------------------------------------------------------------------- 15 | 16 | _num_moments = 3 # [num_scalars, sum_of_scalars, sum_of_squares] 17 | _reduce_dtype = torch.float32 # Data type to use for initial per-tensor reduction. 18 | _counter_dtype = torch.float64 # Data type to use for the internal counters. 19 | _rank = 0 # Rank of the current process. 20 | _sync_device = None # Device to use for multiprocess communication. None = single-process. 21 | _sync_called = False # Has _sync() been called yet? 22 | _counters = dict() # Running counters on each device, updated by report(): name => device => torch.Tensor 23 | _cumulative = dict() # Cumulative counters on the CPU, updated by _sync(): name => torch.Tensor 24 | 25 | # ---------------------------------------------------------------------------- 26 | 27 | 28 | def init_multiprocessing(rank, sync_device): 29 | r"""Initializes `torch_utils.training_stats` for collecting statistics 30 | across multiple processes. 31 | This function must be called after 32 | `torch.distributed.init_process_group()` and before `Collector.update()`. 33 | The call is not necessary if multi-process collection is not needed. 34 | Args: 35 | rank: Rank of the current process. 36 | sync_device: PyTorch device to use for inter-process 37 | communication, or None to disable multi-process 38 | collection. Typically `torch.device('cuda', rank)`. 39 | """ 40 | global _rank, _sync_device 41 | assert not _sync_called 42 | _rank = rank 43 | _sync_device = sync_device 44 | 45 | 46 | # ---------------------------------------------------------------------------- 47 | 48 | 49 | def first_stage_ldmk(rank, args): 50 | device = torch.device("cuda", rank) 51 | 52 | temp_dir = "./" 53 | if args.n_gpus > 1: 54 | init_file = os.path.abspath(os.path.join(temp_dir, ".torch_distributed_init")) 55 | if os.name == "nt": 56 | init_method = "file:///" + init_file.replace("\\", "/") 57 | torch.distributed.init_process_group(backend="gloo", init_method=init_method, rank=rank, world_size=args.n_gpus) 58 | else: 59 | init_method = f"file://{init_file}" 60 | torch.distributed.init_process_group(backend="nccl", init_method=init_method, rank=rank, world_size=args.n_gpus) 61 | 62 | # Init torch_utils. 
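    # training_stats needs the process rank and, on multi-GPU runs, a CUDA device for
    # collective reduction; with a single GPU, sync_device stays None and no
    # inter-process communication is set up.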
63 | sync_device = torch.device("cuda", rank) if args.n_gpus > 1 else None 64 | init_multiprocessing(rank=rank, sync_device=sync_device) 65 | 66 | """ ROOT DIRECTORY """ 67 | if rank == 0: 68 | fn = file_name(args) 69 | logger = Logger(fn, logdir=args.log_dir) 70 | logger.log(args) 71 | logger.log(f"Log path: {logger.logdir}") 72 | rootdir = logger.logdir 73 | else: 74 | logger = None 75 | 76 | if logger is None: 77 | log_ = print 78 | else: 79 | log_ = logger.log 80 | 81 | """ Get Image """ 82 | if rank == 0: 83 | log_(f"Loading dataset {args.data} with resolution {args.res}") 84 | 85 | contain_contour = args.typetype != "ldmk_wo_contour" 86 | train_loader, test_loader, total_vid = get_loaders( 87 | rank, 88 | args.data, 89 | args.res, 90 | args.timesteps, 91 | args.skip, 92 | args.batch_size, 93 | args.n_gpus, 94 | args.seed, 95 | cond=False, 96 | ) 97 | 98 | """ Get Model """ 99 | if rank == 0: 100 | log_(f"Generating model") 101 | 102 | torch.cuda.set_device(rank) 103 | model = ViTAutoencoder(args.embed_dim, args.ddconfig).to(device) 104 | criterion = LPIPSWithDiscriminator(disc_start=args.lossconfig.params.disc_start, timesteps=args.ddconfig.timesteps).to(device) 105 | 106 | opt = torch.optim.AdamW(model.parameters(), lr=args.lr, betas=(0.5, 0.9)) 107 | d_opt = torch.optim.AdamW( 108 | list(criterion.discriminator_2d.parameters()) + list(criterion.discriminator_3d.parameters()), lr=args.lr, betas=(0.5, 0.9) 109 | ) 110 | 111 | if args.resume and rank == 0: 112 | model_ckpt = torch.load(args.first_model) 113 | model.load_state_dict(model_ckpt) 114 | del model_ckpt 115 | 116 | if rank == 0: 117 | torch.save(model.state_dict(), rootdir + f"net_init.pth") 118 | 119 | if args.n_gpus > 1: 120 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[device], broadcast_buffers=False, find_unused_parameters=False) 121 | criterion = torch.nn.parallel.DistributedDataParallel(criterion, device_ids=[device], broadcast_buffers=False, find_unused_parameters=False) 122 | 123 | fp = args.amp 124 | if args.typetype == "x": 125 | print("x") 126 | first_stage_train(rank, model, opt, d_opt, criterion, train_loader, test_loader, args.first_model, fp, logger) 127 | else: 128 | print("x_l") 129 | first_stage_x_l_train(rank, model, opt, d_opt, criterion, train_loader, test_loader, args.first_model, fp, logger) 130 | 131 | if rank == 0: 132 | torch.save(model.state_dict(), rootdir + f"net_meta.pth") 133 | -------------------------------------------------------------------------------- /data/data_utils/deep_3drecon/deep_3drecon_models/arcface_torch/docs/speed_benchmark.md: -------------------------------------------------------------------------------- 1 | ## Test Training Speed 2 | 3 | - Test Commands 4 | 5 | You need to use the following two commands to test the Partial FC training performance. 6 | The number of identites is **3 millions** (synthetic data), turn mixed precision training on, backbone is resnet50, 7 | batch size is 1024. 
8 | ```shell 9 | # Model Parallel 10 | python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/3millions 11 | # Partial FC 0.1 12 | python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/3millions_pfc 13 | ``` 14 | 15 | - GPU Memory 16 | 17 | ``` 18 | # (Model Parallel) gpustat -i 19 | [0] Tesla V100-SXM2-32GB | 64'C, 94 % | 30338 / 32510 MB 20 | [1] Tesla V100-SXM2-32GB | 60'C, 99 % | 28876 / 32510 MB 21 | [2] Tesla V100-SXM2-32GB | 60'C, 99 % | 28872 / 32510 MB 22 | [3] Tesla V100-SXM2-32GB | 69'C, 99 % | 28872 / 32510 MB 23 | [4] Tesla V100-SXM2-32GB | 66'C, 99 % | 28888 / 32510 MB 24 | [5] Tesla V100-SXM2-32GB | 60'C, 99 % | 28932 / 32510 MB 25 | [6] Tesla V100-SXM2-32GB | 68'C, 100 % | 28916 / 32510 MB 26 | [7] Tesla V100-SXM2-32GB | 65'C, 99 % | 28860 / 32510 MB 27 | 28 | # (Partial FC 0.1) gpustat -i 29 | [0] Tesla V100-SXM2-32GB | 60'C, 95 % | 10488 / 32510 MB │······················· 30 | [1] Tesla V100-SXM2-32GB | 60'C, 97 % | 10344 / 32510 MB │······················· 31 | [2] Tesla V100-SXM2-32GB | 61'C, 95 % | 10340 / 32510 MB │······················· 32 | [3] Tesla V100-SXM2-32GB | 66'C, 95 % | 10340 / 32510 MB │······················· 33 | [4] Tesla V100-SXM2-32GB | 65'C, 94 % | 10356 / 32510 MB │······················· 34 | [5] Tesla V100-SXM2-32GB | 61'C, 95 % | 10400 / 32510 MB │······················· 35 | [6] Tesla V100-SXM2-32GB | 68'C, 96 % | 10384 / 32510 MB │······················· 36 | [7] Tesla V100-SXM2-32GB | 64'C, 95 % | 10328 / 32510 MB │······················· 37 | ``` 38 | 39 | - Training Speed 40 | 41 | ```python 42 | # (Model Parallel) trainging.log 43 | Training: Speed 2271.33 samples/sec Loss 1.1624 LearningRate 0.2000 Epoch: 0 Global Step: 100 44 | Training: Speed 2269.94 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 150 45 | Training: Speed 2272.67 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 200 46 | Training: Speed 2266.55 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 250 47 | Training: Speed 2272.54 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 300 48 | 49 | # (Partial FC 0.1) trainging.log 50 | Training: Speed 5299.56 samples/sec Loss 1.0965 LearningRate 0.2000 Epoch: 0 Global Step: 100 51 | Training: Speed 5296.37 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 150 52 | Training: Speed 5304.37 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 200 53 | Training: Speed 5274.43 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 250 54 | Training: Speed 5300.10 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 300 55 | ``` 56 | 57 | In this test case, Partial FC 0.1 only use1 1/3 of the GPU memory of the model parallel, 58 | and the training speed is 2.5 times faster than the model parallel. 59 | 60 | 61 | ## Speed Benchmark 62 | 63 | 1. Training speed of different parallel methods (samples/second), Tesla V100 32GB * 8. 
(Larger is better) 64 | 65 | | Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | 66 | | :--- | :--- | :--- | :--- | 67 | |125000 | 4681 | 4824 | 5004 | 68 | |250000 | 4047 | 4521 | 4976 | 69 | |500000 | 3087 | 4013 | 4900 | 70 | |1000000 | 2090 | 3449 | 4803 | 71 | |1400000 | 1672 | 3043 | 4738 | 72 | |2000000 | - | 2593 | 4626 | 73 | |4000000 | - | 1748 | 4208 | 74 | |5500000 | - | 1389 | 3975 | 75 | |8000000 | - | - | 3565 | 76 | |16000000 | - | - | 2679 | 77 | |29000000 | - | - | 1855 | 78 | 79 | 2. GPU memory cost of different parallel methods (GB per GPU), Tesla V100 32GB * 8. (Smaller is better) 80 | 81 | | Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | 82 | | :--- | :--- | :--- | :--- | 83 | |125000 | 7358 | 5306 | 4868 | 84 | |250000 | 9940 | 5826 | 5004 | 85 | |500000 | 14220 | 7114 | 5202 | 86 | |1000000 | 23708 | 9966 | 5620 | 87 | |1400000 | 32252 | 11178 | 6056 | 88 | |2000000 | - | 13978 | 6472 | 89 | |4000000 | - | 23238 | 8284 | 90 | |5500000 | - | 32188 | 9854 | 91 | |8000000 | - | - | 12310 | 92 | |16000000 | - | - | 19950 | 93 | |29000000 | - | - | 32324 | 94 | -------------------------------------------------------------------------------- /data/data_utils/preprocess/process_audio.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from transformers import Wav2Vec2Processor, HubertModel 3 | import soundfile as sf 4 | import numpy as np 5 | import torch 6 | import pdb 7 | import glob, os, tqdm 8 | 9 | @torch.no_grad() 10 | def get_hubert_from_speech(args, speech): 11 | device = args.device 12 | print(torch.cuda.is_available()) 13 | print("Loading the Wav2Vec2 Processor...") 14 | wav2vec2_processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft") 15 | print("Loading the HuBERT Model...") 16 | hubert_model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft") 17 | 18 | # global hubert_model 19 | hubert_model = hubert_model.to(device) 20 | if speech.ndim == 2: 21 | speech = speech[:, 0] # [T, 2] ==> [T,] 22 | input_values_all = wav2vec2_processor(speech, return_tensors="pt", sampling_rate=16000).input_values # [1, T] 23 | input_values_all = input_values_all.to(device) 24 | kernel = 400 25 | stride = 320 26 | clip_length = stride * 1000 27 | num_iter = input_values_all.shape[1] // clip_length 28 | expected_T = (input_values_all.shape[1] - (kernel - stride)) // stride 29 | res_lst = [] 30 | for i in range(num_iter): 31 | if i == 0: 32 | start_idx = 0 33 | end_idx = clip_length - stride + kernel 34 | else: 35 | start_idx = clip_length * i 36 | end_idx = start_idx + (clip_length - stride + kernel) 37 | input_values = input_values_all[:, start_idx:end_idx] 38 | hidden_states = hubert_model.forward(input_values).last_hidden_state # [B=1, T=pts//320, hid=1024] 39 | res_lst.append(hidden_states[0]) 40 | if num_iter > 0: 41 | input_values = input_values_all[:, clip_length * num_iter :] 42 | else: 43 | input_values = input_values_all 44 | # if input_values.shape[1] != 0: 45 | if input_values.shape[1] >= kernel: # if the last batch is shorter than kernel_size, skip it 46 | hidden_states = hubert_model(input_values).last_hidden_state # [B=1, T=pts//320, hid=1024] 47 | res_lst.append(hidden_states[0]) 48 | ret = torch.cat(res_lst, dim=0).cpu() # [T, 1024] 49 | # assert ret.shape[0] == expected_T 50 | assert abs(ret.shape[0] - expected_T) <= 1 51 | if ret.shape[0] < expected_T: 52 | ret = torch.nn.functional.pad(ret, (0, 0, 0, 
expected_T - ret.shape[0])) 53 | else: 54 | ret = ret[:expected_T] 55 | return ret 56 | 57 | def convert_wav_sampling_rate(args): 58 | save_root = args.save_sample_dir 59 | source_wav_name = args.audio.split("/")[-1].split(".")[0] 60 | supported_types = (".wav", ".mp3", ".mp4", ".avi") 61 | os.makedirs(os.path.join(save_root, str(args.sampling_rate)), exist_ok = True) 62 | new_wav_name = os.path.join(save_root, str(args.sampling_rate), f"{source_wav_name}") 63 | command = f"ffmpeg -i {args.audio} -f wav -ar {args.sampling_rate} {new_wav_name}.wav -y" 64 | os.system(command) 65 | 66 | def load_idlist(path): 67 | with open(path, "r") as f: 68 | lines = f.readlines() 69 | id_list = [line.replace("\n", "").replace(".mp4", "").strip() for line in lines] 70 | return id_list 71 | 72 | if __name__ == "__main__": 73 | args = argparse.ArgumentParser(description="audio sampling match") 74 | ### for sampling audio 75 | args.add_argument("--audio", type=str, 76 | default="../inference/audio/LetItGo1.wav", help="path to the audio") 77 | args.add_argument("--save_sample_dir", type=str, 78 | default="../inference/sampled_audio", help="save path to the directory of sampled_audio") 79 | args.add_argument("--ref_dir", type=str, 80 | default="../inference/ref/25fps", help="path to the directory of reference images") 81 | args.add_argument("--ref_id_list", type=str, 82 | default=None, 83 | help="if ref_id_list is None, then the whole id in the ref_dir will be included") 84 | args.add_argument("--sampling_rate", type=int, 85 | default=16000) 86 | args.add_argument("--device", type=str, 87 | default="cuda:5") 88 | 89 | ### for extracting hubert 90 | args.add_argument("--wav2vec_proc", type=str, 91 | default="facebook/hubert-large-ls960-ft", 92 | help="the pretrained wav2vec2 processor") 93 | args.add_argument("--hubert_model", type=str, 94 | default="facebook/hubert-large-ls960-ft", 95 | help="the pretrained hubert model") 96 | args.add_argument("--save_hubert_dir", type=str, 97 | default="../inference/hubert", help="save path to the directory of converted hubert") 98 | 99 | args = args.parse_args() 100 | 101 | # load id list 102 | if args.ref_id_list is None: 103 | ref_list = os.listdir(args.ref_dir) 104 | else : 105 | ref_list = load_idlist(args.ref_id_list) 106 | 107 | # convert sampling rate 108 | convert_wav_sampling_rate(args) 109 | 110 | # extract hubert 111 | # confirm the sampled audio 112 | audioname = args.audio.split("/")[-1].split(".")[0] # LetItGo 113 | 114 | sampled_audio = os.path.join(args.save_sample_dir, str(args.sampling_rate), f"{audioname}.wav") 115 | # AToM/data/sampled_audio/19200/LetItGo.wav 116 | if not os.path.exists(sampled_audio): 117 | pass # RunTimeError 118 | hubert_dir = os.path.join(args.save_hubert_dir, str(args.sampling_rate)) 119 | os.makedirs(hubert_dir, exist_ok=True) 120 | hubert_name = os.path.join(hubert_dir, f"{audioname}.npy") 121 | speech_, _ = sf.read(sampled_audio) 122 | hubert_ = get_hubert_from_speech(args, speech_) 123 | np.save(hubert_name, hubert_.detach().numpy()) 124 | print("Finished preprocessing audio.\n") -------------------------------------------------------------------------------- /MToV/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.extend(["."]) 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | from omegaconf import OmegaConf 10 | 11 | from exps.diffusion import diffusion 12 | from exps.first_stage import first_stage 13 | from exps.first_stage_ldmk import 
first_stage_ldmk 14 | 15 | import pdb 16 | from utils import set_random_seed 17 | 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--exp", type=str, required=True, help="experiment name to run") 21 | parser.add_argument("--seed", type=int, default=42, help="random seed") 22 | parser.add_argument("--id", type=str, default="main", help="experiment identifier") 23 | parser.add_argument("--log_dir", type=str, default=None) 24 | 25 | """ Args about Data """ 26 | parser.add_argument("--data", type=str, default="UCF101") 27 | parser.add_argument("--batch_size", type=int, default=24) 28 | parser.add_argument("--timesteps", type=int, default=4) 29 | parser.add_argument("--ds", type=int, default=4) 30 | parser.add_argument("--typetype", type=str, default="x") 31 | 32 | """ Args about Model """ 33 | parser.add_argument("--pretrain_config", type=str, default="configs/autoencoder/autoencoder_kl_f4d6_res128.yaml") 34 | parser.add_argument("--diffusion_config", type=str, default="configs/latent-diffusion/ucf101-ldm-kl-3_res128.yaml") 35 | parser.add_argument("--train_id_txt", type=str, default=None) 36 | 37 | # for GAN resume 38 | parser.add_argument( 39 | "--first_stage_folder", 40 | type=str, 41 | default="", 42 | help="the folder of first stage experiment before GAN", 43 | ) 44 | 45 | # for diffusion model path specification 46 | parser.add_argument("--first_model", type=str, default="", help="the path of pretrained model") 47 | parser.add_argument("--first_model_ldmk", type=str, default="", help="the path of pretrained model") 48 | parser.add_argument("--second_model", type=str, default="", help="the path of pretrained model") 49 | parser.add_argument("--scale_lr", action="store_true") 50 | 51 | 52 | def main(): 53 | """Additional args ends here.""" 54 | args = parser.parse_args() 55 | """ FIX THE RANDOMNESS """ 56 | set_random_seed(args.seed) 57 | torch.backends.cudnn.deterministic = True 58 | torch.backends.cudnn.benchmark = False 59 | 60 | args.n_gpus = torch.cuda.device_count() 61 | 62 | # init and save configs 63 | 64 | """ RUN THE EXP """ 65 | if args.exp == "ddpm": 66 | config = OmegaConf.load(args.diffusion_config) 67 | first_stage_config = OmegaConf.load(args.pretrain_config) 68 | 69 | args.unetconfig = config.model.params.unet_config 70 | args.lr = config.model.base_learning_rate 71 | args.scheduler = config.model.params.scheduler_config 72 | args.res = first_stage_config.model.params.ddconfig.resolution 73 | args.timesteps = first_stage_config.model.params.ddconfig.timesteps 74 | args.skip = first_stage_config.model.params.ddconfig.skip 75 | args.ddconfig = first_stage_config.model.params.ddconfig 76 | args.embed_dim = first_stage_config.model.params.embed_dim 77 | args.ddpmconfig = config.model.params 78 | args.cond_model = config.model.cond_model 79 | 80 | if args.n_gpus == 1: 81 | diffusion(rank=0, args=args) 82 | else: 83 | torch.multiprocessing.spawn(fn=diffusion, args=(args,), nprocs=args.n_gpus) 84 | 85 | elif args.exp == "first_stage": 86 | config = OmegaConf.load(args.pretrain_config) 87 | args.ddconfig = config.model.params.ddconfig 88 | args.embed_dim = config.model.params.embed_dim 89 | args.lossconfig = config.model.params.lossconfig 90 | args.lr = config.model.base_learning_rate 91 | args.res = config.model.params.ddconfig.resolution 92 | args.timesteps = config.model.params.ddconfig.timesteps 93 | args.skip = config.model.params.ddconfig.skip 94 | args.resume = config.model.resume 95 | args.amp = config.model.amp 96 | if args.n_gpus == 1: 97 | 
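            # Single-GPU run: call the first-stage trainer directly in this process;
            # the else-branch below spawns one worker process per GPU.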
            first_stage(rank=0, args=args)
 98 |         else:
 99 |             torch.multiprocessing.spawn(fn=first_stage, args=(args,), nprocs=args.n_gpus)
100 | 
101 |     elif args.exp == "first_stage_ldmk":
102 |         config = OmegaConf.load(args.pretrain_config)
103 |         args.ddconfig = config.model.params.ddconfig
104 |         args.embed_dim = config.model.params.embed_dim
105 |         args.lossconfig = config.model.params.lossconfig
106 |         args.lr = config.model.base_learning_rate
107 |         args.res = config.model.params.ddconfig.resolution
108 |         args.timesteps = config.model.params.ddconfig.timesteps
109 |         args.skip = config.model.params.ddconfig.skip
110 |         args.resume = True
111 |         args.amp = config.model.amp
112 |         if args.n_gpus == 1:
113 |             first_stage_ldmk(rank=0, args=args)
114 |         else:
115 |             torch.multiprocessing.spawn(fn=first_stage_ldmk, args=(args,), nprocs=args.n_gpus)
116 | 
117 |     elif args.exp == "visu_first_stage":
118 |         config = OmegaConf.load(args.pretrain_config)
119 |         args.ddconfig = config.model.params.ddconfig
120 |         args.embed_dim = config.model.params.embed_dim
121 |         args.lossconfig = config.model.params.lossconfig
122 |         args.lr = config.model.base_learning_rate
123 |         args.res = config.model.params.ddconfig.resolution
124 |         args.timesteps = config.model.params.ddconfig.timesteps
125 |         args.skip = config.model.params.ddconfig.skip
126 |         args.resume = config.model.resume
127 |         args.amp = config.model.amp
128 |         if args.n_gpus == 1:
129 |             visu_first_stage(rank=0, args=args)
130 |         else:
131 |             torch.multiprocessing.spawn(fn=first_stage, args=(args,), nprocs=args.n_gpus)
132 | 
133 |     else:
134 |         raise ValueError("Unknown experiment.")
135 | 
136 | 
137 | if __name__ == "__main__":
138 |     main()
139 | 
--------------------------------------------------------------------------------
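For orientation, a minimal single-GPU launch sketch for the entry point above, assembled purely from the argparse defaults in `MToV/main.py`; the dataset name and config path are the parser defaults and are illustrative assumptions only, so any training scripts shipped with the repository should be preferred.

```bash
# Illustrative sketch only: flags mirror the argparse defaults in MToV/main.py;
# the YAML path and dataset name are assumptions, not verified repository files.
cd MToV
python main.py \
    --exp first_stage \
    --id main \
    --data UCF101 \
    --batch_size 24 \
    --pretrain_config configs/autoencoder/autoencoder_kl_f4d6_res128.yaml
```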