├── .gitignore ├── LICENSE ├── README.md ├── bash ├── download.sh ├── inference.sh ├── train_video_styleheat.sh └── train_video_warper.sh ├── configs ├── config.py ├── inference.yaml ├── path.py ├── video_styleheat_trainer.yaml └── video_warper_trainer.yaml ├── data ├── __init__.py ├── audio_dataset.py ├── inference_dataset.py ├── video_dataset.py └── vox_dataset.py ├── dnnlib ├── __init__.py └── util.py ├── docs ├── demo │ ├── images │ │ ├── 100.jpg │ │ └── 40.jpg │ └── intuitive_edit │ │ └── expression.pth └── images │ ├── 402_poster.jpg │ └── input.jpg ├── inference.py ├── loss ├── gan_loss.py ├── iterative_inversion_loss.py ├── lpips.py └── perceptual.py ├── models ├── __init__.py ├── e4e │ ├── __init__.py │ ├── e4e.py │ └── helpers.py ├── hfgi │ ├── __init__.py │ ├── backbone.py │ ├── editing.py │ └── hfgi.py ├── stylegan2 │ ├── __init__.py │ ├── model.py │ └── op │ │ ├── __init__.py │ │ ├── fused_act.py │ │ ├── fused_bias_act.cpp │ │ ├── fused_bias_act_kernel.cu │ │ ├── upfirdn2d.cpp │ │ ├── upfirdn2d.py │ │ └── upfirdn2d_kernel.cu └── styleheat │ ├── __init__.py │ ├── base_function.py │ ├── calibration_net.py │ ├── styleheat.py │ └── warper.py ├── requirements.txt ├── third_part ├── Deep3DFaceRecon_pytorch │ ├── models │ │ ├── __init__.py │ │ ├── arcface_torch │ │ │ ├── README.md │ │ │ ├── backbones │ │ │ │ ├── __init__.py │ │ │ │ ├── iresnet.py │ │ │ │ ├── iresnet2060.py │ │ │ │ └── mobilefacenet.py │ │ │ ├── configs │ │ │ │ ├── 3millions.py │ │ │ │ ├── 3millions_pfc.py │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── glint360k_mbf.py │ │ │ │ ├── glint360k_r100.py │ │ │ │ ├── glint360k_r18.py │ │ │ │ ├── glint360k_r34.py │ │ │ │ ├── glint360k_r50.py │ │ │ │ ├── ms1mv3_mbf.py │ │ │ │ ├── ms1mv3_r18.py │ │ │ │ ├── ms1mv3_r2060.py │ │ │ │ ├── ms1mv3_r34.py │ │ │ │ ├── ms1mv3_r50.py │ │ │ │ └── speed.py │ │ │ ├── dataset.py │ │ │ ├── docs │ │ │ │ ├── eval.md │ │ │ │ ├── install.md │ │ │ │ ├── modelzoo.md │ │ │ │ └── speed_benchmark.md │ │ │ ├── eval │ │ │ │ ├── __init__.py │ │ │ │ └── verification.py │ │ │ ├── eval_ijbc.py │ │ │ ├── inference.py │ │ │ ├── losses.py │ │ │ ├── onnx_helper.py │ │ │ ├── onnx_ijbc.py │ │ │ ├── partial_fc.py │ │ │ ├── requirement.txt │ │ │ ├── run.sh │ │ │ ├── torch2onnx.py │ │ │ ├── train.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── plot.py │ │ │ │ ├── utils_amp.py │ │ │ │ ├── utils_callbacks.py │ │ │ │ ├── utils_config.py │ │ │ │ ├── utils_logging.py │ │ │ │ └── utils_os.py │ │ ├── base_model.py │ │ ├── bfm.py │ │ ├── facerecon_model.py │ │ ├── losses.py │ │ ├── networks.py │ │ └── template_model.py │ ├── options │ │ ├── __init__.py │ │ ├── base_options.py │ │ ├── inference_options.py │ │ └── test_options.py │ └── util │ │ ├── BBRegressorParam_r.mat │ │ ├── __init__.py │ │ ├── generate_list.py │ │ ├── html.py │ │ ├── load_mats.py │ │ ├── nvdiffrast.py │ │ ├── preprocess.py │ │ ├── skin_mask.py │ │ ├── test_mean_face.txt │ │ ├── util.py │ │ └── visualizer.py ├── PerceptualSimilarity │ ├── models │ │ ├── __init__.py │ │ ├── base_model.py │ │ ├── dist_model.py │ │ ├── models.py │ │ ├── networks_basic.py │ │ └── pretrained_networks.py │ ├── util │ │ ├── __init__.py │ │ ├── html.py │ │ ├── util.py │ │ └── visualizer.py │ └── weights │ │ ├── v0.0 │ │ ├── alex.pth │ │ ├── squeeze.pth │ │ └── vgg.pth │ │ └── v0.1 │ │ ├── alex.pth │ │ ├── squeeze.pth │ │ └── vgg.pth └── decalib │ ├── __init__.py │ ├── datasets │ ├── aflw2000.py │ ├── build_datasets.py │ ├── datasets.py │ ├── detectors.py │ ├── ethnicity.py │ ├── now.py │ ├── train_datasets.py │ ├── 
vggface.py │ └── vox.py │ ├── deca.py │ ├── models │ ├── FLAME.py │ ├── decoders.py │ ├── encoders.py │ ├── frnet.py │ ├── lbs.py │ └── resnet.py │ ├── trainer.py │ └── utils │ ├── config.py │ ├── lossfunc.py │ ├── rasterizer │ ├── INSTALL.md │ ├── __init__.py │ ├── setup.py │ ├── standard_rasterize_cuda.cpp │ └── standard_rasterize_cuda_kernel.cu │ ├── renderer.py │ ├── rotation_converter.py │ ├── tensor_cropper.py │ ├── trainer.py │ └── util.py ├── train.py ├── trainers ├── base_trainer.py ├── video_styleheat_trainer.py └── video_warper_trainer.py └── utils ├── common.py ├── cudnn.py ├── distributed.py ├── flow_util.py ├── inference_util.py ├── init_weight.py ├── landmark.py ├── logging.py ├── lpips.py ├── meters.py ├── misc.py ├── trainer.py ├── video_preprocess ├── __init__.py ├── align_face.py ├── crop_videos.py ├── crop_videos_inference.py ├── extract_3dmm.py └── extract_landmark.py └── video_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Mobile Tools for Java (J2ME) 2 | #.mtj.tmp/ 3 | 4 | # Package Files # 5 | *.mp4 6 | checkpoints/ 7 | BFM/ 8 | __pycache__ 9 | __MACOSX 10 | *.zip 11 | docs/demo/ 12 | third_part/SadTalker/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Feii 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /bash/download.sh: -------------------------------------------------------------------------------- 1 | 2 | mkdir checkpoints 3 | wget https://github.com/FeiiYin/StyleHEAT/releases/download/styleheat/Encoder_e4e.pth -O ./checkpoints/Encoder_e4e.pth 4 | wget https://github.com/FeiiYin/StyleHEAT/releases/download/styleheat/hfgi.pth -O ./checkpoints/hfgi.pth 5 | wget https://github.com/FeiiYin/StyleHEAT/releases/download/styleheat/StyleGAN_e4e.pth -O ./checkpoints/StyleGAN_e4e.pth 6 | wget https://github.com/FeiiYin/StyleHEAT/releases/download/styleheat/ffhq_pca.pt -O ./checkpoints/ffhq_pca.pt 7 | wget https://github.com/FeiiYin/StyleHEAT/releases/download/styleheat/ffhq_PCA.npz -O ./checkpoints/ffhq_PCA.npz 8 | wget https://github.com/FeiiYin/StyleHEAT/releases/download/styleheat/interfacegan_directions-20230323T133213Z-001.zip \ 9 | -O ./checkpoints/interfacegan_directions-20230323T133213Z-001.zip 10 | unzip ./checkpoints/interfacegan_directions-20230323T133213Z-001.zip -d ./checkpoints/ 11 | wget https://github.com/FeiiYin/StyleHEAT/releases/download/styleheat/stylegan2_d_256.pth -O ./checkpoints/stylegan2_d_256.pth 12 | wget https://github.com/FeiiYin/StyleHEAT/releases/download/styleheat/model_ir_se50.pth -O ./checkpoints/model_ir_se50.pth 13 | wget https://github.com/FeiiYin/StyleHEAT/releases/download/styleheat/StyleHEAT_visual.pt -O ./checkpoints/StyleHEAT_visual.pt 14 | mkdir ./checkpoints/Deep3D/ 15 | wget https://github.com/FeiiYin/StyleHEAT/releases/download/styleheat/epoch_20.pth -O ./checkpoints/epoch_20.pth 16 | mv checkpoints/epoch_20.pth checkpoints/Deep3D/epoch_20.pth 17 | wget https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/BFM_Fitting.zip -O ./checkpoints/BFM_Fitting.zip 18 | unzip ./checkpoints/BFM_Fitting.zip -d ./checkpoints/BFM/ 19 | mv ./checkpoints/BFM/BFM_Fitting/* ./checkpoints/BFM/ 20 | rm -r ./checkpoints/BFM/BFM_Fitting 21 | 22 | wget https://github.com/FeiiYin/StyleHEAT/releases/download/styleheat/videos.zip -O ./checkpoints/videos.zip 23 | unzip ./checkpoints/videos.zip -d ./checkpoints/ 24 | rm -rf ./checkpoints/__MACOSX 25 | rm ./checkpoints/videos.zip 26 | 27 | rm -rf docs/demo/videos/ 28 | rm -rf docs/demo/audios/ 29 | mkdir docs/demo/videos/ 30 | mkdir docs/demo/audios/ 31 | mv ./checkpoints/videos/audios/* docs/demo/audios/ 32 | rm -rf ./checkpoints/videos/audios/ 33 | mv ./checkpoints/videos/* docs/demo/videos/ 34 | 35 | # pip install -i https://mirrors.cloud.tencent.com/pypi/simple pydub==0.25.1 yacs==0.1.8 librosa==0.6.0 numba==0.48.0 resampy==0.3.1 imageio-ffmpeg==0.4.7 -------------------------------------------------------------------------------- /bash/inference.sh: -------------------------------------------------------------------------------- 1 | echo inference 2 | python3 inference.py \ 3 | --config configs/inference.yaml \ 4 | --video_source=docs/demo/videos/RD_Radio34_003_512.mp4 \ 5 | --image_source=docs/demo/images/100.jpg \ 6 | --cross_id \ 7 | --output_dir=docs/demo/output/ \ 8 | --frame_limit=100 --inversion_option=encode --if_align --if_extract 9 | 10 | 11 | python inference.py \ 12 | --config configs/inference.yaml \ 13 | --video_source=./docs/demo/videos/ \ 14 | --output_dir=./docs/demo/output --if_extract -------------------------------------------------------------------------------- /bash/train_video_styleheat.sh: -------------------------------------------------------------------------------- 1 | export BASICSR_JIT='True' 2 | 3 | 
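# Note: BASICSR_JIT='True' is read by the basicsr package; as far as its documented
# behaviour goes, it makes the CUDA ops (fused_act / upfirdn2d) JIT-compile at import
# time instead of using pre-built binaries. Leave it unset if the compiled ops are
# already installed. The launch below assumes 4 local GPUs (--nproc_per_node=4).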
name=train_video_styleheat 4 | python -m torch.distributed.launch --nproc_per_node=4 --master_port 12347 train.py \ 5 | --checkpoints_dir=./output \ 6 | --config configs/video_styleheat_trainer.yaml --name ${name} 7 | 8 | -------------------------------------------------------------------------------- /bash/train_video_warper.sh: -------------------------------------------------------------------------------- 1 | export BASICSR_JIT='True' 2 | 3 | name=train_video_warper 4 | python -m torch.distributed.launch --nproc_per_node=4 --master_port 12347 train.py \ 5 | --checkpoints_dir=./output \ 6 | --config configs/video_warper_trainer.yaml --name ${name} 7 | 8 | 9 | -------------------------------------------------------------------------------- /configs/inference.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | 3 | model: 4 | mode: inference 5 | enable_audio: False 6 | free_styler_path: checkpoints/StyleHEAT_visual.pt 7 | -------------------------------------------------------------------------------- /configs/path.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | PRETRAINED_MODELS_PATH = { 4 | # models 5 | 'e4e': 'checkpoints/Encoder_e4e.pth', 6 | 'hfgi': 'checkpoints/hfgi.pth', 7 | 'stylegan2': 'checkpoints/StyleGAN_e4e.pth', 8 | # editing 9 | 'interfacegan': 'checkpoints/interfacegan_directions/', 10 | 'ganspace': 'checkpoints/ffhq_pca.pt', 11 | 'FFHQ_PCA': 'checkpoints/ffhq_PCA.npz', 12 | '': '', 13 | # pretrain 14 | 'discriminator': 'checkpoints/stylegan2_d_256.pth', 15 | 'video_warper': 'checkpoints/video_warper.pth', 16 | 'styleheat': 'checkpoints/StyleHEAT_visual.pt', 17 | # id_loss 18 | 'irse50': 'checkpoints/model_ir_se50.pth', 19 | # 3DMM 20 | 'BFM': 'checkpoints/BFM', 21 | '3DMM': 'checkpoints/Deep3D/epoch_20.pth', 22 | } 23 | -------------------------------------------------------------------------------- /configs/video_styleheat_trainer.yaml: -------------------------------------------------------------------------------- 1 | distributed: True 2 | image_to_tensorboard: False 3 | snapshot_save_iter: 20000 4 | snapshot_save_epoch: 1 5 | snapshot_save_start_iter: 10000 6 | snapshot_save_start_epoch: 0 7 | image_save_iter: 1000 8 | max_epoch: 8 9 | logging_iter: 200 10 | results_dir: ./eval_results 11 | 12 | 13 | gen_optimizer: 14 | type: adam 15 | lr: 0.0001 16 | adam_beta1: 0.5 17 | adam_beta2: 0.999 18 | lr_policy: 19 | iteration_mode: True 20 | type: step 21 | step_size: 300000 22 | gamma: 0.2 23 | 24 | trainer: 25 | type: trainers.video_styleheat_trainer::VideoStyleHEATTrainer 26 | pretrain_warp_iteration: 200000 27 | loss_weight: 28 | weight_perceptual_warp: 0.02 29 | weight_perceptual_final: 0.2 30 | weight_perceptual_regular: 0.01 31 | weight_gan_loss: 1.0 32 | weight_local_loss: 2 33 | vgg_param_warp: 34 | network: vgg19 35 | layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1'] 36 | use_style_loss: False 37 | num_scales: 4 38 | vgg_param_final: 39 | network: vgg19 40 | layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1'] 41 | use_style_loss: True 42 | num_scales: 4 43 | style_to_perceptual: 250 44 | init: 45 | type: 'normal' 46 | gain: 0.02 47 | 48 | 49 | model: 50 | type: models.styleheat.styleheat::StyleHEAT 51 | mode: train_visual_refine 52 | enable_audio: False 53 | visual_warper_path: ./checkpoints/video_warper.pth 54 | path: 55 | optimized_param: calibrator,video_warper 56 | from_scratch_param: calibrator 57 | 58 | 
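# Note: the "type" fields in this config follow a "python.module.path::ClassName"
# convention -- the string is split on "::", the module is imported via importlib and
# the named class is instantiated (see find_dataset_using_name in data/__init__.py for
# the dataset variant; trainer/model types are assumed to resolve the same way).
# The data block below expects the path of the preprocessed HDTF videos to be filled in.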
59 | # Data options. 60 | data: 61 | type: data.video_dataset::VideoDataset 62 | path: # HDTFPATH_TOBEMODIFIED 63 | resolution: 512 64 | semantic_radius: 13 65 | train: 66 | batch_size: 4 67 | distributed: True 68 | val: 69 | batch_size: 4 70 | distributed: True 71 | -------------------------------------------------------------------------------- /configs/video_warper_trainer.yaml: -------------------------------------------------------------------------------- 1 | distributed: True 2 | image_to_tensorboard: True 3 | snapshot_save_iter: 20000 4 | snapshot_save_epoch: 2 5 | snapshot_save_start_iter: 20000 6 | snapshot_save_start_epoch: 0 7 | image_save_iter: 1000 8 | max_epoch: 200 9 | logging_iter: 500 10 | results_dir: ./eval_results 11 | 12 | 13 | gen_optimizer: 14 | type: adam 15 | lr: 0.0001 16 | adam_beta1: 0.5 17 | adam_beta2: 0.999 18 | lr_policy: 19 | iteration_mode: True 20 | type: step 21 | step_size: 300000 22 | gamma: 0.2 23 | 24 | trainer: 25 | type: trainers.video_warper_trainer::VideoWarperTrainer 26 | pretrain_warp_iteration: 200000 27 | loss_weight: 28 | weight_perceptual_warp: 1 29 | weight_perceptual_warp_middle: 0.5 30 | vgg_param_warp: 31 | network: vgg19 32 | layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1'] 33 | use_style_loss: False 34 | num_scales: 4 35 | init: 36 | type: 'normal' 37 | gain: 0.02 38 | 39 | 40 | model: 41 | type: models.styleheat.warper::VideoWarper 42 | mode: train_video_warper 43 | optimized_param: all 44 | from_scratch_param: all 45 | 46 | 47 | # Data options. 48 | data: 49 | type: data.vox_dataset::VoxDataset 50 | path: # VOXPATH_TOBEMODIFIED 51 | resolution: 256 52 | semantic_radius: 13 53 | train: 54 | batch_size: 16 55 | distributed: True 56 | val: 57 | batch_size: 16 58 | distributed: True 59 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | import torch.utils.data 4 | from utils.distributed import master_only_print as print 5 | 6 | 7 | def find_dataset_using_name(dataset_name): 8 | dataset_filename = dataset_name 9 | module, target = dataset_name.split('::') 10 | datasetlib = importlib.import_module(module) 11 | dataset = None 12 | for name, cls in datasetlib.__dict__.items(): 13 | if name == target: 14 | dataset = cls 15 | 16 | if dataset is None: 17 | raise ValueError("In %s.py, there should be a class " 18 | "with class name that matches %s in lowercase." 
% 19 | (dataset_filename, target)) 20 | return dataset 21 | 22 | 23 | def get_option_setter(dataset_name): 24 | dataset_class = find_dataset_using_name(dataset_name) 25 | return dataset_class.modify_commandline_options 26 | 27 | 28 | def create_dataloader(opt, is_inference): 29 | dataset = find_dataset_using_name(opt.type) 30 | instance = dataset(opt, is_inference) 31 | phase = 'val' if is_inference else 'training' 32 | batch_size = opt.val.batch_size if is_inference else opt.train.batch_size 33 | print("%s dataset [%s] of size %d was created" % 34 | (phase, opt.type, len(instance))) 35 | dataloader = torch.utils.data.DataLoader( 36 | instance, 37 | batch_size=batch_size, 38 | sampler=data_sampler(instance, shuffle=not is_inference, distributed=opt.train.distributed), 39 | drop_last=not is_inference, 40 | num_workers=getattr(opt, 'num_workers', 0), 41 | ) 42 | 43 | return dataloader 44 | 45 | 46 | def data_sampler(dataset, shuffle, distributed): 47 | if distributed: 48 | return torch.utils.data.distributed.DistributedSampler(dataset, shuffle=shuffle) 49 | if shuffle: 50 | return torch.utils.data.RandomSampler(dataset) 51 | else: 52 | return torch.utils.data.SequentialSampler(dataset) 53 | 54 | 55 | def get_dataloader(opt, is_inference=False): 56 | dataset = create_dataloader(opt, is_inference=is_inference) 57 | return dataset 58 | 59 | 60 | def get_train_val_dataloader(opt): 61 | val_dataset = create_dataloader(opt, is_inference=True) 62 | train_dataset = create_dataloader(opt, is_inference=False) 63 | return val_dataset, train_dataset 64 | -------------------------------------------------------------------------------- /dnnlib/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
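# Only two small helpers are re-exported from NVIDIA's dnnlib utilities: EasyDict, a dict
# subclass whose keys can also be read and written as attributes (cfg.lr == cfg['lr']),
# and make_cache_dir_path, which (per the upstream dnnlib utilities) resolves a path
# inside a per-user cache directory for downloaded assets.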
8 | 9 | from .util import EasyDict, make_cache_dir_path 10 | -------------------------------------------------------------------------------- /docs/demo/images/100.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/docs/demo/images/100.jpg -------------------------------------------------------------------------------- /docs/demo/images/40.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/docs/demo/images/40.jpg -------------------------------------------------------------------------------- /docs/demo/intuitive_edit/expression.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/docs/demo/intuitive_edit/expression.pth -------------------------------------------------------------------------------- /docs/images/402_poster.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/docs/images/402_poster.jpg -------------------------------------------------------------------------------- /docs/images/input.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/docs/images/input.jpg -------------------------------------------------------------------------------- /loss/gan_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from basicsr.losses import build_loss 6 | from configs.path import PRETRAINED_MODELS_PATH 7 | from models.stylegan2.model import Discriminator 8 | from utils.distributed import master_only_print as print 9 | 10 | 11 | class GANLoss(nn.Module): 12 | 13 | def __init__(self): 14 | super().__init__() 15 | self.net_d = Discriminator(size=256, channel_multiplier=2) 16 | self.net_d = self.net_d.to('cuda') 17 | 18 | print('Load pre-trained StyleGAN2 256 discriminator done.') 19 | pretrained_path = PRETRAINED_MODELS_PATH['discriminator'] 20 | ckpt = torch.load(pretrained_path, map_location='cpu') 21 | self.net_d.load_state_dict(ckpt) 22 | self.net_d.eval() 23 | 24 | gan_opt = { 25 | 'type': 'GANLoss', 26 | 'gan_type': 'wgan_softplus', 27 | 'loss_weight': 1e-1 28 | } 29 | self.gan_loss = build_loss(gan_opt).to('cuda') 30 | 31 | def forward(self, fake_image): 32 | # Note: for use the pre-trained discriminator, the batchsize are supposed to be times of 4 33 | fake_image = F.interpolate(fake_image, (256, 256), mode='bilinear', align_corners=False) 34 | fake_g_pred = self.net_d(fake_image) 35 | loss = self.gan_loss(fake_g_pred, target_is_real=False, is_disc=False) 36 | return loss 37 | -------------------------------------------------------------------------------- /loss/iterative_inversion_loss.py: -------------------------------------------------------------------------------- 1 | # borrowed from https://github.com/ZPdesu/Barbershop/losses/embedding_loss.py 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from loss.perceptual import PerceptualLoss 8 | from configs.path import PRETRAINED_MODELS_PATH 9 | 10 
| 11 | class EmbeddingLossBuilder(nn.Module): 12 | def __init__(self): 13 | super(EmbeddingLossBuilder, self).__init__() 14 | # self.parsed_loss = [[opt.l2_lambda, 'l2'], [opt.perceptual_lambda, 'percep']] 15 | # perceptualual loss 16 | self.l2 = torch.nn.MSELoss() 17 | self.perceptual = PerceptualLoss( 18 | network='vgg19', 19 | layers=['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1'], 20 | num_scales=4, 21 | use_style_loss=True, 22 | weight_style_to_perceptual=250 23 | ).to('cuda') 24 | # for p_norm loss 25 | self.load_PCA_model() 26 | 27 | def _loss_l2(self, gen_im, ref_im, **kwargs): 28 | return self.l2(gen_im, ref_im) 29 | 30 | def _loss_lpips(self, gen_im, ref_im, **kwargs): 31 | return self.perceptual(gen_im, ref_im) 32 | 33 | def forward(self, fake_image, gt_image, latent_in, latent_F, F_init): 34 | loss = 0 35 | loss_dic = {} 36 | loss_l2 = self._loss_l2(fake_image, gt_image) * 1.0 37 | loss_dic['l2'] = loss_l2 38 | loss += loss_l2 39 | 40 | # TODO maybe downsample is not necessary 41 | fake_image_256 = nn.functional.interpolate(fake_image, (256, 256), mode='bilinear', align_corners=False) 42 | gt_image_256 = nn.functional.interpolate(gt_image, (256, 256), mode='bilinear', align_corners=False) 43 | loss_lpips = self._loss_lpips(fake_image_256, gt_image_256) * 1.0 44 | loss_dic['lpips'] = loss_lpips 45 | loss += loss_lpips 46 | 47 | p_norm_loss = self.cal_p_norm_loss(latent_in) # done 48 | loss_dic['p-norm'] = p_norm_loss 49 | loss += p_norm_loss 50 | 51 | l_F = self.cal_l_F(latent_F, F_init) 52 | loss_dic['l_F'] = l_F 53 | loss += l_F 54 | return loss, loss_dic 55 | 56 | def cal_l_F(self, latent_F, F_init): 57 | self.l_F_lambda = 0.1 58 | return self.l_F_lambda * (latent_F - F_init).pow(2).mean() 59 | 60 | def cal_p_norm_loss(self, latent_in): 61 | latent_p_norm = (torch.nn.LeakyReLU(negative_slope=5)(latent_in) - self.X_mean).bmm( 62 | self.X_comp.T.unsqueeze(0)) / self.X_stdev 63 | p_norm_loss = self.p_norm_lambda * (latent_p_norm.pow(2).mean()) 64 | return p_norm_loss 65 | 66 | def load_PCA_model(self): 67 | device = 'cuda' 68 | PCA_path = PRETRAINED_MODELS_PATH['FFHQ_PCA'] 69 | 70 | PCA_model = np.load(PCA_path) 71 | self.X_mean = torch.from_numpy(PCA_model['X_mean']).float().to(device) 72 | self.X_comp = torch.from_numpy(PCA_model['X_comp']).float().to(device) 73 | self.X_stdev = torch.from_numpy(PCA_model['X_stdev']).float().to(device) 74 | self.p_norm_lambda = 0.001 75 | 76 | # def build_PCA_model(self, PCA_path): 77 | # with torch.no_grad(): 78 | # latent = torch.randn((1000000, 512), dtype=torch.float32) 79 | # # latent = torch.randn((10000, 512), dtype=torch.float32) 80 | # self.generator.style.cpu() 81 | # pulse_space = torch.nn.LeakyReLU(5)(self.generator.style(latent)).numpy() 82 | # self.generator.style.to(self.opts.device) 83 | # 84 | # from utils.PCA_utils import IPCAEstimator 85 | # 86 | # transformer = IPCAEstimator(512) 87 | # X_mean = pulse_space.mean(0) 88 | # transformer.fit(pulse_space - X_mean) 89 | # X_comp, X_stdev, X_var_ratio = transformer.get_components() 90 | # np.savez(PCA_path, X_mean=X_mean, X_comp=X_comp, X_stdev=X_stdev, X_var_ratio=X_var_ratio) 91 | -------------------------------------------------------------------------------- /loss/lpips.py: -------------------------------------------------------------------------------- 1 | from itertools import chain 2 | import torch 3 | import torch.nn as nn 4 | from torchvision import models 5 | from collections import OrderedDict 6 | 7 | 8 | def get_state_dict(net_type: str = 'alex', version: 
str = '0.1'): 9 | # build url 10 | url = 'https://raw.githubusercontent.com/richzhang/PerceptualSimilarity/' \ 11 | + f'master/lpips/weights/v{version}/{net_type}.pth' 12 | 13 | # download 14 | old_state_dict = torch.hub.load_state_dict_from_url( 15 | url, progress=True, 16 | map_location=None if torch.cuda.is_available() else torch.device('cpu') 17 | ) 18 | 19 | # rename keys 20 | new_state_dict = OrderedDict() 21 | for key, val in old_state_dict.items(): 22 | new_key = key 23 | new_key = new_key.replace('lin', '') 24 | new_key = new_key.replace('model.', '') 25 | new_state_dict[new_key] = val 26 | 27 | return new_state_dict 28 | 29 | 30 | def normalize_activation(x, eps=1e-10): 31 | norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True)) 32 | return x / (norm_factor + eps) 33 | 34 | 35 | class LinLayers(nn.ModuleList): 36 | def __init__(self, n_channels_list): 37 | super(LinLayers, self).__init__([ 38 | nn.Sequential( 39 | nn.Identity(), 40 | nn.Conv2d(nc, 1, 1, 1, 0, bias=False) 41 | ) for nc in n_channels_list 42 | ]) 43 | 44 | for param in self.parameters(): 45 | param.requires_grad = False 46 | 47 | 48 | class BaseNet(nn.Module): 49 | def __init__(self): 50 | super(BaseNet, self).__init__() 51 | 52 | # register buffer 53 | self.register_buffer( 54 | 'mean', torch.Tensor([-.030, -.088, -.188])[None, :, None, None]) 55 | self.register_buffer( 56 | 'std', torch.Tensor([.458, .448, .450])[None, :, None, None]) 57 | 58 | def set_requires_grad(self, state: bool): 59 | for param in chain(self.parameters(), self.buffers()): 60 | param.requires_grad = state 61 | 62 | def z_score(self, x: torch.Tensor): 63 | return (x - self.mean) / self.std 64 | 65 | def forward(self, x: torch.Tensor): 66 | x = self.z_score(x) 67 | 68 | output = [] 69 | for i, (_, layer) in enumerate(self.layers._modules.items(), 1): 70 | x = layer(x) 71 | if i in self.target_layers: 72 | output.append(normalize_activation(x)) 73 | if len(output) == len(self.target_layers): 74 | break 75 | return output 76 | 77 | 78 | class SqueezeNet(BaseNet): 79 | def __init__(self): 80 | super(SqueezeNet, self).__init__() 81 | 82 | self.layers = models.squeezenet1_1(True).features 83 | self.target_layers = [2, 5, 8, 10, 11, 12, 13] 84 | self.n_channels_list = [64, 128, 256, 384, 384, 512, 512] 85 | 86 | self.set_requires_grad(False) 87 | 88 | 89 | class AlexNet(BaseNet): 90 | def __init__(self): 91 | super(AlexNet, self).__init__() 92 | 93 | self.layers = models.alexnet(True).features 94 | self.target_layers = [2, 5, 8, 10, 12] 95 | self.n_channels_list = [64, 192, 384, 256, 256] 96 | 97 | self.set_requires_grad(False) 98 | 99 | 100 | class VGG16(BaseNet): 101 | def __init__(self): 102 | super(VGG16, self).__init__() 103 | 104 | self.layers = models.vgg16(True).features 105 | self.target_layers = [4, 9, 16, 23, 30] 106 | self.n_channels_list = [64, 128, 256, 512, 512] 107 | 108 | self.set_requires_grad(False) 109 | 110 | 111 | def get_network(net_type: str): 112 | if net_type == 'alex': 113 | return AlexNet() 114 | elif net_type == 'squeeze': 115 | return SqueezeNet() 116 | elif net_type == 'vgg': 117 | return VGG16() 118 | else: 119 | raise NotImplementedError('choose net_type from [alex, squeeze, vgg].') 120 | 121 | 122 | 123 | class LPIPS(nn.Module): 124 | r"""Creates a criterion that measures 125 | Learned Perceptual Image Patch Similarity (LPIPS). 126 | Arguments: 127 | net_type (str): the network type to compare the features: 128 | 'alex' | 'squeeze' | 'vgg'. Default: 'alex'. 129 | version (str): the version of LPIPS. 
Default: 0.1. 130 | """ 131 | def __init__(self, net_type: str = 'alex', version: str = '0.1'): 132 | 133 | assert version in ['0.1'], 'v0.1 is only supported now' 134 | 135 | super(LPIPS, self).__init__() 136 | 137 | # pretrained network 138 | self.net = get_network(net_type).to("cuda") 139 | 140 | # linear layers 141 | self.lin = LinLayers(self.net.n_channels_list).to("cuda") 142 | self.lin.load_state_dict(get_state_dict(net_type, version)) 143 | 144 | def forward(self, x: torch.Tensor, y: torch.Tensor): 145 | feat_x, feat_y = self.net(x), self.net(y) 146 | 147 | diff = [(fx - fy) ** 2 for fx, fy in zip(feat_x, feat_y)] 148 | res = [l(d).mean((2, 3), True) for d, l in zip(diff, self.lin)] 149 | 150 | return torch.sum(torch.cat(res, 0)) / x.shape[0] 151 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /models/e4e/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/models/e4e/__init__.py -------------------------------------------------------------------------------- /models/e4e/e4e.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch.nn as nn 4 | 5 | from enum import Enum 6 | from models.stylegan2.model import EqualLinear 7 | from models.e4e.helpers import get_blocks, bottleneck_IR, bottleneck_IR_SE, _upsample_add 8 | 9 | 10 | class ProgressiveStage(Enum): 11 | WTraining = 0 12 | Delta1Training = 1 13 | Delta2Training = 2 14 | Delta3Training = 3 15 | Delta4Training = 4 16 | Delta5Training = 5 17 | Delta6Training = 6 18 | Delta7Training = 7 19 | Delta8Training = 8 20 | Delta9Training = 9 21 | Delta10Training = 10 22 | Delta11Training = 11 23 | Delta12Training = 12 24 | Delta13Training = 13 25 | Delta14Training = 14 26 | Delta15Training = 15 27 | Delta16Training = 16 28 | Delta17Training = 17 29 | Inference = 18 30 | 31 | 32 | class GradualStyleBlock(nn.Module): 33 | def __init__(self, in_c, out_c, spatial): 34 | super(GradualStyleBlock, self).__init__() 35 | self.out_c = out_c 36 | self.spatial = spatial 37 | num_pools = int(np.log2(spatial)) 38 | modules = [] 39 | modules += [nn.Conv2d(in_c, out_c, kernel_size=3, stride=2, padding=1), 40 | nn.LeakyReLU()] 41 | for i in range(num_pools - 1): 42 | modules += [ 43 | nn.Conv2d(out_c, out_c, kernel_size=3, stride=2, padding=1), 44 | nn.LeakyReLU() 45 | ] 46 | self.convs = nn.Sequential(*modules) 47 | self.linear = EqualLinear(out_c, out_c, lr_mul=1) 48 | 49 | def forward(self, x): 50 | x = self.convs(x) 51 | x = x.view(-1, self.out_c) 52 | x = self.linear(x) 53 | return x 54 | 55 | 56 | class Encoder4Editing(nn.Module): 57 | def __init__(self, num_layers, mode='ir', stylegan_size=1024): 58 | super(Encoder4Editing, self).__init__() 59 | assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152' 60 | assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' 61 | blocks = get_blocks(num_layers) 62 | if mode == 'ir': 63 | unit_module = bottleneck_IR 64 | elif mode == 'ir_se': 65 | unit_module = bottleneck_IR_SE 66 | self.input_layer = nn.Sequential(nn.Conv2d(3, 64, (3, 3), 1, 1, bias=False), 67 | nn.BatchNorm2d(64), 68 | nn.PReLU(64)) 69 | modules = [] 70 | for block in blocks: 71 | for 
bottleneck in block: 72 | modules.append(unit_module(bottleneck.in_channel, 73 | bottleneck.depth, 74 | bottleneck.stride)) 75 | self.body = nn.Sequential(*modules) 76 | 77 | self.styles = nn.ModuleList() 78 | log_size = int(math.log(stylegan_size, 2)) 79 | self.style_count = 2 * log_size - 2 80 | self.coarse_ind = 3 81 | self.middle_ind = 7 82 | 83 | for i in range(self.style_count): 84 | if i < self.coarse_ind: 85 | style = GradualStyleBlock(512, 512, 16) 86 | elif i < self.middle_ind: 87 | style = GradualStyleBlock(512, 512, 32) 88 | else: 89 | style = GradualStyleBlock(512, 512, 64) 90 | self.styles.append(style) 91 | 92 | self.latlayer1 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0) 93 | self.latlayer2 = nn.Conv2d(128, 512, kernel_size=1, stride=1, padding=0) 94 | 95 | self.progressive_stage = ProgressiveStage.Inference 96 | 97 | def get_deltas_starting_dimensions(self): 98 | """ Get a list of the initial dimension of every delta from which it is applied """ 99 | return list(range(self.style_count)) # Each dimension has a delta applied to it 100 | 101 | def set_progressive_stage(self, new_stage: ProgressiveStage): 102 | self.progressive_stage = new_stage 103 | print('Changed progressive stage to: ', new_stage) 104 | 105 | def forward(self, x): 106 | x = self.input_layer(x) 107 | 108 | modulelist = list(self.body._modules.values()) 109 | for i, l in enumerate(modulelist): 110 | x = l(x) 111 | if i == 6: 112 | c1 = x 113 | elif i == 20: 114 | c2 = x 115 | elif i == 23: 116 | c3 = x 117 | 118 | # Infer main W and duplicate it 119 | w0 = self.styles[0](c3) 120 | w = w0.repeat(self.style_count, 1, 1).permute(1, 0, 2) 121 | stage = self.progressive_stage.value 122 | features = c3 123 | for i in range(1, min(stage + 1, self.style_count)): # Infer additional deltas 124 | if i == self.coarse_ind: 125 | p2 = _upsample_add(c3, self.latlayer1(c2)) # FPN's middle features 126 | features = p2 127 | elif i == self.middle_ind: 128 | p1 = _upsample_add(p2, self.latlayer2(c1)) # FPN's fine features 129 | features = p1 130 | delta_i = self.styles[i](features) 131 | w[:, i] += delta_i 132 | return w 133 | 134 | 135 | class E4eEncoder(nn.Module): 136 | def __init__(self, latent_avg): 137 | super(E4eEncoder, self).__init__() 138 | self.encoder = Encoder4Editing(50, 'ir_se', stylegan_size=1024) 139 | self.latent_avg = latent_avg 140 | 141 | def forward(self, x): 142 | codes = self.encoder(x) 143 | # normalize with respect to the center of an average face 144 | if codes.ndim == 2: 145 | w_latent = codes + self.latent_avg.repeat(codes.shape[0], 1, 1)[:, 0, :] 146 | else: 147 | w_latent = codes + self.latent_avg.repeat(codes.shape[0], 1, 1) 148 | return w_latent 149 | -------------------------------------------------------------------------------- /models/e4e/helpers.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import torch 3 | import torch.nn.functional as F 4 | from torch.nn import Conv2d, BatchNorm2d, PReLU, ReLU, Sigmoid, MaxPool2d, AdaptiveAvgPool2d, Sequential, Module 5 | 6 | """ 7 | ArcFace implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch) 8 | """ 9 | 10 | 11 | class Flatten(Module): 12 | def forward(self, input): 13 | return input.view(input.size(0), -1) 14 | 15 | 16 | def l2_norm(input, axis=1): 17 | norm = torch.norm(input, 2, axis, True) 18 | output = torch.div(input, norm) 19 | return output 20 | 21 | 22 | class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): 
23 | """ A named tuple describing a ResNet block. """ 24 | 25 | 26 | def get_block(in_channel, depth, num_units, stride=2): 27 | return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] 28 | 29 | 30 | def get_blocks(num_layers): 31 | if num_layers == 50: 32 | blocks = [ 33 | get_block(in_channel=64, depth=64, num_units=3), 34 | get_block(in_channel=64, depth=128, num_units=4), 35 | get_block(in_channel=128, depth=256, num_units=14), 36 | get_block(in_channel=256, depth=512, num_units=3) 37 | ] 38 | elif num_layers == 100: 39 | blocks = [ 40 | get_block(in_channel=64, depth=64, num_units=3), 41 | get_block(in_channel=64, depth=128, num_units=13), 42 | get_block(in_channel=128, depth=256, num_units=30), 43 | get_block(in_channel=256, depth=512, num_units=3) 44 | ] 45 | elif num_layers == 152: 46 | blocks = [ 47 | get_block(in_channel=64, depth=64, num_units=3), 48 | get_block(in_channel=64, depth=128, num_units=8), 49 | get_block(in_channel=128, depth=256, num_units=36), 50 | get_block(in_channel=256, depth=512, num_units=3) 51 | ] 52 | else: 53 | raise ValueError("Invalid number of layers: {}. Must be one of [50, 100, 152]".format(num_layers)) 54 | return blocks 55 | 56 | 57 | class SEModule(Module): 58 | def __init__(self, channels, reduction): 59 | super(SEModule, self).__init__() 60 | self.avg_pool = AdaptiveAvgPool2d(1) 61 | self.fc1 = Conv2d(channels, channels // reduction, kernel_size=1, padding=0, bias=False) 62 | self.relu = ReLU(inplace=True) 63 | self.fc2 = Conv2d(channels // reduction, channels, kernel_size=1, padding=0, bias=False) 64 | self.sigmoid = Sigmoid() 65 | 66 | def forward(self, x): 67 | module_input = x 68 | x = self.avg_pool(x) 69 | x = self.fc1(x) 70 | x = self.relu(x) 71 | x = self.fc2(x) 72 | x = self.sigmoid(x) 73 | return module_input * x 74 | 75 | 76 | class bottleneck_IR(Module): 77 | def __init__(self, in_channel, depth, stride): 78 | super(bottleneck_IR, self).__init__() 79 | if in_channel == depth: 80 | self.shortcut_layer = MaxPool2d(1, stride) 81 | else: 82 | self.shortcut_layer = Sequential( 83 | Conv2d(in_channel, depth, (1, 1), stride, bias=False), 84 | BatchNorm2d(depth) 85 | ) 86 | self.res_layer = Sequential( 87 | BatchNorm2d(in_channel), 88 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth), 89 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth) 90 | ) 91 | 92 | def forward(self, x): 93 | shortcut = self.shortcut_layer(x) 94 | res = self.res_layer(x) 95 | return res + shortcut 96 | 97 | 98 | class bottleneck_IR_SE(Module): 99 | def __init__(self, in_channel, depth, stride): 100 | super(bottleneck_IR_SE, self).__init__() 101 | if in_channel == depth: 102 | self.shortcut_layer = MaxPool2d(1, stride) 103 | else: 104 | self.shortcut_layer = Sequential( 105 | Conv2d(in_channel, depth, (1, 1), stride, bias=False), 106 | BatchNorm2d(depth) 107 | ) 108 | self.res_layer = Sequential( 109 | BatchNorm2d(in_channel), 110 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), 111 | PReLU(depth), 112 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False), 113 | BatchNorm2d(depth), 114 | SEModule(depth, 16) 115 | ) 116 | 117 | def forward(self, x): 118 | shortcut = self.shortcut_layer(x) 119 | res = self.res_layer(x) 120 | return res + shortcut 121 | 122 | 123 | def _upsample_add(x, y): 124 | """Upsample and add two feature maps. 125 | Args: 126 | x: (Variable) top feature map to be upsampled. 127 | y: (Variable) lateral feature map. 
128 | Returns: 129 | (Variable) added feature map. 130 | Note in PyTorch, when input size is odd, the upsampled feature map 131 | with `F.upsample(..., scale_factor=2, mode='nearest')` 132 | maybe not equal to the lateral feature map size. 133 | e.g. 134 | original input size: [N,_,15,15] -> 135 | conv2d feature map size: [N,_,8,8] -> 136 | upsampled feature map size: [N,_,16,16] 137 | So we choose bilinear upsample which supports arbitrary output sizes. 138 | """ 139 | _, _, H, W = y.size() 140 | return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y 141 | -------------------------------------------------------------------------------- /models/hfgi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/models/hfgi/__init__.py -------------------------------------------------------------------------------- /models/hfgi/backbone.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from models.stylegan2.model import EqualConv2d, ScaledLeakyReLU 4 | from models.e4e.helpers import bottleneck_IR 5 | 6 | 7 | # Consultation encoder 8 | class ResidualEncoder(nn.Module): 9 | def __init__(self, opts=None): 10 | super(ResidualEncoder, self).__init__() 11 | self.conv_layer1 = nn.Sequential(nn.Conv2d(3, 32, (3, 3), 1, 1, bias=False), 12 | nn.BatchNorm2d(32), 13 | nn.PReLU(32)) 14 | 15 | self.conv_layer2 = nn.Sequential(*[bottleneck_IR(32,48,2), bottleneck_IR(48,48,1), bottleneck_IR(48,48,1)]) 16 | 17 | self.conv_layer3 = nn.Sequential(*[bottleneck_IR(48,64,2), bottleneck_IR(64,64,1), bottleneck_IR(64,64,1)]) 18 | 19 | self.condition_scale3 = nn.Sequential( 20 | EqualConv2d(64, 512, 3, stride=1, padding=1, bias=True ), 21 | ScaledLeakyReLU(0.2), 22 | EqualConv2d(512, 512, 3, stride=1, padding=1, bias=True )) 23 | 24 | self.condition_shift3 = nn.Sequential( 25 | EqualConv2d(64, 512, 3, stride=1, padding=1, bias=True ), 26 | ScaledLeakyReLU(0.2), 27 | EqualConv2d(512, 512, 3, stride=1, padding=1, bias=True )) 28 | 29 | def get_deltas_starting_dimensions(self): 30 | ''' Get a list of the initial dimension of every delta from which it is applied ''' 31 | return list(range(self.style_count)) # Each dimension has a delta applied to it 32 | 33 | def forward(self, x): 34 | conditions = [] 35 | feat1 = self.conv_layer1(x) 36 | feat2 = self.conv_layer2(feat1) 37 | feat3 = self.conv_layer3(feat2) 38 | 39 | scale = self.condition_scale3(feat3) 40 | scale = nn.functional.interpolate(scale, size=(64,64) , mode='bilinear') 41 | conditions.append(scale.clone()) 42 | shift = self.condition_shift3(feat3) 43 | shift = nn.functional.interpolate(shift, size=(64,64) , mode='bilinear') 44 | conditions.append(shift.clone()) 45 | return conditions 46 | 47 | 48 | # ADA 49 | class ResidualAligner(nn.Module): 50 | def __init__(self, opts=None): 51 | super(ResidualAligner, self).__init__() 52 | self.conv_layer1 = nn.Sequential(nn.Conv2d(6, 16, (3, 3), 1, 1, bias=False), 53 | nn.BatchNorm2d(16), 54 | nn.PReLU(16)) 55 | 56 | self.conv_layer2 = nn.Sequential(*[bottleneck_IR(16, 32, 2), bottleneck_IR(32, 32, 1), bottleneck_IR(32, 32, 1)]) 57 | self.conv_layer3 = nn.Sequential(*[bottleneck_IR(32, 48, 2), bottleneck_IR(48, 48, 1), bottleneck_IR(48, 48, 1)]) 58 | self.conv_layer4 = nn.Sequential(*[bottleneck_IR(48, 64, 2), bottleneck_IR(64, 64, 1), bottleneck_IR(64, 64, 1)]) 59 | 60 | self.dconv_layer1 = 
nn.Sequential(*[bottleneck_IR(112, 64, 1), bottleneck_IR(64, 32, 1), bottleneck_IR(32, 32, 1)]) 61 | self.dconv_layer2 = nn.Sequential(*[bottleneck_IR(64, 32, 1), bottleneck_IR(32, 16, 1), bottleneck_IR(16, 16, 1)]) 62 | self.dconv_layer3 = nn.Sequential(*[bottleneck_IR(32, 16, 1), bottleneck_IR(16, 3, 1), bottleneck_IR(3, 3, 1)]) 63 | 64 | def forward(self, x): 65 | feat1 = self.conv_layer1(x) 66 | feat2 = self.conv_layer2(feat1) 67 | feat3 = self.conv_layer3(feat2) 68 | feat4 = self.conv_layer4(feat3) 69 | 70 | feat4 = nn.functional.interpolate(feat4, size=(64, 64), mode='bilinear') 71 | dfea1 = self.dconv_layer1(torch.cat((feat4, feat3), 1)) 72 | dfea1 = nn.functional.interpolate(dfea1, size=(128, 128), mode='bilinear') 73 | dfea2 = self.dconv_layer2(torch.cat((dfea1, feat2), 1)) 74 | dfea2 = nn.functional.interpolate(dfea2, size=(256, 256), mode='bilinear') 75 | dfea3 = self.dconv_layer3(torch.cat((dfea2, feat1), 1)) 76 | 77 | res_aligned = dfea3 78 | return res_aligned 79 | -------------------------------------------------------------------------------- /models/hfgi/editing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | 5 | def get_delta(pca, latent, idx, strength): 6 | w_centered = latent - pca['mean'].to('cuda') 7 | lat_comp = pca['comp'].to('cuda') 8 | lat_std = pca['std'].to('cuda') 9 | w_coord = torch.sum(w_centered[0].reshape(-1)*lat_comp[idx].reshape(-1)) / lat_std[idx] 10 | delta = (strength - w_coord)*lat_comp[idx]*lat_std[idx] 11 | return delta 12 | 13 | 14 | def ganspace_edit(latents, pca, edit_directions): 15 | edit_latents = [] 16 | for latent in latents: 17 | for pca_idx, start, end, strength in edit_directions: 18 | delta = get_delta(pca, latent, pca_idx, strength) 19 | delta_padded = torch.zeros(latent.shape).to('cuda') 20 | delta_padded[start:end] += delta.repeat(end - start, 1) 21 | edit_latents.append(latent + delta_padded) 22 | return torch.stack(edit_latents) 23 | 24 | 25 | class LatentEditor(object): 26 | # Edit style code only without generating images 27 | def __init__(self): 28 | self.age_direction = None 29 | self.pose_direction = None 30 | self.ganspace_pca = None 31 | self.ganspace_directions = { 32 | # 'eyes': (54, 7, 8, 20), 33 | 'beard': (58, 7, 9, -20), 34 | 'lip': (34, 10, 11, 20) 35 | } 36 | 37 | def load(self, path_dic): 38 | interfacegan_root = path_dic['interfacegan'] 39 | self.age_direction = torch.load(os.path.join(interfacegan_root, 'age.pt')).cuda() 40 | self.pose_direction = torch.load(os.path.join(interfacegan_root, 'pose.pt')).cuda() 41 | ganspace_path = path_dic['ganspace'] 42 | self.ganspace_pca = torch.load(ganspace_path) 43 | 44 | # def apply_ganspace(self, latent, ganspace_pca, edit_directions): 45 | # edit_latents = ganspace_edit(latent, ganspace_pca, edit_directions) 46 | # return self._latents_to_image(edit_latents), edit_latents 47 | # 48 | # def apply_interfacegan(self, latent, direction, factor=None): 49 | # edit_latents = latent + factor * direction 50 | # return self._latents_to_image(edit_latents), edit_latents 51 | 52 | def edit_style_code(self, wx, factor, choice): 53 | assert choice in ['young', 'old', 'beard', 'lip', 'pose'] 54 | if choice in ['young', 'old']: 55 | # recommend factor is 5/-5 56 | edit_latents = wx + factor * self.age_direction 57 | elif choice == 'pose': 58 | edit_latents = wx + factor * self.pose_direction 59 | else: 60 | # recommend factor is 20/-20 61 | direction = self.ganspace_directions[choice] 62 | edit_direction = 
(direction[0], direction[1], direction[2], factor) 63 | edit_latents = ganspace_edit(wx, self.ganspace_pca, [edit_direction]) 64 | return edit_latents 65 | 66 | -------------------------------------------------------------------------------- /models/stylegan2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/models/stylegan2/__init__.py -------------------------------------------------------------------------------- /models/stylegan2/op/__init__.py: -------------------------------------------------------------------------------- 1 | from .fused_act import FusedLeakyReLU, fused_leaky_relu 2 | from .upfirdn2d import upfirdn2d 3 | -------------------------------------------------------------------------------- /models/stylegan2/op/fused_act.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from torch import nn 5 | from torch.autograd import Function 6 | from torch.utils.cpp_extension import load 7 | 8 | module_path = os.path.dirname(__file__) 9 | fused = load( 10 | 'fused', 11 | sources=[ 12 | os.path.join(module_path, 'fused_bias_act.cpp'), 13 | os.path.join(module_path, 'fused_bias_act_kernel.cu'), 14 | ], 15 | ) 16 | 17 | 18 | class FusedLeakyReLUFunctionBackward(Function): 19 | @staticmethod 20 | def forward(ctx, grad_output, out, negative_slope, scale): 21 | ctx.save_for_backward(out) 22 | ctx.negative_slope = negative_slope 23 | ctx.scale = scale 24 | 25 | empty = grad_output.new_empty(0) 26 | 27 | grad_input = fused.fused_bias_act( 28 | grad_output, empty, out, 3, 1, negative_slope, scale 29 | ) 30 | 31 | dim = [0] 32 | 33 | if grad_input.ndim > 2: 34 | dim += list(range(2, grad_input.ndim)) 35 | 36 | grad_bias = grad_input.sum(dim).detach() 37 | 38 | return grad_input, grad_bias 39 | 40 | @staticmethod 41 | def backward(ctx, gradgrad_input, gradgrad_bias): 42 | out, = ctx.saved_tensors 43 | gradgrad_out = fused.fused_bias_act( 44 | gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale 45 | ) 46 | 47 | return gradgrad_out, None, None, None 48 | 49 | 50 | class FusedLeakyReLUFunction(Function): 51 | @staticmethod 52 | def forward(ctx, input, bias, negative_slope, scale): 53 | empty = input.new_empty(0) 54 | out = fused.fused_bias_act(input, bias, empty, 3, 0, negative_slope, scale) 55 | ctx.save_for_backward(out) 56 | ctx.negative_slope = negative_slope 57 | ctx.scale = scale 58 | 59 | return out 60 | 61 | @staticmethod 62 | def backward(ctx, grad_output): 63 | out, = ctx.saved_tensors 64 | 65 | grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply( 66 | grad_output, out, ctx.negative_slope, ctx.scale 67 | ) 68 | 69 | return grad_input, grad_bias, None, None 70 | 71 | 72 | class FusedLeakyReLU(nn.Module): 73 | def __init__(self, channel, negative_slope=0.2, scale=2 ** 0.5): 74 | super().__init__() 75 | 76 | self.bias = nn.Parameter(torch.zeros(channel)) 77 | self.negative_slope = negative_slope 78 | self.scale = scale 79 | 80 | def forward(self, input): 81 | return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale) 82 | 83 | 84 | def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2 ** 0.5): 85 | return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale) 86 | -------------------------------------------------------------------------------- /models/stylegan2/op/fused_bias_act.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, 5 | int act, int grad, float alpha, float scale); 6 | 7 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 8 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 9 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 10 | 11 | torch::Tensor fused_bias_act(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, 12 | int act, int grad, float alpha, float scale) { 13 | CHECK_CUDA(input); 14 | CHECK_CUDA(bias); 15 | 16 | return fused_bias_act_op(input, bias, refer, act, grad, alpha, scale); 17 | } 18 | 19 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 20 | m.def("fused_bias_act", &fused_bias_act, "fused bias act (CUDA)"); 21 | } -------------------------------------------------------------------------------- /models/stylegan2/op/fused_bias_act_kernel.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, NVIDIA Corporation. All rights reserved. 2 | // 3 | // This work is made available under the Nvidia Source Code License-NC. 4 | // To view a copy of this license, visit 5 | // https://nvlabs.github.io/stylegan2/license.html 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | 18 | template 19 | static __global__ void fused_bias_act_kernel(scalar_t* out, const scalar_t* p_x, const scalar_t* p_b, const scalar_t* p_ref, 20 | int act, int grad, scalar_t alpha, scalar_t scale, int loop_x, int size_x, int step_b, int size_b, int use_bias, int use_ref) { 21 | int xi = blockIdx.x * loop_x * blockDim.x + threadIdx.x; 22 | 23 | scalar_t zero = 0.0; 24 | 25 | for (int loop_idx = 0; loop_idx < loop_x && xi < size_x; loop_idx++, xi += blockDim.x) { 26 | scalar_t x = p_x[xi]; 27 | 28 | if (use_bias) { 29 | x += p_b[(xi / step_b) % size_b]; 30 | } 31 | 32 | scalar_t ref = use_ref ? p_ref[xi] : zero; 33 | 34 | scalar_t y; 35 | 36 | switch (act * 10 + grad) { 37 | default: 38 | case 10: y = x; break; 39 | case 11: y = x; break; 40 | case 12: y = 0.0; break; 41 | 42 | case 30: y = (x > 0.0) ? x : x * alpha; break; 43 | case 31: y = (ref > 0.0) ? x : x * alpha; break; 44 | case 32: y = 0.0; break; 45 | } 46 | 47 | out[xi] = y * scale; 48 | } 49 | } 50 | 51 | 52 | torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, 53 | int act, int grad, float alpha, float scale) { 54 | int curDevice = -1; 55 | cudaGetDevice(&curDevice); 56 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice); 57 | 58 | auto x = input.contiguous(); 59 | auto b = bias.contiguous(); 60 | auto ref = refer.contiguous(); 61 | 62 | int use_bias = b.numel() ? 1 : 0; 63 | int use_ref = ref.numel() ? 
1 : 0; 64 | 65 | int size_x = x.numel(); 66 | int size_b = b.numel(); 67 | int step_b = 1; 68 | 69 | for (int i = 1 + 1; i < x.dim(); i++) { 70 | step_b *= x.size(i); 71 | } 72 | 73 | int loop_x = 4; 74 | int block_size = 4 * 32; 75 | int grid_size = (size_x - 1) / (loop_x * block_size) + 1; 76 | 77 | auto y = torch::empty_like(x); 78 | 79 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "fused_bias_act_kernel", [&] { 80 | fused_bias_act_kernel<<>>( 81 | y.data_ptr(), 82 | x.data_ptr(), 83 | b.data_ptr(), 84 | ref.data_ptr(), 85 | act, 86 | grad, 87 | alpha, 88 | scale, 89 | loop_x, 90 | size_x, 91 | step_b, 92 | size_b, 93 | use_bias, 94 | use_ref 95 | ); 96 | }); 97 | 98 | return y; 99 | } -------------------------------------------------------------------------------- /models/stylegan2/op/upfirdn2d.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | torch::Tensor upfirdn2d_op(const torch::Tensor& input, const torch::Tensor& kernel, 5 | int up_x, int up_y, int down_x, int down_y, 6 | int pad_x0, int pad_x1, int pad_y0, int pad_y1); 7 | 8 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 9 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 10 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 11 | 12 | torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel, 13 | int up_x, int up_y, int down_x, int down_y, 14 | int pad_x0, int pad_x1, int pad_y0, int pad_y1) { 15 | CHECK_CUDA(input); 16 | CHECK_CUDA(kernel); 17 | 18 | return upfirdn2d_op(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1); 19 | } 20 | 21 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 22 | m.def("upfirdn2d", &upfirdn2d, "upfirdn2d (CUDA)"); 23 | } -------------------------------------------------------------------------------- /models/styleheat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/models/styleheat/__init__.py -------------------------------------------------------------------------------- /models/styleheat/styleheat.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from utils import flow_util 8 | from utils.distributed import master_only_print as print 9 | from models.hfgi.hfgi import HFGI 10 | from models.styleheat.calibration_net import CalibrationNet 11 | from models.styleheat.warper import AudioWarper, VideoWarper 12 | 13 | 14 | class StyleHEAT(nn.Module): 15 | 16 | def __init__(self, opt, path_dic): 17 | super(StyleHEAT, self).__init__() 18 | 19 | self.opt = opt 20 | self.video_warper = VideoWarper() 21 | self.calibrator = CalibrationNet( 22 | out_size=64, # check whether can enlarge the out size 23 | input_channel=512, # refine feature channel is 512 24 | num_style_feat=512, 25 | channel_multiplier=2, 26 | # for stylegan decoder 27 | narrow=1 28 | ) 29 | self.generator = HFGI() 30 | 31 | self.enable_audio = opt.enable_audio 32 | if self.enable_audio: 33 | self.audio_warper = AudioWarper() 34 | print('Enable audio driven.') 35 | 36 | self.load_checkpoint(opt, path_dic) 37 | 38 | self.frozen_params = ['video_warper'] 39 | self.freeze_models() 40 | 41 | def freeze_models(self): 42 | for n in self.frozen_params: 43 | for p in 
self.__getattr__(n).parameters(): 44 | p.requires_grad = False 45 | 46 | def load_checkpoint(self, opt, path_dic): 47 | self.generator.load_checkpoint(path_dic) 48 | print(f'Stage: {opt.mode}') 49 | if opt.mode == 'train_visual_refine': 50 | # Load from origin PIRender 51 | path = opt.visual_warper_path 52 | ckpt = torch.load(path, map_location='cpu')['net_G_ema'] 53 | self.video_warper.load_state_dict(ckpt, strict=True) 54 | self.video_warper.eval() 55 | print(f'Load pre-trained VideoWarper [net_G_ema] from {opt.visual_warper_path} done') 56 | elif opt.mode == 'inference' or opt.mode == 'train_audio_refine': 57 | # Load from FreeStyler path 58 | path = opt.free_styler_path 59 | ckpt = torch.load(path, map_location='cpu')['net_G_ema'] 60 | self.load_state_dict(ckpt, strict=False) # should be full without StyleGAN 61 | self.eval() 62 | print(f'Load pre-trained StyleHEAT [net_G_ema] from {opt.free_styler_path} done') 63 | 64 | if opt.mode == 'train_audio_refine' and self.enable_audio: 65 | path = opt.audio_warper_path 66 | ckpt = torch.load(path, map_location='cpu')['net_G_ema'] 67 | self.audio_warper.load_state_dict(ckpt, strict=True) 68 | self.audio_warper.eval() 69 | print(f'Load pre-trained AudioWarper from {path} done.') 70 | else: 71 | raise NotImplementedError 72 | 73 | def forward(self, input_image, driven_3dmm, driven_audio=None, inv_data=None, imsize=512): 74 | # Stage 1: Inversion 75 | if inv_data is None: 76 | with torch.no_grad(): 77 | ix, wx, fx, inversion_condition = self.generator.inverse(input_image) 78 | else: 79 | # be careful about the batch case 80 | ix, wx, fx, inversion_condition = inv_data 81 | 82 | # Stage 2: Visual Warping 83 | video_output = self.video_warper(ix, driven_3dmm) # Input: 256*256 84 | flow = video_output['flow_field'] 85 | descriptor = video_output['descriptor'] 86 | video_warping_condition = flow_util.convert_flow_to_deformation(flow) 87 | 88 | warping_condition = [video_warping_condition] 89 | 90 | fx_warp = flow_util.warp_image(fx, video_warping_condition) 91 | video_warp_img, _, _ = self.generator( 92 | [wx], 93 | warping_condition=warping_condition, 94 | inversion_condition=inversion_condition 95 | ) 96 | video_warp_img = F.interpolate(video_warp_img, size=(imsize, imsize), mode="bilinear", align_corners=False) 97 | 98 | # Stage 3: Audio Warping 99 | if self.enable_audio: 100 | video_warp_img_256 = F.interpolate(video_warp_img, size=(256, 256), mode="bilinear", align_corners=False) 101 | flow = self.audio_warper(video_warp_img_256, driven_audio)['flow_field'] # Input: 256*256 102 | # TODO: trick flow: (B, 2, 64, 64) for inference only 103 | flow[:, :, :32] = 0 104 | 105 | audio_warping_condition = flow_util.convert_flow_to_deformation(flow) 106 | warping_condition.append(audio_warping_condition) 107 | 108 | fx_warp = flow_util.warp_image(fx_warp, audio_warping_condition) 109 | audio_warp_img, _, _ = self.generator( 110 | [wx], 111 | warping_condition=warping_condition, 112 | inversion_condition=inversion_condition 113 | ) 114 | audio_warp_img = F.interpolate(audio_warp_img, size=(imsize, imsize), mode="bilinear", align_corners=False) 115 | else: 116 | audio_warp_img = None 117 | 118 | refining_condition = self.calibrator(fx_warp, descriptor) 119 | # refining_condition = self.calibrator(fx_warp) 120 | fake, _, _ = self.generator( 121 | [wx], 122 | f_condition=fx, 123 | refining_condition=refining_condition, 124 | warping_condition=warping_condition, 125 | inversion_condition=inversion_condition 126 | ) 127 | fake = F.interpolate(fake, size=(imsize, 
imsize), mode="bilinear", align_corners=False) 128 | return { 129 | 'fake_image': fake, 130 | 'audio_warp_image': audio_warp_img, 131 | 'video_warp_image': video_warp_img, 132 | 'fx_warp': fx_warp 133 | } 134 | 135 | -------------------------------------------------------------------------------- /models/styleheat/warper.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import numpy as np 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from utils import flow_util 9 | from models.styleheat.base_function import LayerNorm2d, ADAINHourglass 10 | 11 | 12 | class VideoWarper(nn.Module): 13 | def __init__( 14 | self 15 | ): 16 | super(VideoWarper, self).__init__() 17 | self.mapping_net = MappingNet( 18 | coeff_nc=73, 19 | descriptor_nc=256, 20 | layer=3 21 | ) 22 | self.warping_net = WarpingNet( 23 | encoder_layer=5, 24 | decoder_layer=3, 25 | base_nc=32, 26 | image_nc=3, 27 | descriptor_nc=256, 28 | max_nc=256, 29 | use_spect=False 30 | ) 31 | 32 | def forward( 33 | self, 34 | input_image, 35 | driving_source 36 | ): 37 | """ 38 | :param input_image: 39 | :param driving_source: 40 | :return: output: dict: {'warp_image', 'flow_field', 'descriptor'} 41 | """ 42 | descriptor = self.mapping_net(driving_source) 43 | output = self.warping_net(input_image, descriptor) 44 | output['descriptor'] = descriptor 45 | return output 46 | 47 | 48 | class AudioWarper(nn.Module): 49 | 50 | def __init__(self): 51 | super(AudioWarper, self).__init__() 52 | self.audio_encoder = MappingNet( 53 | coeff_nc=80, 54 | descriptor_nc=256, 55 | layer=3 56 | ) 57 | self.warpping_net = WarpingNet( 58 | encoder_layer=5, 59 | decoder_layer=3, 60 | base_nc=32, 61 | image_nc=3, 62 | descriptor_nc=256, 63 | max_nc=256, 64 | use_spect=False 65 | ) 66 | 67 | def forward( 68 | self, 69 | input_image, 70 | driving_source 71 | ): 72 | descriptor = self.audio_encoder(driving_source) 73 | # print(f'descritor.shape: {descriptor.shape}') 74 | output = self.warpping_net(input_image, descriptor) 75 | output['descriptor'] = descriptor 76 | return output 77 | 78 | 79 | class MappingNet(nn.Module): 80 | 81 | def __init__(self, coeff_nc, descriptor_nc, layer): 82 | super(MappingNet, self).__init__() 83 | 84 | self.layer = layer 85 | nonlinearity = nn.LeakyReLU(0.1) 86 | 87 | self.first = nn.Sequential( 88 | torch.nn.Conv1d(coeff_nc, descriptor_nc, kernel_size=7, padding=0, bias=True)) 89 | 90 | for i in range(layer): 91 | net = nn.Sequential(nonlinearity, 92 | torch.nn.Conv1d(descriptor_nc, descriptor_nc, kernel_size=3, padding=0, dilation=3)) 93 | setattr(self, 'encoder' + str(i), net) 94 | 95 | self.pooling = nn.AdaptiveAvgPool1d(1) 96 | self.output_nc = descriptor_nc 97 | 98 | def forward(self, input_3dmm): 99 | out = self.first(input_3dmm) 100 | for i in range(self.layer): 101 | model = getattr(self, 'encoder' + str(i)) 102 | out = model(out) + out[:, :, 3:-3] 103 | out = self.pooling(out) 104 | return out 105 | 106 | 107 | class WarpingNet(nn.Module): 108 | 109 | def __init__( 110 | self, 111 | image_nc, 112 | descriptor_nc, 113 | base_nc, 114 | max_nc, 115 | encoder_layer, 116 | decoder_layer, 117 | use_spect 118 | ): 119 | super(WarpingNet, self).__init__() 120 | 121 | nonlinearity = nn.LeakyReLU(0.1) 122 | norm_layer = functools.partial(LayerNorm2d, affine=True) 123 | kwargs = {'nonlinearity': nonlinearity, 'use_spect': use_spect} 124 | 125 | self.descriptor_nc = descriptor_nc 126 | self.hourglass = ADAINHourglass(image_nc, 
self.descriptor_nc, base_nc, 127 | max_nc, encoder_layer, decoder_layer, **kwargs) 128 | 129 | self.flow_out = nn.Sequential(norm_layer(self.hourglass.output_nc), 130 | nonlinearity, 131 | nn.Conv2d(self.hourglass.output_nc, 2, kernel_size=7, stride=1, padding=3)) 132 | 133 | self.pool = nn.AdaptiveAvgPool2d(1) 134 | 135 | def forward(self, input_image, descriptor): 136 | final_output = {} 137 | output = self.hourglass(input_image, descriptor) 138 | final_output['flow_field'] = self.flow_out(output) 139 | 140 | deformation = flow_util.convert_flow_to_deformation(final_output['flow_field']) 141 | final_output['warp_image'] = flow_util.warp_image(input_image, deformation) 142 | return final_output 143 | 144 | 145 | def test_audio_warper(): 146 | model = AudioWarper().cuda() 147 | img = torch.randn(2, 3, 256, 256).cuda() 148 | wav = torch.randn(2, 80, 32).cuda() # 2, 5, 149 | output = model(img, wav) 150 | print(output['flow_field'].shape) 151 | print(output['warp_image'].shape) 152 | 153 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | basicsr==1.3.4.7 2 | certifi==2020.12.5 3 | cffi 4 | dlib==19.22.1 5 | face-alignment==1.3.5 6 | glob2 7 | imageio==2.9.0 8 | imageio-ffmpeg==0.4.5 9 | kornia==0.5.5 10 | lmdb==1.2.1 11 | numba==0.54.1 12 | numpy==1.19.0 13 | opencv-python==3.4.9.33 14 | pandas==1.3.4 15 | Pillow==6.2.1 16 | python-dateutil==2.8.2 17 | PyYAML==5.3.1 18 | scikit-image==0.16.2 19 | scipy==1.4.1 20 | tensorboard==2.7.0 21 | torch==1.7.1 22 | torchvision==0.8.2 23 | tqdm 24 | trimesh==3.9.20 25 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/__init__.py: -------------------------------------------------------------------------------- 1 | """This package contains modules related to objective functions, optimizations, and network architectures. 2 | 3 | To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel. 4 | You need to implement the following five functions: 5 | -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). 6 | -- <set_input>: unpack data from dataset and apply preprocessing. 7 | -- <forward>: produce intermediate results. 8 | -- <optimize_parameters>: calculate loss, gradients, and update network weights. 9 | -- <modify_commandline_options>: (optionally) add model-specific options and set default options. 10 | 11 | In the function <__init__>, you need to define four lists: 12 | -- self.loss_names (str list): specify the training losses that you want to plot and save. 13 | -- self.model_names (str list): define networks used in our training. 14 | -- self.visual_names (str list): specify the images that you want to display and save. 15 | -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for a usage example. 16 | 17 | Now you can use the model class by specifying flag '--model dummy'. 18 | See our template model class 'template_model.py' for more details. 19 | """ 20 | 21 | import importlib 22 | from .base_model import BaseModel 23 | 24 | 25 | def find_model_using_name(model_name): 26 | """Import the module "models/[model_name]_model.py". 27 | 28 | In the file, the class called DatasetNameModel() will 29 | be instantiated.
It has to be a subclass of BaseModel, 30 | and it is case-insensitive. 31 | """ 32 | model_filename = "third_part.Deep3DFaceRecon_pytorch.models." + model_name + "_model" 33 | # model_filename = "models." + model_name + "_model" 34 | modellib = importlib.import_module(model_filename) 35 | model = None 36 | target_model_name = model_name.replace('_', '') + 'model' 37 | for name, cls in modellib.__dict__.items(): 38 | if name.lower() == target_model_name.lower() \ 39 | and issubclass(cls, BaseModel): 40 | model = cls 41 | 42 | if model is None: 43 | print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name)) 44 | exit(0) 45 | 46 | return model 47 | 48 | 49 | def get_option_setter(model_name): 50 | """Return the static method <modify_commandline_options> of the model class.""" 51 | model_class = find_model_using_name(model_name) 52 | return model_class.modify_commandline_options 53 | 54 | 55 | def create_model(opt): 56 | """Create a model given the option. 57 | 58 | This function wraps the class CustomDatasetDataLoader. 59 | This is the main interface between this package and 'train.py'/'test.py' 60 | 61 | Example: 62 | >>> from models import create_model 63 | >>> model = create_model(opt) 64 | """ 65 | model = find_model_using_name(opt.model) 66 | instance = model(opt) 67 | print("model [%s] was created" % type(instance).__name__) 68 | return instance 69 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200 2 | from .mobilefacenet import get_mbf 3 | 4 | 5 | def get_model(name, **kwargs): 6 | # resnet 7 | if name == "r18": 8 | return iresnet18(False, **kwargs) 9 | elif name == "r34": 10 | return iresnet34(False, **kwargs) 11 | elif name == "r50": 12 | return iresnet50(False, **kwargs) 13 | elif name == "r100": 14 | return iresnet100(False, **kwargs) 15 | elif name == "r200": 16 | return iresnet200(False, **kwargs) 17 | elif name == "r2060": 18 | from .iresnet2060 import iresnet2060 19 | return iresnet2060(False, **kwargs) 20 | elif name == "mbf": 21 | fp16 = kwargs.get("fp16", False) 22 | num_features = kwargs.get("num_features", 512) 23 | return get_mbf(fp16=fp16, num_features=num_features) 24 | else: 25 | raise ValueError() -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/backbones/mobilefacenet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Adapted from https://github.com/cavalleria/cavaface.pytorch/blob/master/backbone/mobilefacenet.py 3 | Original author cavalleria 4 | ''' 5 | 6 | import torch.nn as nn 7 | from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, Sequential, Module 8 | import torch 9 | 10 | 11 | class Flatten(Module): 12 | def forward(self, x): 13 | return x.view(x.size(0), -1) 14 | 15 | 16 | class ConvBlock(Module): 17 | def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1): 18 | super(ConvBlock, self).__init__() 19 | self.layers = nn.Sequential( 20 | Conv2d(in_c, out_c, kernel, groups=groups, stride=stride, padding=padding, bias=False), 21 | BatchNorm2d(num_features=out_c), 22 | PReLU(num_parameters=out_c) 23 | ) 24 | 25 | def forward(self, x): 26 | return
self.layers(x) 27 | 28 | 29 | class LinearBlock(Module): 30 | def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1): 31 | super(LinearBlock, self).__init__() 32 | self.layers = nn.Sequential( 33 | Conv2d(in_c, out_c, kernel, stride, padding, groups=groups, bias=False), 34 | BatchNorm2d(num_features=out_c) 35 | ) 36 | 37 | def forward(self, x): 38 | return self.layers(x) 39 | 40 | 41 | class DepthWise(Module): 42 | def __init__(self, in_c, out_c, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1): 43 | super(DepthWise, self).__init__() 44 | self.residual = residual 45 | self.layers = nn.Sequential( 46 | ConvBlock(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1)), 47 | ConvBlock(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride), 48 | LinearBlock(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1)) 49 | ) 50 | 51 | def forward(self, x): 52 | short_cut = None 53 | if self.residual: 54 | short_cut = x 55 | x = self.layers(x) 56 | if self.residual: 57 | output = short_cut + x 58 | else: 59 | output = x 60 | return output 61 | 62 | 63 | class Residual(Module): 64 | def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)): 65 | super(Residual, self).__init__() 66 | modules = [] 67 | for _ in range(num_block): 68 | modules.append(DepthWise(c, c, True, kernel, stride, padding, groups)) 69 | self.layers = Sequential(*modules) 70 | 71 | def forward(self, x): 72 | return self.layers(x) 73 | 74 | 75 | class GDC(Module): 76 | def __init__(self, embedding_size): 77 | super(GDC, self).__init__() 78 | self.layers = nn.Sequential( 79 | LinearBlock(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0)), 80 | Flatten(), 81 | Linear(512, embedding_size, bias=False), 82 | BatchNorm1d(embedding_size)) 83 | 84 | def forward(self, x): 85 | return self.layers(x) 86 | 87 | 88 | class MobileFaceNet(Module): 89 | def __init__(self, fp16=False, num_features=512): 90 | super(MobileFaceNet, self).__init__() 91 | scale = 2 92 | self.fp16 = fp16 93 | self.layers = nn.Sequential( 94 | ConvBlock(3, 64 * scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1)), 95 | ConvBlock(64 * scale, 64 * scale, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64), 96 | DepthWise(64 * scale, 64 * scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128), 97 | Residual(64 * scale, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)), 98 | DepthWise(64 * scale, 128 * scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256), 99 | Residual(128 * scale, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)), 100 | DepthWise(128 * scale, 128 * scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512), 101 | Residual(128 * scale, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)), 102 | ) 103 | self.conv_sep = ConvBlock(128 * scale, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0)) 104 | self.features = GDC(num_features) 105 | self._initialize_weights() 106 | 107 | def _initialize_weights(self): 108 | for m in self.modules(): 109 | if isinstance(m, nn.Conv2d): 110 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 111 | if m.bias is not None: 112 | m.bias.data.zero_() 113 | elif isinstance(m, nn.BatchNorm2d): 114 | m.weight.data.fill_(1) 115 | m.bias.data.zero_() 116 | elif isinstance(m, nn.Linear): 117 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 118 | if m.bias is not 
None: 119 | m.bias.data.zero_() 120 | 121 | def forward(self, x): 122 | with torch.cuda.amp.autocast(self.fp16): 123 | x = self.layers(x) 124 | x = self.conv_sep(x.float() if self.fp16 else x) 125 | x = self.features(x) 126 | return x 127 | 128 | 129 | def get_mbf(fp16, num_features): 130 | return MobileFaceNet(fp16, num_features) -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/3millions.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # configs for test speed 4 | 5 | config = edict() 6 | config.loss = "arcface" 7 | config.network = "r50" 8 | config.resume = False 9 | config.output = None 10 | config.embedding_size = 512 11 | config.sample_rate = 1.0 12 | config.fp16 = True 13 | config.momentum = 0.9 14 | config.weight_decay = 5e-4 15 | config.batch_size = 128 16 | config.lr = 0.1 # batch size is 512 17 | 18 | config.rec = "synthetic" 19 | config.num_classes = 300 * 10000 20 | config.num_epoch = 30 21 | config.warmup_epoch = -1 22 | config.decay_epoch = [10, 16, 22] 23 | config.val_targets = [] 24 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/3millions_pfc.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # configs for test speed 4 | 5 | config = edict() 6 | config.loss = "arcface" 7 | config.network = "r50" 8 | config.resume = False 9 | config.output = None 10 | config.embedding_size = 512 11 | config.sample_rate = 0.1 12 | config.fp16 = True 13 | config.momentum = 0.9 14 | config.weight_decay = 5e-4 15 | config.batch_size = 128 16 | config.lr = 0.1 # batch size is 512 17 | 18 | config.rec = "synthetic" 19 | config.num_classes = 300 * 10000 20 | config.num_epoch = 30 21 | config.warmup_epoch = -1 22 | config.decay_epoch = [10, 16, 22] 23 | config.val_targets = [] 24 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/__init__.py -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/base.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = "ms1mv3_arcface_r50" 12 | 13 | config.dataset = "ms1m-retinaface-t1" 14 | config.embedding_size = 512 15 | config.sample_rate = 1 16 | config.fp16 = False 17 | config.momentum = 0.9 18 | config.weight_decay = 5e-4 19 | config.batch_size = 128 20 | config.lr = 0.1 # batch size is 512 21 | 22 | if config.dataset == "emore": 23 | config.rec = "/train_tmp/faces_emore" 24 | config.num_classes = 85742 25 | config.num_image = 5822653 26 | config.num_epoch = 16 27 | config.warmup_epoch = -1 28 | config.decay_epoch = [8, 14, ] 29 | config.val_targets = ["lfw", ] 
30 | 31 | elif config.dataset == "ms1m-retinaface-t1": 32 | config.rec = "/train_tmp/ms1m-retinaface-t1" 33 | config.num_classes = 93431 34 | config.num_image = 5179510 35 | config.num_epoch = 25 36 | config.warmup_epoch = -1 37 | config.decay_epoch = [11, 17, 22] 38 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 39 | 40 | elif config.dataset == "glint360k": 41 | config.rec = "/train_tmp/glint360k" 42 | config.num_classes = 360232 43 | config.num_image = 17091657 44 | config.num_epoch = 20 45 | config.warmup_epoch = -1 46 | config.decay_epoch = [8, 12, 15, 18] 47 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 48 | 49 | elif config.dataset == "webface": 50 | config.rec = "/train_tmp/faces_webface_112x112" 51 | config.num_classes = 10572 52 | config.num_image = "forget" 53 | config.num_epoch = 34 54 | config.warmup_epoch = -1 55 | config.decay_epoch = [20, 28, 32] 56 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 57 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/glint360k_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "cosface" 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.1 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 2e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/glint360k" 21 | config.num_classes = 360232 22 | config.num_image = 17091657 23 | config.num_epoch = 20 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [8, 12, 15, 18] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/glint360k_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "cosface" 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/glint360k" 21 | config.num_classes = 360232 22 | config.num_image = 17091657 23 | config.num_epoch = 20 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [8, 12, 15, 18] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/glint360k_r18.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "cosface" 9 | config.network = "r18" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | 
config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/glint360k" 21 | config.num_classes = 360232 22 | config.num_image = 17091657 23 | config.num_epoch = 20 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [8, 12, 15, 18] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/glint360k_r34.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "cosface" 9 | config.network = "r34" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/glint360k" 21 | config.num_classes = 360232 22 | config.num_image = 17091657 23 | config.num_epoch = 20 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [8, 12, 15, 18] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/glint360k_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "cosface" 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/glint360k" 21 | config.num_classes = 360232 22 | config.num_image = 17091657 23 | config.num_epoch = 20 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [8, 12, 15, 18] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/ms1mv3_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 2e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/ms1m-retinaface-t1" 21 | config.num_classes = 93431 22 | config.num_image = 5179510 23 | config.num_epoch = 30 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [10, 20, 25] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | 
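All of the arcface_torch config files in this listing follow the same pattern: a flat EasyDict named `config` whose fields override the defaults in `configs/base.py`. Below is a minimal sketch of how one of these job configs is resolved, mirroring the `get_config` helper in `arcface_torch/utils/utils_config.py` shown later in this listing; the function name `load_job_config` and the example job name are illustrative only, and the sketch assumes it runs from the `arcface_torch/` directory so that the `configs` package is importable.

```python
import importlib
import os.path as osp


def load_job_config(job_name="ms1mv3_mbf"):
    # Start from the shared defaults in configs/base.py (an EasyDict).
    cfg = importlib.import_module("configs.base").config
    # Overlay the per-dataset / per-backbone overrides, e.g. configs/ms1mv3_mbf.py.
    cfg.update(importlib.import_module("configs.%s" % job_name).config)
    # Fall back to a per-job output directory when the job config leaves `output` unset.
    if cfg.output is None:
        cfg.output = osp.join("work_dirs", job_name)
    return cfg
```

Downstream code can then read merged fields such as `cfg.network`, `cfg.lr`, and `cfg.decay_epoch` from the returned dictionary.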
-------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/ms1mv3_r18.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "r18" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/ms1m-retinaface-t1" 21 | config.num_classes = 93431 22 | config.num_image = 5179510 23 | config.num_epoch = 25 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [10, 16, 22] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/ms1mv3_r2060.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "r2060" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 64 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/ms1m-retinaface-t1" 21 | config.num_classes = 93431 22 | config.num_image = 5179510 23 | config.num_epoch = 25 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [10, 16, 22] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/ms1mv3_r34.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "r34" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/ms1m-retinaface-t1" 21 | config.num_classes = 93431 22 | config.num_image = 5179510 23 | config.num_epoch = 25 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [10, 16, 22] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/ms1mv3_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "r50" 10 | config.resume = False 11 | 
config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/ms1m-retinaface-t1" 21 | config.num_classes = 93431 22 | config.num_image = 5179510 23 | config.num_epoch = 25 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [10, 16, 22] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/configs/speed.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # configs for test speed 4 | 5 | config = edict() 6 | config.loss = "arcface" 7 | config.network = "r50" 8 | config.resume = False 9 | config.output = None 10 | config.embedding_size = 512 11 | config.sample_rate = 1.0 12 | config.fp16 = True 13 | config.momentum = 0.9 14 | config.weight_decay = 5e-4 15 | config.batch_size = 128 16 | config.lr = 0.1 # batch size is 512 17 | 18 | config.rec = "synthetic" 19 | config.num_classes = 100 * 10000 20 | config.num_epoch = 30 21 | config.warmup_epoch = -1 22 | config.decay_epoch = [10, 16, 22] 23 | config.val_targets = [] 24 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/dataset.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import os 3 | import queue as Queue 4 | import threading 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | import torch 9 | from torch.utils.data import DataLoader, Dataset 10 | from torchvision import transforms 11 | 12 | 13 | class BackgroundGenerator(threading.Thread): 14 | def __init__(self, generator, local_rank, max_prefetch=6): 15 | super(BackgroundGenerator, self).__init__() 16 | self.queue = Queue.Queue(max_prefetch) 17 | self.generator = generator 18 | self.local_rank = local_rank 19 | self.daemon = True 20 | self.start() 21 | 22 | def run(self): 23 | torch.cuda.set_device(self.local_rank) 24 | for item in self.generator: 25 | self.queue.put(item) 26 | self.queue.put(None) 27 | 28 | def next(self): 29 | next_item = self.queue.get() 30 | if next_item is None: 31 | raise StopIteration 32 | return next_item 33 | 34 | def __next__(self): 35 | return self.next() 36 | 37 | def __iter__(self): 38 | return self 39 | 40 | 41 | class DataLoaderX(DataLoader): 42 | 43 | def __init__(self, local_rank, **kwargs): 44 | super(DataLoaderX, self).__init__(**kwargs) 45 | self.stream = torch.cuda.Stream(local_rank) 46 | self.local_rank = local_rank 47 | 48 | def __iter__(self): 49 | self.iter = super(DataLoaderX, self).__iter__() 50 | self.iter = BackgroundGenerator(self.iter, self.local_rank) 51 | self.preload() 52 | return self 53 | 54 | def preload(self): 55 | self.batch = next(self.iter, None) 56 | if self.batch is None: 57 | return None 58 | with torch.cuda.stream(self.stream): 59 | for k in range(len(self.batch)): 60 | self.batch[k] = self.batch[k].to(device=self.local_rank, non_blocking=True) 61 | 62 | def __next__(self): 63 | torch.cuda.current_stream().wait_stream(self.stream) 64 | batch = self.batch 65 | if batch is None: 66 | raise StopIteration 67 | self.preload() 68 | return batch 69 | 70 | 71 | class MXFaceDataset(Dataset): 72 | def __init__(self, root_dir, local_rank): 73 | 
super(MXFaceDataset, self).__init__() 74 | self.transform = transforms.Compose( 75 | [transforms.ToPILImage(), 76 | transforms.RandomHorizontalFlip(), 77 | transforms.ToTensor(), 78 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), 79 | ]) 80 | self.root_dir = root_dir 81 | self.local_rank = local_rank 82 | path_imgrec = os.path.join(root_dir, 'train.rec') 83 | path_imgidx = os.path.join(root_dir, 'train.idx') 84 | self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r') 85 | s = self.imgrec.read_idx(0) 86 | header, _ = mx.recordio.unpack(s) 87 | if header.flag > 0: 88 | self.header0 = (int(header.label[0]), int(header.label[1])) 89 | self.imgidx = np.array(range(1, int(header.label[0]))) 90 | else: 91 | self.imgidx = np.array(list(self.imgrec.keys)) 92 | 93 | def __getitem__(self, index): 94 | idx = self.imgidx[index] 95 | s = self.imgrec.read_idx(idx) 96 | header, img = mx.recordio.unpack(s) 97 | label = header.label 98 | if not isinstance(label, numbers.Number): 99 | label = label[0] 100 | label = torch.tensor(label, dtype=torch.long) 101 | sample = mx.image.imdecode(img).asnumpy() 102 | if self.transform is not None: 103 | sample = self.transform(sample) 104 | return sample, label 105 | 106 | def __len__(self): 107 | return len(self.imgidx) 108 | 109 | 110 | class SyntheticDataset(Dataset): 111 | def __init__(self, local_rank): 112 | super(SyntheticDataset, self).__init__() 113 | img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.int32) 114 | img = np.transpose(img, (2, 0, 1)) 115 | img = torch.from_numpy(img).squeeze(0).float() 116 | img = ((img / 255) - 0.5) / 0.5 117 | self.img = img 118 | self.label = 1 119 | 120 | def __getitem__(self, index): 121 | return self.img, self.label 122 | 123 | def __len__(self): 124 | return 1000000 125 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/docs/eval.md: -------------------------------------------------------------------------------- 1 | ## Eval on ICCV2021-MFR 2 | 3 | coming soon. 4 | 5 | 6 | ## Eval IJBC 7 | You can eval ijbc with pytorch or onnx. 8 | 9 | 10 | 1. Eval IJBC With Onnx 11 | ```shell 12 | CUDA_VISIBLE_DEVICES=0 python onnx_ijbc.py --model-root ms1mv3_arcface_r50 --image-path IJB_release/IJBC --result-dir ms1mv3_arcface_r50 13 | ``` 14 | 15 | 2. 
Eval IJBC With Pytorch 16 | ```shell 17 | CUDA_VISIBLE_DEVICES=0,1 python eval_ijbc.py \ 18 | --model-prefix ms1mv3_arcface_r50/backbone.pth \ 19 | --image-path IJB_release/IJBC \ 20 | --result-dir ms1mv3_arcface_r50 \ 21 | --batch-size 128 \ 22 | --job ms1mv3_arcface_r50 \ 23 | --target IJBC \ 24 | --network iresnet50 25 | ``` 26 | 27 | ## Inference 28 | 29 | ```shell 30 | python inference.py --weight ms1mv3_arcface_r50/backbone.pth --network r50 31 | ``` 32 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/docs/install.md: -------------------------------------------------------------------------------- 1 | ## v1.8.0 2 | ### Linux and Windows 3 | ```shell 4 | # CUDA 11.0 5 | pip --default-timeout=100 install torch==1.8.0+cu111 torchvision==0.9.0+cu111 torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html 6 | 7 | # CUDA 10.2 8 | pip --default-timeout=100 install torch==1.8.0 torchvision==0.9.0 torchaudio==0.8.0 9 | 10 | # CPU only 11 | pip --default-timeout=100 install torch==1.8.0+cpu torchvision==0.9.0+cpu torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html 12 | 13 | ``` 14 | 15 | 16 | ## v1.7.1 17 | ### Linux and Windows 18 | ```shell 19 | # CUDA 11.0 20 | pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html 21 | 22 | # CUDA 10.2 23 | pip install torch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2 24 | 25 | # CUDA 10.1 26 | pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html 27 | 28 | # CUDA 9.2 29 | pip install torch==1.7.1+cu92 torchvision==0.8.2+cu92 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html 30 | 31 | # CPU only 32 | pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html 33 | ``` 34 | 35 | 36 | ## v1.6.0 37 | 38 | ### Linux and Windows 39 | ```shell 40 | # CUDA 10.2 41 | pip install torch==1.6.0 torchvision==0.7.0 42 | 43 | # CUDA 10.1 44 | pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html 45 | 46 | # CUDA 9.2 47 | pip install torch==1.6.0+cu92 torchvision==0.7.0+cu92 -f https://download.pytorch.org/whl/torch_stable.html 48 | 49 | # CPU only 50 | pip install torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html 51 | ``` -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/docs/modelzoo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/docs/modelzoo.md -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/docs/speed_benchmark.md: -------------------------------------------------------------------------------- 1 | ## Test Training Speed 2 | 3 | - Test Commands 4 | 5 | You need to use the following two commands to test the Partial FC training performance. 6 | The number of identites is **3 millions** (synthetic data), turn mixed precision training on, backbone is resnet50, 7 | batch size is 1024. 
8 | ```shell 9 | # Model Parallel 10 | python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/3millions 11 | # Partial FC 0.1 12 | python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/3millions_pfc 13 | ``` 14 | 15 | - GPU Memory 16 | 17 | ``` 18 | # (Model Parallel) gpustat -i 19 | [0] Tesla V100-SXM2-32GB | 64'C, 94 % | 30338 / 32510 MB 20 | [1] Tesla V100-SXM2-32GB | 60'C, 99 % | 28876 / 32510 MB 21 | [2] Tesla V100-SXM2-32GB | 60'C, 99 % | 28872 / 32510 MB 22 | [3] Tesla V100-SXM2-32GB | 69'C, 99 % | 28872 / 32510 MB 23 | [4] Tesla V100-SXM2-32GB | 66'C, 99 % | 28888 / 32510 MB 24 | [5] Tesla V100-SXM2-32GB | 60'C, 99 % | 28932 / 32510 MB 25 | [6] Tesla V100-SXM2-32GB | 68'C, 100 % | 28916 / 32510 MB 26 | [7] Tesla V100-SXM2-32GB | 65'C, 99 % | 28860 / 32510 MB 27 | 28 | # (Partial FC 0.1) gpustat -i 29 | [0] Tesla V100-SXM2-32GB | 60'C, 95 % | 10488 / 32510 MB │······················· 30 | [1] Tesla V100-SXM2-32GB | 60'C, 97 % | 10344 / 32510 MB │······················· 31 | [2] Tesla V100-SXM2-32GB | 61'C, 95 % | 10340 / 32510 MB │······················· 32 | [3] Tesla V100-SXM2-32GB | 66'C, 95 % | 10340 / 32510 MB │······················· 33 | [4] Tesla V100-SXM2-32GB | 65'C, 94 % | 10356 / 32510 MB │······················· 34 | [5] Tesla V100-SXM2-32GB | 61'C, 95 % | 10400 / 32510 MB │······················· 35 | [6] Tesla V100-SXM2-32GB | 68'C, 96 % | 10384 / 32510 MB │······················· 36 | [7] Tesla V100-SXM2-32GB | 64'C, 95 % | 10328 / 32510 MB │······················· 37 | ``` 38 | 39 | - Training Speed 40 | 41 | ```python 42 | # (Model Parallel) trainging.log 43 | Training: Speed 2271.33 samples/sec Loss 1.1624 LearningRate 0.2000 Epoch: 0 Global Step: 100 44 | Training: Speed 2269.94 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 150 45 | Training: Speed 2272.67 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 200 46 | Training: Speed 2266.55 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 250 47 | Training: Speed 2272.54 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 300 48 | 49 | # (Partial FC 0.1) trainging.log 50 | Training: Speed 5299.56 samples/sec Loss 1.0965 LearningRate 0.2000 Epoch: 0 Global Step: 100 51 | Training: Speed 5296.37 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 150 52 | Training: Speed 5304.37 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 200 53 | Training: Speed 5274.43 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 250 54 | Training: Speed 5300.10 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 300 55 | ``` 56 | 57 | In this test case, Partial FC 0.1 only use1 1/3 of the GPU memory of the model parallel, 58 | and the training speed is 2.5 times faster than the model parallel. 59 | 60 | 61 | ## Speed Benchmark 62 | 63 | 1. Training speed of different parallel methods (samples/second), Tesla V100 32GB * 8. 
(Larger is better) 64 | 65 | | Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | 66 | | :--- | :--- | :--- | :--- | 67 | |125000 | 4681 | 4824 | 5004 | 68 | |250000 | 4047 | 4521 | 4976 | 69 | |500000 | 3087 | 4013 | 4900 | 70 | |1000000 | 2090 | 3449 | 4803 | 71 | |1400000 | 1672 | 3043 | 4738 | 72 | |2000000 | - | 2593 | 4626 | 73 | |4000000 | - | 1748 | 4208 | 74 | |5500000 | - | 1389 | 3975 | 75 | |8000000 | - | - | 3565 | 76 | |16000000 | - | - | 2679 | 77 | |29000000 | - | - | 1855 | 78 | 79 | 2. GPU memory cost of different parallel methods (GB per GPU), Tesla V100 32GB * 8. (Smaller is better) 80 | 81 | | Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | 82 | | :--- | :--- | :--- | :--- | 83 | |125000 | 7358 | 5306 | 4868 | 84 | |250000 | 9940 | 5826 | 5004 | 85 | |500000 | 14220 | 7114 | 5202 | 86 | |1000000 | 23708 | 9966 | 5620 | 87 | |1400000 | 32252 | 11178 | 6056 | 88 | |2000000 | - | 13978 | 6472 | 89 | |4000000 | - | 23238 | 8284 | 90 | |5500000 | - | 32188 | 9854 | 91 | |8000000 | - | - | 12310 | 92 | |16000000 | - | - | 19950 | 93 | |29000000 | - | - | 32324 | 94 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/eval/__init__.py -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import numpy as np 5 | import torch 6 | 7 | from backbones import get_model 8 | 9 | 10 | @torch.no_grad() 11 | def inference(weight, name, img): 12 | if img is None: 13 | img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.uint8) 14 | else: 15 | img = cv2.imread(img) 16 | img = cv2.resize(img, (112, 112)) 17 | 18 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 19 | img = np.transpose(img, (2, 0, 1)) 20 | img = torch.from_numpy(img).unsqueeze(0).float() 21 | img.div_(255).sub_(0.5).div_(0.5) 22 | net = get_model(name, fp16=False) 23 | net.load_state_dict(torch.load(weight)) 24 | net.eval() 25 | feat = net(img).numpy() 26 | print(feat) 27 | 28 | 29 | if __name__ == "__main__": 30 | parser = argparse.ArgumentParser(description='PyTorch ArcFace Training') 31 | parser.add_argument('--network', type=str, default='r50', help='backbone network') 32 | parser.add_argument('--weight', type=str, default='') 33 | parser.add_argument('--img', type=str, default=None) 34 | args = parser.parse_args() 35 | inference(args.weight, args.network, args.img) 36 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | def get_loss(name): 6 | if name == "cosface": 7 | return CosFace() 8 | elif name == "arcface": 9 | return ArcFace() 10 | else: 11 | raise ValueError() 12 | 13 | 14 | class CosFace(nn.Module): 15 | def __init__(self, s=64.0, m=0.40): 16 | super(CosFace, self).__init__() 17 | self.s = s 18 | self.m = m 19 | 20 | def forward(self, cosine, label): 21 | index = 
torch.where(label != -1)[0] 22 | m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device) 23 | m_hot.scatter_(1, label[index, None], self.m) 24 | cosine[index] -= m_hot 25 | ret = cosine * self.s 26 | return ret 27 | 28 | 29 | class ArcFace(nn.Module): 30 | def __init__(self, s=64.0, m=0.5): 31 | super(ArcFace, self).__init__() 32 | self.s = s 33 | self.m = m 34 | 35 | def forward(self, cosine: torch.Tensor, label): 36 | index = torch.where(label != -1)[0] 37 | m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device) 38 | m_hot.scatter_(1, label[index, None], self.m) 39 | cosine.acos_() 40 | cosine[index] += m_hot 41 | cosine.cos_().mul_(self.s) 42 | return cosine 43 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/requirement.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | easydict 3 | mxnet 4 | onnx 5 | sklearn 6 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/run.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/ms1mv3_r50 2 | ps -ef | grep "train" | grep -v grep | awk '{print "kill -9 "$2}' | sh 3 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/torch2onnx.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import onnx 3 | import torch 4 | 5 | 6 | def convert_onnx(net, path_module, output, opset=11, simplify=False): 7 | assert isinstance(net, torch.nn.Module) 8 | img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.int32) 9 | img = img.astype(np.float) 10 | img = (img / 255. 
- 0.5) / 0.5 # torch style norm 11 | img = img.transpose((2, 0, 1)) 12 | img = torch.from_numpy(img).unsqueeze(0).float() 13 | 14 | weight = torch.load(path_module) 15 | net.load_state_dict(weight) 16 | net.eval() 17 | torch.onnx.export(net, img, output, keep_initializers_as_inputs=False, verbose=False, opset_version=opset) 18 | model = onnx.load(output) 19 | graph = model.graph 20 | graph.input[0].type.tensor_type.shape.dim[0].dim_param = 'None' 21 | if simplify: 22 | from onnxsim import simplify 23 | model, check = simplify(model) 24 | assert check, "Simplified ONNX model could not be validated" 25 | onnx.save(model, output) 26 | 27 | 28 | if __name__ == '__main__': 29 | import os 30 | import argparse 31 | from backbones import get_model 32 | 33 | parser = argparse.ArgumentParser(description='ArcFace PyTorch to onnx') 34 | parser.add_argument('input', type=str, help='input backbone.pth file or path') 35 | parser.add_argument('--output', type=str, default=None, help='output onnx path') 36 | parser.add_argument('--network', type=str, default=None, help='backbone network') 37 | parser.add_argument('--simplify', type=bool, default=False, help='onnx simplify') 38 | args = parser.parse_args() 39 | input_file = args.input 40 | if os.path.isdir(input_file): 41 | input_file = os.path.join(input_file, "backbone.pth") 42 | assert os.path.exists(input_file) 43 | model_name = os.path.basename(os.path.dirname(input_file)).lower() 44 | params = model_name.split("_") 45 | if len(params) >= 3 and params[1] in ('arcface', 'cosface'): 46 | if args.network is None: 47 | args.network = params[2] 48 | assert args.network is not None 49 | print(args) 50 | backbone_onnx = get_model(args.network, dropout=0) 51 | 52 | output_path = args.output 53 | if output_path is None: 54 | output_path = os.path.join(os.path.dirname(__file__), 'onnx') 55 | if not os.path.exists(output_path): 56 | os.makedirs(output_path) 57 | assert os.path.isdir(output_path) 58 | output_file = os.path.join(output_path, "%s.onnx" % model_name) 59 | convert_onnx(backbone_onnx, input_file, output_file, simplify=args.simplify) 60 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/utils/__init__.py -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/utils/plot.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os 4 | from pathlib import Path 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import pandas as pd 9 | from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap 10 | from prettytable import PrettyTable 11 | from sklearn.metrics import roc_curve, auc 12 | 13 | image_path = "/data/anxiang/IJB_release/IJBC" 14 | files = [ 15 | "./ms1mv3_arcface_r100/ms1mv3_arcface_r100/ijbc.npy" 16 | ] 17 | 18 | 19 | def read_template_pair_list(path): 20 | pairs = pd.read_csv(path, sep=' ', header=None).values 21 | t1 = pairs[:, 0].astype(np.int) 22 | t2 = pairs[:, 1].astype(np.int) 23 | label = pairs[:, 2].astype(np.int) 24 | return t1, t2, label 25 | 26 | 27 | p1, p2, label = read_template_pair_list( 28 | os.path.join('%s/meta' % 
image_path, 29 | '%s_template_pair_label.txt' % 'ijbc')) 30 | 31 | methods = [] 32 | scores = [] 33 | for file in files: 34 | methods.append(file.split('/')[-2]) 35 | scores.append(np.load(file)) 36 | 37 | methods = np.array(methods) 38 | scores = dict(zip(methods, scores)) 39 | colours = dict( 40 | zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2'))) 41 | x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1] 42 | tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels]) 43 | fig = plt.figure() 44 | for method in methods: 45 | fpr, tpr, _ = roc_curve(label, scores[method]) 46 | roc_auc = auc(fpr, tpr) 47 | fpr = np.flipud(fpr) 48 | tpr = np.flipud(tpr) # select largest tpr at same fpr 49 | plt.plot(fpr, 50 | tpr, 51 | color=colours[method], 52 | lw=1, 53 | label=('[%s (AUC = %0.4f %%)]' % 54 | (method.split('-')[-1], roc_auc * 100))) 55 | tpr_fpr_row = [] 56 | tpr_fpr_row.append("%s-%s" % (method, "IJBC")) 57 | for fpr_iter in np.arange(len(x_labels)): 58 | _, min_index = min( 59 | list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr))))) 60 | tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100)) 61 | tpr_fpr_table.add_row(tpr_fpr_row) 62 | plt.xlim([10 ** -6, 0.1]) 63 | plt.ylim([0.3, 1.0]) 64 | plt.grid(linestyle='--', linewidth=1) 65 | plt.xticks(x_labels) 66 | plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) 67 | plt.xscale('log') 68 | plt.xlabel('False Positive Rate') 69 | plt.ylabel('True Positive Rate') 70 | plt.title('ROC on IJB') 71 | plt.legend(loc="lower right") 72 | print(tpr_fpr_table) 73 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/utils/utils_amp.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import torch 4 | 5 | if torch.__version__ < '1.9': 6 | Iterable = torch._six.container_abcs.Iterable 7 | else: 8 | import collections 9 | 10 | Iterable = collections.abc.Iterable 11 | from torch.cuda.amp import GradScaler 12 | 13 | 14 | class _MultiDeviceReplicator(object): 15 | """ 16 | Lazily serves copies of a tensor to requested devices. Copies are cached per-device. 17 | """ 18 | 19 | def __init__(self, master_tensor: torch.Tensor) -> None: 20 | assert master_tensor.is_cuda 21 | self.master = master_tensor 22 | self._per_device_tensors: Dict[torch.device, torch.Tensor] = {} 23 | 24 | def get(self, device) -> torch.Tensor: 25 | retval = self._per_device_tensors.get(device, None) 26 | if retval is None: 27 | retval = self.master.to(device=device, non_blocking=True, copy=True) 28 | self._per_device_tensors[device] = retval 29 | return retval 30 | 31 | 32 | class MaxClipGradScaler(GradScaler): 33 | def __init__(self, init_scale, max_scale: float, growth_interval=100): 34 | GradScaler.__init__(self, init_scale=init_scale, growth_interval=growth_interval) 35 | self.max_scale = max_scale 36 | 37 | def scale_clip(self): 38 | if self.get_scale() == self.max_scale: 39 | self.set_growth_factor(1) 40 | elif self.get_scale() < self.max_scale: 41 | self.set_growth_factor(2) 42 | elif self.get_scale() > self.max_scale: 43 | self._scale.fill_(self.max_scale) 44 | self.set_growth_factor(1) 45 | 46 | def scale(self, outputs): 47 | """ 48 | Multiplies ('scales') a tensor or list of tensors by the scale factor. 49 | 50 | Returns scaled outputs. If this instance of :class:`GradScaler` is not enabled, outputs are returned 51 | unmodified. 
52 | 53 | Arguments: 54 | outputs (Tensor or iterable of Tensors): Outputs to scale. 55 | """ 56 | if not self._enabled: 57 | return outputs 58 | self.scale_clip() 59 | # Short-circuit for the common case. 60 | if isinstance(outputs, torch.Tensor): 61 | assert outputs.is_cuda 62 | if self._scale is None: 63 | self._lazy_init_scale_growth_tracker(outputs.device) 64 | assert self._scale is not None 65 | return outputs * self._scale.to(device=outputs.device, non_blocking=True) 66 | 67 | # Invoke the more complex machinery only if we're treating multiple outputs. 68 | stash: List[_MultiDeviceReplicator] = [] # holds a reference that can be overwritten by apply_scale 69 | 70 | def apply_scale(val): 71 | if isinstance(val, torch.Tensor): 72 | assert val.is_cuda 73 | if len(stash) == 0: 74 | if self._scale is None: 75 | self._lazy_init_scale_growth_tracker(val.device) 76 | assert self._scale is not None 77 | stash.append(_MultiDeviceReplicator(self._scale)) 78 | return val * stash[0].get(val.device) 79 | elif isinstance(val, Iterable): 80 | iterable = map(apply_scale, val) 81 | if isinstance(val, list) or isinstance(val, tuple): 82 | return type(val)(iterable) 83 | else: 84 | return iterable 85 | else: 86 | raise ValueError("outputs must be a Tensor or an iterable of Tensors") 87 | 88 | return apply_scale(outputs) 89 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/utils/utils_callbacks.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import time 4 | from typing import List 5 | 6 | import torch 7 | 8 | from eval import verification 9 | from utils.utils_logging import AverageMeter 10 | 11 | 12 | class CallBackVerification(object): 13 | def __init__(self, frequent, rank, val_targets, rec_prefix, image_size=(112, 112)): 14 | self.frequent: int = frequent 15 | self.rank: int = rank 16 | self.highest_acc: float = 0.0 17 | self.highest_acc_list: List[float] = [0.0] * len(val_targets) 18 | self.ver_list: List[object] = [] 19 | self.ver_name_list: List[str] = [] 20 | if self.rank is 0: 21 | self.init_dataset(val_targets=val_targets, data_dir=rec_prefix, image_size=image_size) 22 | 23 | def ver_test(self, backbone: torch.nn.Module, global_step: int): 24 | results = [] 25 | for i in range(len(self.ver_list)): 26 | acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test( 27 | self.ver_list[i], backbone, 10, 10) 28 | logging.info('[%s][%d]XNorm: %f' % (self.ver_name_list[i], global_step, xnorm)) 29 | logging.info('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (self.ver_name_list[i], global_step, acc2, std2)) 30 | if acc2 > self.highest_acc_list[i]: 31 | self.highest_acc_list[i] = acc2 32 | logging.info( 33 | '[%s][%d]Accuracy-Highest: %1.5f' % (self.ver_name_list[i], global_step, self.highest_acc_list[i])) 34 | results.append(acc2) 35 | 36 | def init_dataset(self, val_targets, data_dir, image_size): 37 | for name in val_targets: 38 | path = os.path.join(data_dir, name + ".bin") 39 | if os.path.exists(path): 40 | data_set = verification.load_bin(path, image_size) 41 | self.ver_list.append(data_set) 42 | self.ver_name_list.append(name) 43 | 44 | def __call__(self, num_update, backbone: torch.nn.Module): 45 | if self.rank is 0 and num_update > 0 and num_update % self.frequent == 0: 46 | backbone.eval() 47 | self.ver_test(backbone, num_update) 48 | backbone.train() 49 | 50 | 51 | class CallBackLogging(object): 52 | def __init__(self, frequent, 
rank, total_step, batch_size, world_size, writer=None): 53 | self.frequent: int = frequent 54 | self.rank: int = rank 55 | self.time_start = time.time() 56 | self.total_step: int = total_step 57 | self.batch_size: int = batch_size 58 | self.world_size: int = world_size 59 | self.writer = writer 60 | 61 | self.init = False 62 | self.tic = 0 63 | 64 | def __call__(self, 65 | global_step: int, 66 | loss: AverageMeter, 67 | epoch: int, 68 | fp16: bool, 69 | learning_rate: float, 70 | grad_scaler: torch.cuda.amp.GradScaler): 71 | if self.rank == 0 and global_step > 0 and global_step % self.frequent == 0: 72 | if self.init: 73 | try: 74 | speed: float = self.frequent * self.batch_size / (time.time() - self.tic) 75 | speed_total = speed * self.world_size 76 | except ZeroDivisionError: 77 | speed_total = float('inf') 78 | 79 | time_now = (time.time() - self.time_start) / 3600 80 | time_total = time_now / ((global_step + 1) / self.total_step) 81 | time_for_end = time_total - time_now 82 | if self.writer is not None: 83 | self.writer.add_scalar('time_for_end', time_for_end, global_step) 84 | self.writer.add_scalar('learning_rate', learning_rate, global_step) 85 | self.writer.add_scalar('loss', loss.avg, global_step) 86 | if fp16: 87 | msg = "Speed %.2f samples/sec Loss %.4f LearningRate %.4f Epoch: %d Global Step: %d " \ 88 | "Fp16 Grad Scale: %2.f Required: %1.f hours" % ( 89 | speed_total, loss.avg, learning_rate, epoch, global_step, 90 | grad_scaler.get_scale(), time_for_end 91 | ) 92 | else: 93 | msg = "Speed %.2f samples/sec Loss %.4f LearningRate %.4f Epoch: %d Global Step: %d " \ 94 | "Required: %1.f hours" % ( 95 | speed_total, loss.avg, learning_rate, epoch, global_step, time_for_end 96 | ) 97 | logging.info(msg) 98 | loss.reset() 99 | self.tic = time.time() 100 | else: 101 | self.init = True 102 | self.tic = time.time() 103 | 104 | 105 | class CallBackModelCheckpoint(object): 106 | def __init__(self, rank, output="./"): 107 | self.rank: int = rank 108 | self.output: str = output 109 | 110 | def __call__(self, global_step, backbone, partial_fc, ): 111 | if global_step > 100 and self.rank == 0: 112 | path_module = os.path.join(self.output, "backbone.pth") 113 | torch.save(backbone.module.state_dict(), path_module) 114 | logging.info("Pytorch Model Saved in '{}'".format(path_module)) 115 | 116 | if global_step > 100 and partial_fc is not None: 117 | partial_fc.save_params() 118 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/utils/utils_config.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os.path as osp 3 | 4 | 5 | def get_config(config_file): 6 | assert config_file.startswith('configs/'), 'config file setting must start with configs/' 7 | temp_config_name = osp.basename(config_file) 8 | temp_module_name = osp.splitext(temp_config_name)[0] 9 | config = importlib.import_module("configs.base") 10 | cfg = config.config 11 | config = importlib.import_module("configs.%s" % temp_module_name) 12 | job_cfg = config.config 13 | cfg.update(job_cfg) 14 | if cfg.output is None: 15 | cfg.output = osp.join('work_dirs', temp_module_name) 16 | return cfg -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/utils/utils_logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | 
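# A minimal usage sketch for get_config() defined above (illustrative, not part of any file in this
# repo; it assumes the working directory is the arcface_torch root so that the configs package is
# importable):
from utils.utils_config import get_config

cfg = get_config('configs/ms1mv3_r50.py')   # must start with 'configs/'; overlays ms1mv3_r50 on configs/base
print(cfg.output)                           # falls back to 'work_dirs/ms1mv3_r50' when the job config leaves output unset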
6 | class AverageMeter(object): 7 | """Computes and stores the average and current value 8 | """ 9 | 10 | def __init__(self): 11 | self.val = None 12 | self.avg = None 13 | self.sum = None 14 | self.count = None 15 | self.reset() 16 | 17 | def reset(self): 18 | self.val = 0 19 | self.avg = 0 20 | self.sum = 0 21 | self.count = 0 22 | 23 | def update(self, val, n=1): 24 | self.val = val 25 | self.sum += val * n 26 | self.count += n 27 | self.avg = self.sum / self.count 28 | 29 | 30 | def init_logging(rank, models_root): 31 | if rank == 0: 32 | log_root = logging.getLogger() 33 | log_root.setLevel(logging.INFO) 34 | formatter = logging.Formatter("Training: %(asctime)s-%(message)s") 35 | handler_file = logging.FileHandler(os.path.join(models_root, "training.log")) 36 | handler_stream = logging.StreamHandler(sys.stdout) 37 | handler_file.setFormatter(formatter) 38 | handler_stream.setFormatter(formatter) 39 | log_root.addHandler(handler_file) 40 | log_root.addHandler(handler_stream) 41 | log_root.info('rank_id: %d' % rank) 42 |
-------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/utils/utils_os.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/Deep3DFaceRecon_pytorch/models/arcface_torch/utils/utils_os.py
-------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/models/losses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from kornia.geometry import warp_affine 5 | import torch.nn.functional as F 6 | 7 | def resize_n_crop(image, M, dsize=112): 8 | # image: (b, c, h, w) 9 | # M : (b, 2, 3) 10 | return warp_affine(image, M, dsize=(dsize, dsize)) 11 | 12 | ### perceptual level loss 13 | class PerceptualLoss(nn.Module): 14 | def __init__(self, recog_net, input_size=112): 15 | super(PerceptualLoss, self).__init__() 16 | self.recog_net = recog_net 17 | self.preprocess = lambda x: 2 * x - 1 18 | self.input_size = input_size 19 | def forward(self, imageA, imageB, M): 20 | """ 21 | 1 - cosine distance 22 | Parameters: 23 | imageA --torch.tensor (B, 3, H, W), range (0, 1), RGB order 24 | imageB --same as imageA 25 | """ 26 | 27 | imageA = self.preprocess(resize_n_crop(imageA, M, self.input_size)) 28 | imageB = self.preprocess(resize_n_crop(imageB, M, self.input_size)) 29 | 30 | # freeze bn 31 | self.recog_net.eval() 32 | 33 | id_featureA = F.normalize(self.recog_net(imageA), dim=-1, p=2) 34 | id_featureB = F.normalize(self.recog_net(imageB), dim=-1, p=2) 35 | cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) 36 | # assert torch.sum((cosine_d > 1).float()) == 0 37 | return torch.sum(1 - cosine_d) / cosine_d.shape[0] 38 | 39 | def perceptual_loss(id_featureA, id_featureB): 40 | cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) 41 | # assert torch.sum((cosine_d > 1).float()) == 0 42 | return torch.sum(1 - cosine_d) / cosine_d.shape[0] 43 |
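# A self-contained sketch of the identity term defined above (illustrative only): two batches of
# L2-normalized embeddings, 512-d as in typical ArcFace backbones; identical embeddings give a loss
# of 0 and orthogonal embeddings give 1.
_feat_a = F.normalize(torch.randn(4, 512), dim=-1, p=2)
_feat_b = F.normalize(torch.randn(4, 512), dim=-1, p=2)
_id_loss = perceptual_loss(_feat_a, _feat_b)   # scalar: mean over the batch of (1 - cosine similarity)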
44 | ### image level loss 45 | def photo_loss(imageA, imageB, mask, eps=1e-6): 46 | """ 47 | l2 norm (with sqrt; use eps to ensure backward stability, otherwise NaN may occur) 48 | Parameters: 49 | imageA --torch.tensor (B, 3, H, W), range (0, 1), RGB order 50 | imageB --same as imageA 51 | """ 52 | loss = torch.sqrt(eps + torch.sum((imageA - imageB) ** 2, dim=1, keepdims=True)) * mask 53 | loss = torch.sum(loss) / torch.max(torch.sum(mask), torch.tensor(1.0).to(mask.device)) 54 | return loss 55 | 56 | def landmark_loss(predict_lm, gt_lm, weight=None): 57 | """ 58 | weighted mse loss 59 | Parameters: 60 | predict_lm --torch.tensor (B, 68, 2) 61 | gt_lm --torch.tensor (B, 68, 2) 62 | weight --numpy.array (1, 68) 63 | """ 64 | if weight is None: 65 | weight = np.ones([68]) 66 | weight[28:31] = 20 67 | weight[-8:] = 20 68 | weight = np.expand_dims(weight, 0) 69 | weight = torch.tensor(weight).to(predict_lm.device) 70 | loss = torch.sum((predict_lm - gt_lm)**2, dim=-1) * weight 71 | loss = torch.sum(loss) / (predict_lm.shape[0] * predict_lm.shape[1]) 72 | return loss 73 | 74 | 75 | ### regularization 76 | def reg_loss(coeffs_dict, opt=None): 77 | """ 78 | l2 norm without the sqrt, from yu's implementation (mse) 79 | tf.nn.l2_loss https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss 80 | Parameters: 81 | coeffs_dict -- a dict of torch.tensors, keys: id, exp, tex, angle, gamma, trans 82 | 83 | """ 84 | # coefficient regularization to ensure plausible 3d faces 85 | if opt: 86 | w_id, w_exp, w_tex = opt.w_id, opt.w_exp, opt.w_tex 87 | else: 88 | w_id, w_exp, w_tex = 1, 1, 1 89 | creg_loss = w_id * torch.sum(coeffs_dict['id'] ** 2) + \ 90 | w_exp * torch.sum(coeffs_dict['exp'] ** 2) + \ 91 | w_tex * torch.sum(coeffs_dict['tex'] ** 2) 92 | creg_loss = creg_loss / coeffs_dict['id'].shape[0] 93 | 94 | # gamma regularization to ensure a nearly-monochromatic light 95 | gamma = coeffs_dict['gamma'].reshape([-1, 3, 9]) 96 | gamma_mean = torch.mean(gamma, dim=1, keepdims=True) 97 | gamma_loss = torch.mean((gamma - gamma_mean) ** 2) 98 | 99 | return creg_loss, gamma_loss 100 | 101 | def reflectance_loss(texture, mask): 102 | """ 103 | minimize texture variance (mse), albedo regularization to ensure a uniform skin albedo 104 | Parameters: 105 | texture --torch.tensor, (B, N, 3) 106 | mask --torch.tensor, (N), 1 or 0 107 | 108 | """ 109 | mask = mask.reshape([1, mask.shape[0], 1]) 110 | texture_mean = torch.sum(mask * texture, dim=1, keepdims=True) / torch.sum(mask) 111 | loss = torch.sum(((texture - texture_mean) * mask)**2) / (texture.shape[0] * torch.sum(mask)) 112 | return loss 113 | 114 |
-------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/options/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes option modules: training options, test options, and basic options (used in both training and test).""" 2 |
-------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/options/inference_options.py: -------------------------------------------------------------------------------- 1 | from .base_options import BaseOptions 2 | 3 | 4 | class InferenceOptions(BaseOptions): 5 | """This class includes test options. 6 | 7 | It also includes shared options defined in BaseOptions. 8 | """ 9 | 10 | def initialize(self, parser): 11 | parser = BaseOptions.initialize(self, parser) # define shared options 12 | parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 13 | parser.add_argument('--dataset_mode', type=str, default=None, help='chooses how datasets are loaded.
[None | flist]') 14 | 15 | parser.add_argument('--input_dir', type=str, help='the folder of the input files') 16 | parser.add_argument('--keypoint_dir', type=str, help='the folder of the keypoint files') 17 | parser.add_argument('--output_dir', type=str, default='mp4', help='the output dir to save the extracted coefficients') 18 | parser.add_argument('--save_split_files', action='store_true', help='save split files or not') 19 | parser.add_argument('--inference_batch_size', type=int, default=8) 20 | 21 | # Dropout and Batchnorm has different behavior during training and test. 22 | self.isTrain = False 23 | return parser 24 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/options/test_options.py: -------------------------------------------------------------------------------- 1 | """This script contains the test options for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | from .base_options import BaseOptions 5 | 6 | 7 | class TestOptions(BaseOptions): 8 | """This class includes test options. 9 | 10 | It also includes shared options defined in BaseOptions. 11 | """ 12 | 13 | def initialize(self, parser): 14 | parser = BaseOptions.initialize(self, parser) # define shared options 15 | parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 16 | parser.add_argument('--dataset_mode', type=str, default=None, help='chooses how datasets are loaded. [None | flist]') 17 | parser.add_argument('--img_folder', type=str, default='examples', help='folder for test images.') 18 | 19 | # Dropout and Batchnorm has different behavior during training and test. 20 | self.isTrain = False 21 | return parser 22 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/util/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes a miscellaneous collection of useful helper functions.""" 2 | from .util import * 3 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/util/generate_list.py: -------------------------------------------------------------------------------- 1 | """This script is to generate training list files for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | import os 5 | 6 | # save path to training data 7 | def write_list(lms_list, imgs_list, msks_list, mode='train',save_folder='datalist', save_name=''): 8 | save_path = os.path.join(save_folder, mode) 9 | if not os.path.isdir(save_path): 10 | os.makedirs(save_path) 11 | with open(os.path.join(save_path, save_name + 'landmarks.txt'), 'w') as fd: 12 | fd.writelines([i + '\n' for i in lms_list]) 13 | 14 | with open(os.path.join(save_path, save_name + 'images.txt'), 'w') as fd: 15 | fd.writelines([i + '\n' for i in imgs_list]) 16 | 17 | with open(os.path.join(save_path, save_name + 'masks.txt'), 'w') as fd: 18 | fd.writelines([i + '\n' for i in msks_list]) 19 | 20 | # check if the path is valid 21 | def check_list(rlms_list, rimgs_list, rmsks_list): 22 | lms_list, imgs_list, msks_list = [], [], [] 23 | for i in range(len(rlms_list)): 24 | flag = 'false' 25 | lm_path = rlms_list[i] 26 | im_path = rimgs_list[i] 27 | msk_path = rmsks_list[i] 28 | if os.path.isfile(lm_path) and os.path.isfile(im_path) and os.path.isfile(msk_path): 29 | flag = 'true' 30 | lms_list.append(rlms_list[i]) 31 | imgs_list.append(rimgs_list[i]) 32 | msks_list.append(rmsks_list[i]) 33 | print(i, rlms_list[i], 
flag) 34 | return lms_list, imgs_list, msks_list 35 |
-------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/util/html.py: -------------------------------------------------------------------------------- 1 | import dominate 2 | from dominate.tags import meta, h3, table, tr, td, p, a, img, br 3 | import os 4 | 5 | 6 | class HTML: 7 | """This HTML class allows us to save images and write texts into a single HTML file. 8 | 9 | It consists of functions such as add_header (add a text header to the HTML file), 10 | add_images (add a row of images to the HTML file), and save (save the HTML to the disk). 11 | It is based on 'dominate', a Python library for creating and manipulating HTML documents using a DOM API. 12 | """ 13 | 14 | def __init__(self, web_dir, title, refresh=0): 15 | """Initialize the HTML classes 16 | 17 | Parameters: 18 | web_dir (str) -- a directory that stores the webpage. The HTML file will be created at web_dir/index.html; images will be saved at web_dir/images/ 19 | title (str) -- the webpage name 20 | refresh (int) -- how often the website refreshes itself; if 0, no refreshing 21 | """ 22 | self.title = title 23 | self.web_dir = web_dir 24 | self.img_dir = os.path.join(self.web_dir, 'images') 25 | if not os.path.exists(self.web_dir): 26 | os.makedirs(self.web_dir) 27 | if not os.path.exists(self.img_dir): 28 | os.makedirs(self.img_dir) 29 | 30 | self.doc = dominate.document(title=title) 31 | if refresh > 0: 32 | with self.doc.head: 33 | meta(http_equiv="refresh", content=str(refresh)) 34 | 35 | def get_image_dir(self): 36 | """Return the directory that stores images""" 37 | return self.img_dir 38 | 39 | def add_header(self, text): 40 | """Insert a header to the HTML file 41 | 42 | Parameters: 43 | text (str) -- the header text 44 | """ 45 | with self.doc: 46 | h3(text) 47 | 48 | def add_images(self, ims, txts, links, width=400): 49 | """add images to the HTML file 50 | 51 | Parameters: 52 | ims (str list) -- a list of image paths 53 | txts (str list) -- a list of image names shown on the website 54 | links (str list) -- a list of hyperref links; when you click an image, it will redirect you to a new page 55 | """ 56 | self.t = table(border=1, style="table-layout: fixed;") # Insert a table 57 | self.doc.add(self.t) 58 | with self.t: 59 | with tr(): 60 | for im, txt, link in zip(ims, txts, links): 61 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 62 | with p(): 63 | with a(href=os.path.join('images', link)): 64 | img(style="width:%dpx" % width, src=os.path.join('images', im)) 65 | br() 66 | p(txt) 67 | 68 | def save(self): 69 | """save the current content to the HTML file""" 70 | html_file = '%s/index.html' % self.web_dir 71 | f = open(html_file, 'wt') 72 | f.write(self.doc.render()) 73 | f.close() 74 | 75 | 76 | if __name__ == '__main__': # we show an example usage here.
77 | html = HTML('web/', 'test_html') 78 | html.add_header('hello world') 79 | 80 | ims, txts, links = [], [], [] 81 | for n in range(4): 82 | ims.append('image_%d.png' % n) 83 | txts.append('text_%d' % n) 84 | links.append('image_%d.png' % n) 85 | html.add_images(ims, txts, links) 86 | html.save() 87 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/util/load_mats.py: -------------------------------------------------------------------------------- 1 | """This script is to load 3D face model for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | import numpy as np 5 | from PIL import Image 6 | from scipy.io import loadmat, savemat 7 | from array import array 8 | import os.path as osp 9 | 10 | # load expression basis 11 | def LoadExpBasis(bfm_folder='BFM'): 12 | n_vertex = 53215 13 | Expbin = open(osp.join(bfm_folder, 'Exp_Pca.bin'), 'rb') 14 | exp_dim = array('i') 15 | exp_dim.fromfile(Expbin, 1) 16 | expMU = array('f') 17 | expPC = array('f') 18 | expMU.fromfile(Expbin, 3*n_vertex) 19 | expPC.fromfile(Expbin, 3*exp_dim[0]*n_vertex) 20 | Expbin.close() 21 | 22 | expPC = np.array(expPC) 23 | expPC = np.reshape(expPC, [exp_dim[0], -1]) 24 | expPC = np.transpose(expPC) 25 | 26 | expEV = np.loadtxt(osp.join(bfm_folder, 'std_exp.txt')) 27 | 28 | return expPC, expEV 29 | 30 | 31 | # transfer original BFM09 to our face model 32 | def transferBFM09(bfm_folder='BFM'): 33 | print('Transfer BFM09 to BFM_model_front......') 34 | original_BFM = loadmat(osp.join(bfm_folder, '01_MorphableModel.mat')) 35 | shapePC = original_BFM['shapePC'] # shape basis 36 | shapeEV = original_BFM['shapeEV'] # corresponding eigen value 37 | shapeMU = original_BFM['shapeMU'] # mean face 38 | texPC = original_BFM['texPC'] # texture basis 39 | texEV = original_BFM['texEV'] # eigen value 40 | texMU = original_BFM['texMU'] # mean texture 41 | 42 | expPC, expEV = LoadExpBasis(bfm_folder) 43 | 44 | # transfer BFM09 to our face model 45 | 46 | idBase = shapePC*np.reshape(shapeEV, [-1, 199]) 47 | idBase = idBase/1e5 # unify the scale to decimeter 48 | idBase = idBase[:, :80] # use only first 80 basis 49 | 50 | exBase = expPC*np.reshape(expEV, [-1, 79]) 51 | exBase = exBase/1e5 # unify the scale to decimeter 52 | exBase = exBase[:, :64] # use only first 64 basis 53 | 54 | texBase = texPC*np.reshape(texEV, [-1, 199]) 55 | texBase = texBase[:, :80] # use only first 80 basis 56 | 57 | # our face model is cropped along face landmarks and contains only 35709 vertex. 58 | # original BFM09 contains 53490 vertex, and expression basis provided by Guo et al. contains 53215 vertex. 59 | # thus we select corresponding vertex to get our face model. 
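    # For reference: once converted, the model below is the standard linear 3DMM, i.e. a face is
    # reconstructed from regressed coefficients as
    #   shape   = meanshape + idBase @ alpha_id + exBase @ alpha_exp   (alpha_id in R^80, alpha_exp in R^64)
    #   texture = meantex   + texBase @ alpha_tex                      (alpha_tex in R^80)
    # with the bases stored as (3*N_vertex, dim) matrices and the means as (1, 3*N_vertex) row vectors.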
60 | 61 | index_exp = loadmat(osp.join(bfm_folder, 'BFM_front_idx.mat')) 62 | index_exp = index_exp['idx'].astype(np.int32) - 1 # starts from 0 (to 53215) 63 | 64 | index_shape = loadmat(osp.join(bfm_folder, 'BFM_exp_idx.mat')) 65 | index_shape = index_shape['trimIndex'].astype( 66 | np.int32) - 1 # starts from 0 (to 53490) 67 | index_shape = index_shape[index_exp] 68 | 69 | idBase = np.reshape(idBase, [-1, 3, 80]) 70 | idBase = idBase[index_shape, :, :] 71 | idBase = np.reshape(idBase, [-1, 80]) 72 | 73 | texBase = np.reshape(texBase, [-1, 3, 80]) 74 | texBase = texBase[index_shape, :, :] 75 | texBase = np.reshape(texBase, [-1, 80]) 76 | 77 | exBase = np.reshape(exBase, [-1, 3, 64]) 78 | exBase = exBase[index_exp, :, :] 79 | exBase = np.reshape(exBase, [-1, 64]) 80 | 81 | meanshape = np.reshape(shapeMU, [-1, 3])/1e5 82 | meanshape = meanshape[index_shape, :] 83 | meanshape = np.reshape(meanshape, [1, -1]) 84 | 85 | meantex = np.reshape(texMU, [-1, 3]) 86 | meantex = meantex[index_shape, :] 87 | meantex = np.reshape(meantex, [1, -1]) 88 | 89 | # other info contains triangles, region used for computing photometric loss, 90 | # region used for skin texture regularization, and 68 landmarks index etc. 91 | other_info = loadmat(osp.join(bfm_folder, 'facemodel_info.mat')) 92 | frontmask2_idx = other_info['frontmask2_idx'] 93 | skinmask = other_info['skinmask'] 94 | keypoints = other_info['keypoints'] 95 | point_buf = other_info['point_buf'] 96 | tri = other_info['tri'] 97 | tri_mask2 = other_info['tri_mask2'] 98 | 99 | # save our face model 100 | savemat(osp.join(bfm_folder, 'BFM_model_front.mat'), {'meanshape': meanshape, 'meantex': meantex, 'idBase': idBase, 'exBase': exBase, 'texBase': texBase, 101 | 'tri': tri, 'point_buf': point_buf, 'tri_mask2': tri_mask2, 'keypoints': keypoints, 'frontmask2_idx': frontmask2_idx, 'skinmask': skinmask}) 102 | 103 | 104 | # load landmarks for standard face, which is used for image preprocessing 105 | def load_lm3d(bfm_folder): 106 | 107 | Lm3D = loadmat(osp.join(bfm_folder, 'similarity_Lm3D_all.mat')) 108 | Lm3D = Lm3D['lm'] 109 | 110 | # calculate 5 facial landmarks using 68 landmarks 111 | lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1 112 | Lm3D = np.stack([Lm3D[lm_idx[0], :], np.mean(Lm3D[lm_idx[[1, 2]], :], 0), np.mean( 113 | Lm3D[lm_idx[[3, 4]], :], 0), Lm3D[lm_idx[5], :], Lm3D[lm_idx[6], :]], axis=0) 114 | Lm3D = Lm3D[[1, 2, 0, 3, 4], :] 115 | 116 | return Lm3D 117 | 118 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/util/nvdiffrast.py: -------------------------------------------------------------------------------- 1 | """This script is the differentiable renderer for Deep3DFaceRecon_pytorch 2 | Attention, antialiasing step is missing in current version. 
3 | """ 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | import kornia 8 | from kornia.geometry.camera import pixel2cam 9 | import numpy as np 10 | from typing import List 11 | import nvdiffrast.torch as dr 12 | from scipy.io import loadmat 13 | from torch import nn 14 | 15 | def ndc_projection(x=0.1, n=1.0, f=50.0): 16 | return np.array([[n/x, 0, 0, 0], 17 | [ 0, n/-x, 0, 0], 18 | [ 0, 0, -(f+n)/(f-n), -(2*f*n)/(f-n)], 19 | [ 0, 0, -1, 0]]).astype(np.float32) 20 | 21 | class MeshRenderer(nn.Module): 22 | def __init__(self, 23 | rasterize_fov, 24 | znear=0.1, 25 | zfar=10, 26 | rasterize_size=224): 27 | super(MeshRenderer, self).__init__() 28 | 29 | x = np.tan(np.deg2rad(rasterize_fov * 0.5)) * znear 30 | self.ndc_proj = torch.tensor(ndc_projection(x=x, n=znear, f=zfar)).matmul( 31 | torch.diag(torch.tensor([1., -1, -1, 1]))) 32 | self.rasterize_size = rasterize_size 33 | self.glctx = None 34 | 35 | def forward(self, vertex, tri, feat=None): 36 | """ 37 | Return: 38 | mask -- torch.tensor, size (B, 1, H, W) 39 | depth -- torch.tensor, size (B, 1, H, W) 40 | features(optional) -- torch.tensor, size (B, C, H, W) if feat is not None 41 | 42 | Parameters: 43 | vertex -- torch.tensor, size (B, N, 3) 44 | tri -- torch.tensor, size (B, M, 3) or (M, 3), triangles 45 | feat(optional) -- torch.tensor, size (B, C), features 46 | """ 47 | device = vertex.device 48 | rsize = int(self.rasterize_size) 49 | ndc_proj = self.ndc_proj.to(device) 50 | # trans to homogeneous coordinates of 3d vertices, the direction of y is the same as v 51 | if vertex.shape[-1] == 3: 52 | vertex = torch.cat([vertex, torch.ones([*vertex.shape[:2], 1]).to(device)], dim=-1) 53 | vertex[..., 1] = -vertex[..., 1] 54 | 55 | 56 | vertex_ndc = vertex @ ndc_proj.t() 57 | if self.glctx is None: 58 | self.glctx = dr.RasterizeGLContext(device=device) 59 | print("create glctx on device cuda:%d"%device.index) 60 | 61 | ranges = None 62 | if isinstance(tri, List) or len(tri.shape) == 3: 63 | vum = vertex_ndc.shape[1] 64 | fnum = torch.tensor([f.shape[0] for f in tri]).unsqueeze(1).to(device) 65 | fstartidx = torch.cumsum(fnum, dim=0) - fnum 66 | ranges = torch.cat([fstartidx, fnum], axis=1).type(torch.int32).cpu() 67 | for i in range(tri.shape[0]): 68 | tri[i] = tri[i] + i*vum 69 | vertex_ndc = torch.cat(vertex_ndc, dim=0) 70 | tri = torch.cat(tri, dim=0) 71 | 72 | # for range_mode vetex: [B*N, 4], tri: [B*M, 3], for instance_mode vetex: [B, N, 4], tri: [M, 3] 73 | tri = tri.type(torch.int32).contiguous() 74 | rast_out, _ = dr.rasterize(self.glctx, vertex_ndc.contiguous(), tri, resolution=[rsize, rsize], ranges=ranges) 75 | 76 | depth, _ = dr.interpolate(vertex.reshape([-1,4])[...,2].unsqueeze(1).contiguous(), rast_out, tri) 77 | depth = depth.permute(0, 3, 1, 2) 78 | mask = (rast_out[..., 3] > 0).float().unsqueeze(1) 79 | depth = mask * depth 80 | 81 | 82 | image = None 83 | if feat is not None: 84 | image, _ = dr.interpolate(feat, rast_out, tri) 85 | image = image.permute(0, 3, 1, 2) 86 | image = mask * image 87 | 88 | return mask, depth, image 89 | 90 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/util/skin_mask.py: -------------------------------------------------------------------------------- 1 | """This script is to generate skin attention mask for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | import math 5 | import numpy as np 6 | import os 7 | import cv2 8 | 9 | class GMM: 10 | def __init__(self, dim, num, w, mu, cov, cov_det, cov_inv): 11 | self.dim = 
dim # feature dimension 12 | self.num = num # number of Gaussian components 13 | self.w = w # weights of Gaussian components (a list of scalars) 14 | self.mu= mu # mean of Gaussian components (a list of 1xdim vectors) 15 | self.cov = cov # covariance matrix of Gaussian components (a list of dimxdim matrices) 16 | self.cov_det = cov_det # pre-computed determinet of covariance matrices (a list of scalars) 17 | self.cov_inv = cov_inv # pre-computed inverse covariance matrices (a list of dimxdim matrices) 18 | 19 | self.factor = [0]*num 20 | for i in range(self.num): 21 | self.factor[i] = (2*math.pi)**(self.dim/2) * self.cov_det[i]**0.5 22 | 23 | def likelihood(self, data): 24 | assert(data.shape[1] == self.dim) 25 | N = data.shape[0] 26 | lh = np.zeros(N) 27 | 28 | for i in range(self.num): 29 | data_ = data - self.mu[i] 30 | 31 | tmp = np.matmul(data_,self.cov_inv[i]) * data_ 32 | tmp = np.sum(tmp,axis=1) 33 | power = -0.5 * tmp 34 | 35 | p = np.array([math.exp(power[j]) for j in range(N)]) 36 | p = p/self.factor[i] 37 | lh += p*self.w[i] 38 | 39 | return lh 40 | 41 | 42 | def _rgb2ycbcr(rgb): 43 | m = np.array([[65.481, 128.553, 24.966], 44 | [-37.797, -74.203, 112], 45 | [112, -93.786, -18.214]]) 46 | shape = rgb.shape 47 | rgb = rgb.reshape((shape[0] * shape[1], 3)) 48 | ycbcr = np.dot(rgb, m.transpose() / 255.) 49 | ycbcr[:, 0] += 16. 50 | ycbcr[:, 1:] += 128. 51 | return ycbcr.reshape(shape) 52 | 53 | 54 | def _bgr2ycbcr(bgr): 55 | rgb = bgr[..., ::-1] 56 | return _rgb2ycbcr(rgb) 57 | 58 | 59 | gmm_skin_w = [0.24063933, 0.16365987, 0.26034665, 0.33535415] 60 | gmm_skin_mu = [np.array([113.71862, 103.39613, 164.08226]), 61 | np.array([150.19858, 105.18467, 155.51428]), 62 | np.array([183.92976, 107.62468, 152.71820]), 63 | np.array([114.90524, 113.59782, 151.38217])] 64 | gmm_skin_cov_det = [5692842.5, 5851930.5, 2329131., 1585971.] 
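# For reference: GMM.likelihood() above evaluates a full-covariance Gaussian mixture,
#   p(x) = sum_i w_i * exp(-0.5 * (x - mu_i)^T Sigma_i^{-1} (x - mu_i)) / ((2*pi)^(d/2) * |Sigma_i|^0.5)
# and skinmask() further down combines the skin and non-skin mixtures with Bayes' rule,
#   P(skin | x) = prior_skin * p_skin(x) / (prior_skin * p_skin(x) + prior_nonskin * p_nonskin(x))
# where x is a YCbCr pixel and prior_skin = 0.8 as defined later in this file.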
65 | gmm_skin_cov_inv = [np.array([[0.0019472069, 0.0020450759, -0.00060243998],[0.0020450759, 0.017700525, 0.0051420014],[-0.00060243998, 0.0051420014, 0.0081308950]]), 66 | np.array([[0.0027110141, 0.0011036990, 0.0023122299],[0.0011036990, 0.010707724, 0.010742856],[0.0023122299, 0.010742856, 0.017481629]]), 67 | np.array([[0.0048026871, 0.00022935172, 0.0077668377],[0.00022935172, 0.011729696, 0.0081661865],[0.0077668377, 0.0081661865, 0.025374353]]), 68 | np.array([[0.0011989699, 0.0022453172, -0.0010748957],[0.0022453172, 0.047758564, 0.020332102],[-0.0010748957, 0.020332102, 0.024502251]])] 69 | 70 | gmm_skin = GMM(3, 4, gmm_skin_w, gmm_skin_mu, [], gmm_skin_cov_det, gmm_skin_cov_inv) 71 | 72 | gmm_nonskin_w = [0.12791070, 0.31130761, 0.34245777, 0.21832393] 73 | gmm_nonskin_mu = [np.array([99.200851, 112.07533, 140.20602]), 74 | np.array([110.91392, 125.52969, 130.19237]), 75 | np.array([129.75864, 129.96107, 126.96808]), 76 | np.array([112.29587, 128.85121, 129.05431])] 77 | gmm_nonskin_cov_det = [458703648., 6466488., 90611376., 133097.63] 78 | gmm_nonskin_cov_inv = [np.array([[0.00085371657, 0.00071197288, 0.00023958916],[0.00071197288, 0.0025935620, 0.00076557708],[0.00023958916, 0.00076557708, 0.0015042332]]), 79 | np.array([[0.00024650150, 0.00045542428, 0.00015019422],[0.00045542428, 0.026412144, 0.018419769],[0.00015019422, 0.018419769, 0.037497383]]), 80 | np.array([[0.00037054974, 0.00038146760, 0.00040408765],[0.00038146760, 0.0085505722, 0.0079136286],[0.00040408765, 0.0079136286, 0.010982352]]), 81 | np.array([[0.00013709733, 0.00051228428, 0.00012777430],[0.00051228428, 0.28237113, 0.10528370],[0.00012777430, 0.10528370, 0.23468947]])] 82 | 83 | gmm_nonskin = GMM(3, 4, gmm_nonskin_w, gmm_nonskin_mu, [], gmm_nonskin_cov_det, gmm_nonskin_cov_inv) 84 | 85 | prior_skin = 0.8 86 | prior_nonskin = 1 - prior_skin 87 | 88 | 89 | # calculate skin attention mask 90 | def skinmask(imbgr): 91 | im = _bgr2ycbcr(imbgr) 92 | 93 | data = im.reshape((-1,3)) 94 | 95 | lh_skin = gmm_skin.likelihood(data) 96 | lh_nonskin = gmm_nonskin.likelihood(data) 97 | 98 | tmp1 = prior_skin * lh_skin 99 | tmp2 = prior_nonskin * lh_nonskin 100 | post_skin = tmp1 / (tmp1+tmp2) # posterior probability 101 | 102 | post_skin = post_skin.reshape((im.shape[0],im.shape[1])) 103 | 104 | post_skin = np.round(post_skin*255) 105 | post_skin = post_skin.astype(np.uint8) 106 | post_skin = np.tile(np.expand_dims(post_skin,2),[1,1,3]) # reshape to H*W*3 107 | 108 | return post_skin 109 | 110 | 111 | def get_skin_mask(img_path): 112 | print('generating skin masks......') 113 | names = [i for i in sorted(os.listdir( 114 | img_path)) if 'jpg' in i or 'png' in i or 'jpeg' in i or 'PNG' in i] 115 | save_path = os.path.join(img_path, 'mask') 116 | if not os.path.isdir(save_path): 117 | os.makedirs(save_path) 118 | 119 | for i in range(0, len(names)): 120 | name = names[i] 121 | print('%05d' % (i), ' ', name) 122 | full_image_name = os.path.join(img_path, name) 123 | img = cv2.imread(full_image_name).astype(np.float32) 124 | skin_img = skinmask(img) 125 | cv2.imwrite(os.path.join(save_path, name), skin_img.astype(np.uint8)) 126 | -------------------------------------------------------------------------------- /third_part/Deep3DFaceRecon_pytorch/util/test_mean_face.txt: -------------------------------------------------------------------------------- 1 | -5.228591537475585938e+01 2 | 2.078247070312500000e-01 3 | -5.064269638061523438e+01 4 | -1.315765380859375000e+01 5 | -4.952939224243164062e+01 6 | 
-2.592591094970703125e+01 7 | -4.793047332763671875e+01 8 | -3.832135772705078125e+01 9 | -4.512159729003906250e+01 10 | -5.059623336791992188e+01 11 | -3.917720794677734375e+01 12 | -6.043736648559570312e+01 13 | -2.929953765869140625e+01 14 | -6.861183166503906250e+01 15 | -1.719801330566406250e+01 16 | -7.572736358642578125e+01 17 | -1.961936950683593750e+00 18 | -7.862001037597656250e+01 19 | 1.467941284179687500e+01 20 | -7.607844543457031250e+01 21 | 2.744073486328125000e+01 22 | -6.915261840820312500e+01 23 | 3.855677795410156250e+01 24 | -5.950350570678710938e+01 25 | 4.478240966796875000e+01 26 | -4.867547225952148438e+01 27 | 4.714337158203125000e+01 28 | -3.800830078125000000e+01 29 | 4.940315246582031250e+01 30 | -2.496297454833984375e+01 31 | 5.117234802246093750e+01 32 | -1.241538238525390625e+01 33 | 5.190507507324218750e+01 34 | 8.244247436523437500e-01 35 | -4.150688934326171875e+01 36 | 2.386329650878906250e+01 37 | -3.570307159423828125e+01 38 | 3.017010498046875000e+01 39 | -2.790358734130859375e+01 40 | 3.212951660156250000e+01 41 | -1.941773223876953125e+01 42 | 3.156523132324218750e+01 43 | -1.138106536865234375e+01 44 | 2.841992187500000000e+01 45 | 5.993263244628906250e+00 46 | 2.895182800292968750e+01 47 | 1.343590545654296875e+01 48 | 3.189880371093750000e+01 49 | 2.203153991699218750e+01 50 | 3.302221679687500000e+01 51 | 2.992478942871093750e+01 52 | 3.099150085449218750e+01 53 | 3.628388977050781250e+01 54 | 2.765748596191406250e+01 55 | -1.933914184570312500e+00 56 | 1.405374145507812500e+01 57 | -2.153038024902343750e+00 58 | 5.772636413574218750e+00 59 | -2.270050048828125000e+00 60 | -2.121643066406250000e+00 61 | -2.218330383300781250e+00 62 | -1.068978118896484375e+01 63 | -1.187252044677734375e+01 64 | -1.997912597656250000e+01 65 | -6.879402160644531250e+00 66 | -2.143579864501953125e+01 67 | -1.227821350097656250e+00 68 | -2.193494415283203125e+01 69 | 4.623237609863281250e+00 70 | -2.152721405029296875e+01 71 | 9.721397399902343750e+00 72 | -1.953671264648437500e+01 73 | -3.648714447021484375e+01 74 | 9.811126708984375000e+00 75 | -3.130242919921875000e+01 76 | 1.422447967529296875e+01 77 | -2.212834930419921875e+01 78 | 1.493019866943359375e+01 79 | -1.500880432128906250e+01 80 | 1.073588562011718750e+01 81 | -2.095037078857421875e+01 82 | 9.054298400878906250e+00 83 | -3.050099182128906250e+01 84 | 8.704177856445312500e+00 85 | 1.173237609863281250e+01 86 | 1.054329681396484375e+01 87 | 1.856353759765625000e+01 88 | 1.535009765625000000e+01 89 | 2.893331909179687500e+01 90 | 1.451992797851562500e+01 91 | 3.452944946289062500e+01 92 | 1.065280151367187500e+01 93 | 2.875990295410156250e+01 94 | 8.654792785644531250e+00 95 | 1.942100524902343750e+01 96 | 9.422447204589843750e+00 97 | -2.204488372802734375e+01 98 | -3.983994293212890625e+01 99 | -1.324458312988281250e+01 100 | -3.467377471923828125e+01 101 | -6.749649047851562500e+00 102 | -3.092894744873046875e+01 103 | -9.183349609375000000e-01 104 | -3.196458435058593750e+01 105 | 4.220649719238281250e+00 106 | -3.090406036376953125e+01 107 | 1.089889526367187500e+01 108 | -3.497008514404296875e+01 109 | 1.874589538574218750e+01 110 | -4.065438079833984375e+01 111 | 1.124106597900390625e+01 112 | -4.438417816162109375e+01 113 | 5.181709289550781250e+00 114 | -4.649170684814453125e+01 115 | -1.158607482910156250e+00 116 | -4.680406951904296875e+01 117 | -7.918922424316406250e+00 118 | -4.671575164794921875e+01 119 | -1.452505493164062500e+01 120 | -4.416526031494140625e+01 121 | 
-2.005007171630859375e+01 122 | -3.997841644287109375e+01 123 | -1.054919433593750000e+01 124 | -3.849683380126953125e+01 125 | -1.051826477050781250e+00 126 | -3.794863128662109375e+01 127 | 6.412681579589843750e+00 128 | -3.804645538330078125e+01 129 | 1.627674865722656250e+01 130 | -4.039697265625000000e+01 131 | 6.373878479003906250e+00 132 | -4.087213897705078125e+01 133 | -8.551712036132812500e-01 134 | -4.157129669189453125e+01 135 | -1.014953613281250000e+01 136 | -4.128469085693359375e+01 137 | -------------------------------------------------------------------------------- /third_part/PerceptualSimilarity/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/PerceptualSimilarity/models/__init__.py -------------------------------------------------------------------------------- /third_part/PerceptualSimilarity/models/base_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.autograd import Variable 4 | from pdb import set_trace as st 5 | from IPython import embed 6 | 7 | class BaseModel(): 8 | def __init__(self): 9 | pass; 10 | 11 | def name(self): 12 | return 'BaseModel' 13 | 14 | def initialize(self, use_gpu=True): 15 | self.use_gpu = use_gpu 16 | self.Tensor = torch.cuda.FloatTensor if self.use_gpu else torch.Tensor 17 | # self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) 18 | 19 | def forward(self): 20 | pass 21 | 22 | def get_image_paths(self): 23 | pass 24 | 25 | def optimize_parameters(self): 26 | pass 27 | 28 | def get_current_visuals(self): 29 | return self.input 30 | 31 | def get_current_errors(self): 32 | return {} 33 | 34 | def save(self, label): 35 | pass 36 | 37 | # helper saving function that can be used by subclasses 38 | def save_network(self, network, path, network_label, epoch_label): 39 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 40 | save_path = os.path.join(path, save_filename) 41 | torch.save(network.state_dict(), save_path) 42 | 43 | # helper loading function that can be used by subclasses 44 | def load_network(self, network, network_label, epoch_label): 45 | # embed() 46 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 47 | save_path = os.path.join(self.save_dir, save_filename) 48 | print('Loading network from %s'%save_path) 49 | network.load_state_dict(torch.load(save_path)) 50 | 51 | def update_learning_rate(): 52 | pass 53 | 54 | def get_image_paths(self): 55 | return self.image_paths 56 | 57 | def save_done(self, flag=False): 58 | np.save(os.path.join(self.save_dir, 'done_flag'),flag) 59 | np.savetxt(os.path.join(self.save_dir, 'done_flag'),[flag,],fmt='%i') 60 | 61 | -------------------------------------------------------------------------------- /third_part/PerceptualSimilarity/models/models.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | def create_model(opt): 4 | model = None 5 | print(opt.model) 6 | from .siam_model import * 7 | model = DistModel() 8 | model.initialize(opt, opt.batchSize, ) 9 | print("model [%s] was created" % (model.name())) 10 | return model 11 | 12 | -------------------------------------------------------------------------------- /third_part/PerceptualSimilarity/util/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/PerceptualSimilarity/util/__init__.py -------------------------------------------------------------------------------- /third_part/PerceptualSimilarity/util/html.py: -------------------------------------------------------------------------------- 1 | import dominate 2 | from dominate.tags import * 3 | import os 4 | 5 | 6 | class HTML: 7 | def __init__(self, web_dir, title, image_subdir='', reflesh=0): 8 | self.title = title 9 | self.web_dir = web_dir 10 | # self.img_dir = os.path.join(self.web_dir, ) 11 | self.img_subdir = image_subdir 12 | self.img_dir = os.path.join(self.web_dir, image_subdir) 13 | if not os.path.exists(self.web_dir): 14 | os.makedirs(self.web_dir) 15 | if not os.path.exists(self.img_dir): 16 | os.makedirs(self.img_dir) 17 | # print(self.img_dir) 18 | 19 | self.doc = dominate.document(title=title) 20 | if reflesh > 0: 21 | with self.doc.head: 22 | meta(http_equiv="reflesh", content=str(reflesh)) 23 | 24 | def get_image_dir(self): 25 | return self.img_dir 26 | 27 | def add_header(self, str): 28 | with self.doc: 29 | h3(str) 30 | 31 | def add_table(self, border=1): 32 | self.t = table(border=border, style="table-layout: fixed;") 33 | self.doc.add(self.t) 34 | 35 | def add_images(self, ims, txts, links, width=400): 36 | self.add_table() 37 | with self.t: 38 | with tr(): 39 | for im, txt, link in zip(ims, txts, links): 40 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 41 | with p(): 42 | with a(href=os.path.join(link)): 43 | img(style="width:%dpx" % width, src=os.path.join(im)) 44 | br() 45 | p(txt) 46 | 47 | def save(self,file='index'): 48 | html_file = '%s/%s.html' % (self.web_dir,file) 49 | f = open(html_file, 'wt') 50 | f.write(self.doc.render()) 51 | f.close() 52 | 53 | 54 | if __name__ == '__main__': 55 | html = HTML('web/', 'test_html') 56 | html.add_header('hello world') 57 | 58 | ims = [] 59 | txts = [] 60 | links = [] 61 | for n in range(4): 62 | ims.append('image_%d.png' % n) 63 | txts.append('text_%d' % n) 64 | links.append('image_%d.png' % n) 65 | html.add_images(ims, txts, links) 66 | html.save() 67 | -------------------------------------------------------------------------------- /third_part/PerceptualSimilarity/weights/v0.0/alex.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/PerceptualSimilarity/weights/v0.0/alex.pth -------------------------------------------------------------------------------- /third_part/PerceptualSimilarity/weights/v0.0/squeeze.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/PerceptualSimilarity/weights/v0.0/squeeze.pth -------------------------------------------------------------------------------- /third_part/PerceptualSimilarity/weights/v0.0/vgg.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/PerceptualSimilarity/weights/v0.0/vgg.pth -------------------------------------------------------------------------------- /third_part/PerceptualSimilarity/weights/v0.1/alex.pth: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/PerceptualSimilarity/weights/v0.1/alex.pth -------------------------------------------------------------------------------- /third_part/PerceptualSimilarity/weights/v0.1/squeeze.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/PerceptualSimilarity/weights/v0.1/squeeze.pth -------------------------------------------------------------------------------- /third_part/PerceptualSimilarity/weights/v0.1/vgg.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/PerceptualSimilarity/weights/v0.1/vgg.pth -------------------------------------------------------------------------------- /third_part/decalib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/decalib/__init__.py -------------------------------------------------------------------------------- /third_part/decalib/datasets/aflw2000.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | import torchvision.transforms as transforms 4 | import numpy as np 5 | import cv2 6 | import scipy 7 | from skimage.io import imread, imsave 8 | from skimage.transform import estimate_transform, warp, resize, rescale 9 | from glob import glob 10 | from torch.utils.data import Dataset, DataLoader, ConcatDataset 11 | import scipy.io 12 | 13 | class AFLW2000(Dataset): 14 | def __init__(self, testpath='/ps/scratch/yfeng/Data/AFLW2000/GT', crop_size=224): 15 | ''' 16 | data class for loading AFLW2000 dataset 17 | make sure each image has corresponding mat file, which provides cropping infromation 18 | ''' 19 | if os.path.isdir(testpath): 20 | self.imagepath_list = glob(testpath + '/*.jpg') + glob(testpath + '/*.png') 21 | elif isinstance(testpath, list): 22 | self.imagepath_list = testpath 23 | elif os.path.isfile(testpath) and (testpath[-3:] in ['jpg', 'png']): 24 | self.imagepath_list = [testpath] 25 | else: 26 | print('please check the input path') 27 | exit() 28 | print('total {} images'.format(len(self.imagepath_list))) 29 | self.imagepath_list = sorted(self.imagepath_list) 30 | self.crop_size = crop_size 31 | self.scale = 1.6 32 | self.resolution_inp = crop_size 33 | 34 | def __len__(self): 35 | return len(self.imagepath_list) 36 | 37 | def __getitem__(self, index): 38 | imagepath = self.imagepath_list[index] 39 | imagename = imagepath.split('/')[-1].split('.')[0] 40 | image = imread(imagepath)[:,:,:3] 41 | kpt = scipy.io.loadmat(imagepath.replace('jpg', 'mat'))['pt3d_68'].T 42 | left = np.min(kpt[:,0]); right = np.max(kpt[:,0]); 43 | top = np.min(kpt[:,1]); bottom = np.max(kpt[:,1]) 44 | 45 | h, w, _ = image.shape 46 | old_size = (right - left + bottom - top)/2 47 | center = np.array([right - (right - left) / 2.0, bottom - (bottom - top) / 2.0 ])#+ old_size*0.1]) 48 | size = int(old_size*self.scale) 49 | 50 | # crop image 51 | src_pts = np.array([[center[0]-size/2, center[1]-size/2], [center[0] - size/2, center[1]+size/2], [center[0]+size/2, center[1]-size/2]]) 52 | DST_PTS = np.array([[0,0], [0,self.resolution_inp - 1], [self.resolution_inp - 1, 0]]) 53 | 
tform = estimate_transform('similarity', src_pts, DST_PTS) 54 | 55 | image = image/255. 56 | dst_image = warp(image, tform.inverse, output_shape=(self.resolution_inp, self.resolution_inp)) 57 | dst_image = dst_image.transpose(2,0,1) 58 | return {'image': torch.tensor(dst_image).float(), 59 | 'imagename': imagename, 60 | # 'tform': tform, 61 | # 'original_image': torch.tensor(image.transpose(2,0,1)).float(), 62 | } -------------------------------------------------------------------------------- /third_part/decalib/datasets/build_datasets.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | from torch.utils.data import Dataset, ConcatDataset 4 | import torchvision.transforms as transforms 5 | import numpy as np 6 | import cv2 7 | import scipy 8 | from skimage.io import imread, imsave 9 | from skimage.transform import estimate_transform, warp, resize, rescale 10 | from glob import glob 11 | 12 | from .vggface import VGGFace2Dataset 13 | from .ethnicity import EthnicityDataset 14 | from .aflw2000 import AFLW2000 15 | from .now import NoWDataset 16 | from .vox import VoxelDataset 17 | 18 | def build_train(config, is_train=True): 19 | data_list = [] 20 | if 'vox2' in config.training_data: 21 | data_list.append(VoxelDataset(dataname='vox2', K=config.K, image_size=config.image_size, scale=[config.scale_min, config.scale_max], trans_scale=config.trans_scale, isSingle=config.isSingle)) 22 | if 'vggface2' in config.training_data: 23 | data_list.append(VGGFace2Dataset(K=config.K, image_size=config.image_size, scale=[config.scale_min, config.scale_max], trans_scale=config.trans_scale, isSingle=config.isSingle)) 24 | if 'vggface2hq' in config.training_data: 25 | data_list.append(VGGFace2HQDataset(K=config.K, image_size=config.image_size, scale=[config.scale_min, config.scale_max], trans_scale=config.trans_scale, isSingle=config.isSingle)) 26 | if 'ethnicity' in config.training_data: 27 | data_list.append(EthnicityDataset(K=config.K, image_size=config.image_size, scale=[config.scale_min, config.scale_max], trans_scale=config.trans_scale, isSingle=config.isSingle)) 28 | if 'coco' in config.training_data: 29 | data_list.append(COCODataset(image_size=config.image_size, scale=[config.scale_min, config.scale_max], trans_scale=config.trans_scale)) 30 | if 'celebahq' in config.training_data: 31 | data_list.append(CelebAHQDataset(image_size=config.image_size, scale=[config.scale_min, config.scale_max], trans_scale=config.trans_scale)) 32 | dataset = ConcatDataset(data_list) 33 | 34 | return dataset 35 | 36 | def build_val(config, is_train=True): 37 | data_list = [] 38 | if 'vggface2' in config.eval_data: 39 | data_list.append(VGGFace2Dataset(isEval=True, K=config.K, image_size=config.image_size, scale=[config.scale_min, config.scale_max], trans_scale=config.trans_scale, isSingle=config.isSingle)) 40 | if 'now' in config.eval_data: 41 | data_list.append(NoWDataset()) 42 | if 'aflw2000' in config.eval_data: 43 | data_list.append(AFLW2000()) 44 | dataset = ConcatDataset(data_list) 45 | 46 | return dataset 47 | -------------------------------------------------------------------------------- /third_part/decalib/datasets/detectors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 
5 | # Using this computer program means that you agree to the terms 6 | # in the LICENSE file included with this software distribution. 7 | # Any use not explicitly granted by the LICENSE is prohibited. 8 | # 9 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 10 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 11 | # for Intelligent Systems. All rights reserved. 12 | # 13 | # For comments or questions, please email us at deca@tue.mpg.de 14 | # For commercial licensing contact, please contact ps-license@tuebingen.mpg.de 15 | 16 | import numpy as np 17 | import torch 18 | 19 | class FAN(object): 20 | def __init__(self): 21 | import face_alignment 22 | self.model = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False) 23 | 24 | def run(self, image): 25 | ''' 26 | image: 0-255, uint8, rgb, [h, w, 3] 27 | return: detected box list 28 | ''' 29 | out = self.model.get_landmarks(image) 30 | if out is None: 31 | return [0], 'kpt68' 32 | else: 33 | kpt = out[0].squeeze() 34 | left = np.min(kpt[:,0]); right = np.max(kpt[:,0]); 35 | top = np.min(kpt[:,1]); bottom = np.max(kpt[:,1]) 36 | bbox = [left,top, right, bottom] 37 | return bbox, 'kpt68' 38 | 39 | class MTCNN(object): 40 | def __init__(self, device = 'cpu'): 41 | ''' 42 | https://github.com/timesler/facenet-pytorch/blob/master/examples/infer.ipynb 43 | ''' 44 | from facenet_pytorch import MTCNN as mtcnn 45 | self.device = device 46 | self.model = mtcnn(keep_all=True) 47 | def run(self, input): 48 | ''' 49 | image: 0-255, uint8, rgb, [h, w, 3] 50 | return: detected box 51 | ''' 52 | out = self.model.detect(input[None,...]) 53 | if out[0][0] is None: 54 | return [0] 55 | else: 56 | bbox = out[0][0].squeeze() 57 | return bbox, 'bbox' 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /third_part/decalib/datasets/now.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | import torchvision.transforms as transforms 4 | import numpy as np 5 | import cv2 6 | import scipy 7 | from skimage.io import imread, imsave 8 | from skimage.transform import estimate_transform, warp, resize, rescale 9 | from glob import glob 10 | from torch.utils.data import Dataset, DataLoader, ConcatDataset 11 | 12 | class NoWDataset(Dataset): 13 | def __init__(self, ring_elements=6, crop_size=224, scale=1.6): 14 | folder = '/ps/scratch/yfeng/other-github/now_evaluation/data/NoW_Dataset' 15 | self.data_path = os.path.join(folder, 'imagepathsvalidation.txt') 16 | with open(self.data_path) as f: 17 | self.data_lines = f.readlines() 18 | 19 | self.imagefolder = os.path.join(folder, 'final_release_version', 'iphone_pictures') 20 | self.bbxfolder = os.path.join(folder, 'final_release_version', 'detected_face') 21 | 22 | # self.data_path = '/ps/scratch/face2d3d/ringnetpp/eccv/test_data/evaluation/NoW_Dataset/final_release_version/test_image_paths_ring_6_elements.npy' 23 | # self.imagepath = '/ps/scratch/face2d3d/ringnetpp/eccv/test_data/evaluation/NoW_Dataset/final_release_version/iphone_pictures/' 24 | # self.bbxpath = '/ps/scratch/face2d3d/ringnetpp/eccv/test_data/evaluation/NoW_Dataset/final_release_version/detected_face/' 25 | self.crop_size = crop_size 26 | self.scale = scale 27 | 28 | def __len__(self): 29 | return len(self.data_lines) 30 | 31 | def __getitem__(self, index): 32 | imagepath = os.path.join(self.imagefolder, self.data_lines[index].strip()) #+ '.jpg' 33 | bbx_path = 
os.path.join(self.bbxfolder, self.data_lines[index].strip().replace('.jpg', '.npy')) 34 | bbx_data = np.load(bbx_path, allow_pickle=True, encoding='latin1').item() 35 | # box = np.array([[bbx_data['left'], bbx_data['top']], [bbx_data['right'], bbx_data['bottom']]]).astype('float32') 36 | left = bbx_data['left']; right = bbx_data['right'] 37 | top = bbx_data['top']; bottom = bbx_data['bottom'] 38 | 39 | imagename = imagepath.split('/')[-1].split('.')[0] 40 | image = imread(imagepath)[:,:,:3] 41 | 42 | h, w, _ = image.shape 43 | old_size = (right - left + bottom - top)/2 44 | center = np.array([right - (right - left) / 2.0, bottom - (bottom - top) / 2.0 ]) 45 | size = int(old_size*self.scale) 46 | 47 | # crop image 48 | src_pts = np.array([[center[0]-size/2, center[1]-size/2], [center[0] - size/2, center[1]+size/2], [center[0]+size/2, center[1]-size/2]]) 49 | DST_PTS = np.array([[0,0], [0,self.crop_size - 1], [self.crop_size - 1, 0]]) 50 | tform = estimate_transform('similarity', src_pts, DST_PTS) 51 | 52 | image = image/255. 53 | dst_image = warp(image, tform.inverse, output_shape=(self.crop_size, self.crop_size)) 54 | dst_image = dst_image.transpose(2,0,1) 55 | return {'image': torch.tensor(dst_image).float(), 56 | 'imagename': self.data_lines[index].strip().replace('.jpg', ''), 57 | # 'tform': tform, 58 | # 'original_image': torch.tensor(image.transpose(2,0,1)).float(), 59 | } -------------------------------------------------------------------------------- /third_part/decalib/models/decoders.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # Using this computer program means that you agree to the terms 6 | # in the LICENSE file included with this software distribution. 7 | # Any use not explicitly granted by the LICENSE is prohibited. 8 | # 9 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 10 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 11 | # for Intelligent Systems. All rights reserved. 
12 | # 13 | # For comments or questions, please email us at deca@tue.mpg.de 14 | # For commercial licensing contact, please contact ps-license@tuebingen.mpg.de 15 | 16 | import torch 17 | import torch.nn as nn 18 | 19 | class Generator(nn.Module): 20 | def __init__(self, latent_dim=100, out_channels=1, out_scale=0.01, sample_mode = 'bilinear'): 21 | super(Generator, self).__init__() 22 | self.out_scale = out_scale 23 | 24 | self.init_size = 32 // 4 # Initial size before upsampling 25 | self.l1 = nn.Sequential(nn.Linear(latent_dim, 128 * self.init_size ** 2)) 26 | self.conv_blocks = nn.Sequential( 27 | nn.BatchNorm2d(128), 28 | nn.Upsample(scale_factor=2, mode=sample_mode), #16 29 | nn.Conv2d(128, 128, 3, stride=1, padding=1), 30 | nn.BatchNorm2d(128, 0.8), 31 | nn.LeakyReLU(0.2, inplace=True), 32 | nn.Upsample(scale_factor=2, mode=sample_mode), #32 33 | nn.Conv2d(128, 64, 3, stride=1, padding=1), 34 | nn.BatchNorm2d(64, 0.8), 35 | nn.LeakyReLU(0.2, inplace=True), 36 | nn.Upsample(scale_factor=2, mode=sample_mode), #64 37 | nn.Conv2d(64, 64, 3, stride=1, padding=1), 38 | nn.BatchNorm2d(64, 0.8), 39 | nn.LeakyReLU(0.2, inplace=True), 40 | nn.Upsample(scale_factor=2, mode=sample_mode), #128 41 | nn.Conv2d(64, 32, 3, stride=1, padding=1), 42 | nn.BatchNorm2d(32, 0.8), 43 | nn.LeakyReLU(0.2, inplace=True), 44 | nn.Upsample(scale_factor=2, mode=sample_mode), #256 45 | nn.Conv2d(32, 16, 3, stride=1, padding=1), 46 | nn.BatchNorm2d(16, 0.8), 47 | nn.LeakyReLU(0.2, inplace=True), 48 | nn.Conv2d(16, out_channels, 3, stride=1, padding=1), 49 | nn.Tanh(), 50 | ) 51 | 52 | def forward(self, noise): 53 | out = self.l1(noise) 54 | out = out.view(out.shape[0], 128, self.init_size, self.init_size) 55 | img = self.conv_blocks(out) 56 | return img*self.out_scale -------------------------------------------------------------------------------- /third_part/decalib/models/encoders.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # Using this computer program means that you agree to the terms 6 | # in the LICENSE file included with this software distribution. 7 | # Any use not explicitly granted by the LICENSE is prohibited. 8 | # 9 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 10 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 11 | # for Intelligent Systems. All rights reserved. 12 | # 13 | # For comments or questions, please email us at deca@tue.mpg.de 14 | # For commercial licensing contact, please contact ps-license@tuebingen.mpg.de 15 | 16 | import numpy as np 17 | import torch.nn as nn 18 | import torch 19 | import torch.nn.functional as F 20 | from . 
import resnet 21 | 22 | class ResnetEncoder(nn.Module): 23 | def __init__(self, outsize, last_op=None): 24 | super(ResnetEncoder, self).__init__() 25 | feature_size = 2048 26 | self.encoder = resnet.load_ResNet50Model() #out: 2048 27 | ### regressor 28 | self.layers = nn.Sequential( 29 | nn.Linear(feature_size, 1024), 30 | nn.ReLU(), 31 | nn.Linear(1024, outsize) 32 | ) 33 | self.last_op = last_op 34 | 35 | def forward(self, inputs): 36 | features = self.encoder(inputs) 37 | parameters = self.layers(features) 38 | if self.last_op: 39 | parameters = self.last_op(parameters) 40 | return parameters 41 | -------------------------------------------------------------------------------- /third_part/decalib/utils/config.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Default config for DECA 3 | ''' 4 | from yacs.config import CfgNode as CN 5 | import argparse 6 | import yaml 7 | import os 8 | 9 | cfg = CN() 10 | 11 | # abs_deca_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) 12 | # cfg.deca_dir = abs_deca_dir 13 | cfg.deca_dir = 'Custom_DECA_directory/' 14 | cfg.device = 'cuda' 15 | cfg.device_id = '0' 16 | 17 | cfg.pretrained_modelpath = os.path.join(cfg.deca_dir, 'data', 'deca_model.tar') 18 | cfg.output_dir = '' 19 | cfg.rasterizer_type = 'pytorch3d' 20 | # ---------------------------------------------------------------------------- # 21 | # Options for Face model 22 | # ---------------------------------------------------------------------------- # 23 | cfg.model = CN() 24 | cfg.model.topology_path = os.path.join(cfg.deca_dir, 'data', 'head_template.obj') 25 | # texture data original from http://files.is.tue.mpg.de/tbolkart/FLAME/FLAME_texture_data.zip 26 | cfg.model.dense_template_path = os.path.join(cfg.deca_dir, 'data', 'texture_data_256.npy') 27 | cfg.model.fixed_displacement_path = os.path.join(cfg.deca_dir, 'data', 'fixed_displacement_256.npy') 28 | cfg.model.flame_model_path = os.path.join(cfg.deca_dir, 'data', 'generic_model.pkl') 29 | cfg.model.flame_lmk_embedding_path = os.path.join(cfg.deca_dir, 'data', 'landmark_embedding.npy') 30 | cfg.model.face_mask_path = os.path.join(cfg.deca_dir, 'data', 'uv_face_mask.png') 31 | cfg.model.face_eye_mask_path = os.path.join(cfg.deca_dir, 'data', 'uv_face_eye_mask.png') 32 | cfg.model.mean_tex_path = os.path.join(cfg.deca_dir, 'data', 'mean_texture.jpg') 33 | cfg.model.tex_path = os.path.join(cfg.deca_dir, 'data', 'FLAME_albedo_from_BFM.npz') 34 | cfg.model.tex_type = 'BFM' # BFM, FLAME, albedoMM 35 | cfg.model.uv_size = 256 36 | cfg.model.param_list = ['shape', 'tex', 'exp', 'pose', 'cam', 'light'] 37 | cfg.model.n_shape = 100 38 | cfg.model.n_tex = 50 39 | cfg.model.n_exp = 50 40 | cfg.model.n_cam = 3 41 | cfg.model.n_pose = 6 42 | cfg.model.n_light = 27 43 | cfg.model.use_tex = True 44 | cfg.model.jaw_type = 'aa' # default use axis angle, another option: euler. 
Note that: aa is not stable in the beginning 45 | # face recognition model 46 | cfg.model.fr_model_path = os.path.join(cfg.deca_dir, 'data', 'resnet50_ft_weight.pkl') 47 | 48 | ## details 49 | cfg.model.n_detail = 128 50 | cfg.model.max_z = 0.01 51 | 52 | # ---------------------------------------------------------------------------- # 53 | # Options for Dataset 54 | # ---------------------------------------------------------------------------- # 55 | cfg.dataset = CN() 56 | cfg.dataset.training_data = ['vggface2', 'ethnicity'] 57 | # cfg.dataset.training_data = ['ethnicity'] 58 | cfg.dataset.eval_data = ['aflw2000'] 59 | cfg.dataset.test_data = [''] 60 | cfg.dataset.batch_size = 2 61 | cfg.dataset.K = 4 62 | cfg.dataset.isSingle = False 63 | cfg.dataset.num_workers = 2 64 | cfg.dataset.image_size = 224 65 | cfg.dataset.scale_min = 1.4 66 | cfg.dataset.scale_max = 1.8 67 | cfg.dataset.trans_scale = 0. 68 | 69 | # ---------------------------------------------------------------------------- # 70 | # Options for training 71 | # ---------------------------------------------------------------------------- # 72 | cfg.train = CN() 73 | cfg.train.train_detail = False 74 | cfg.train.max_epochs = 500 75 | cfg.train.max_steps = 1000000 76 | cfg.train.lr = 1e-4 77 | cfg.train.log_dir = 'logs' 78 | cfg.train.log_steps = 10 79 | cfg.train.vis_dir = 'train_images' 80 | cfg.train.vis_steps = 200 81 | cfg.train.write_summary = True 82 | cfg.train.checkpoint_steps = 500 83 | cfg.train.val_steps = 500 84 | cfg.train.val_vis_dir = 'val_images' 85 | cfg.train.eval_steps = 5000 86 | cfg.train.resume = True 87 | 88 | # ---------------------------------------------------------------------------- # 89 | # Options for Losses 90 | # ---------------------------------------------------------------------------- # 91 | cfg.loss = CN() 92 | cfg.loss.lmk = 1.0 93 | cfg.loss.useWlmk = True 94 | cfg.loss.eyed = 1.0 95 | cfg.loss.lipd = 0.5 96 | cfg.loss.photo = 2.0 97 | cfg.loss.useSeg = True 98 | cfg.loss.id = 0.2 99 | cfg.loss.id_shape_only = True 100 | cfg.loss.reg_shape = 1e-04 101 | cfg.loss.reg_exp = 1e-04 102 | cfg.loss.reg_tex = 1e-04 103 | cfg.loss.reg_light = 1. 104 | cfg.loss.reg_jaw_pose = 0. #1. 105 | cfg.loss.use_gender_prior = False 106 | cfg.loss.shape_consistency = True 107 | # loss for detail 108 | cfg.loss.detail_consistency = True 109 | cfg.loss.useConstraint = True 110 | cfg.loss.mrf = 5e-2 111 | cfg.loss.photo_D = 2. 
112 | cfg.loss.reg_sym = 0.005 113 | cfg.loss.reg_z = 0.005 114 | cfg.loss.reg_diff = 0.005 115 | 116 | 117 | def get_cfg_defaults(): 118 | """Get a yacs CfgNode object with default values for my_project.""" 119 | # Return a clone so that the defaults will not be altered 120 | # This is for the "local variable" use pattern 121 | return cfg.clone() 122 | 123 | def update_cfg(cfg, cfg_file): 124 | cfg.merge_from_file(cfg_file) 125 | return cfg.clone() 126 | 127 | def parse_args(): 128 | parser = argparse.ArgumentParser() 129 | parser.add_argument('--cfg', type=str, help='cfg file path') 130 | parser.add_argument('--mode', type=str, default = 'train', help='deca mode') 131 | 132 | args = parser.parse_args() 133 | print(args, end='\n\n') 134 | 135 | cfg = get_cfg_defaults() 136 | cfg.cfg_file = None 137 | cfg.mode = args.mode 138 | # import ipdb; ipdb.set_trace() 139 | if args.cfg is not None: 140 | cfg_file = args.cfg 141 | cfg = update_cfg(cfg, args.cfg) 142 | cfg.cfg_file = cfg_file 143 | 144 | return cfg 145 | -------------------------------------------------------------------------------- /third_part/decalib/utils/rasterizer/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | from standard_rasterize_cuda import standard_rasterize 3 | # from .rasterizer.standard_rasterize_cuda import standard_rasterize 4 | 5 | To enable the import above, run the following in this folder: 6 | ```python setup.py build_ext -i ``` 7 | 8 | Then remember to set --rasterizer_type=standard when running demos :) 9 | 10 | ## Alg 11 | https://www.scratchapixel.com/lessons/3d-basic-rendering/rasterization-practical-implementation 12 | 13 | ## Speed Comparison 14 | Runtime for rasterization only. 15 | In PIXIE, number of faces in SMPLX: 20908 16 | 17 | for image size = 1024 18 | pytorch3d: 0.031s 19 | standard: 0.01s 20 | 21 | for image size = 224 22 | pytorch3d: 0.0035s 23 | standard: 0.0014s 24 | 25 | Why is the standard rasterizer faster than pytorch3d? 26 | Ref: https://github.com/facebookresearch/pytorch3d/blob/master/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu 27 | pytorch3d: for each pixel in image space (each pixel is parallel in CUDA), loop through the faces, check whether the pixel lies inside the projected bounding box of each face, then sort the faces by z and record the face ids of the closest K faces.
28 | standard rasterization: for each face in the mesh (each face is parallel in CUDA), loop through the pixels in its projected bounding box (normally a very small number), compare z, and record that face id for the pixel 29 | 30 | -------------------------------------------------------------------------------- /third_part/decalib/utils/rasterizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/third_part/decalib/utils/rasterizer/__init__.py -------------------------------------------------------------------------------- /third_part/decalib/utils/rasterizer/setup.py: -------------------------------------------------------------------------------- 1 | # To install, run 2 | # python setup.py build_ext -i 3 | # Ref: https://github.com/pytorch/pytorch/blob/11a40410e755b1fe74efe9eaa635e7ba5712846b/test/cpp_extensions/setup.py#L62 4 | 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | import os 8 | 9 | # USE_NINJA = os.getenv('USE_NINJA') == '1' 10 | os.environ["CC"] = "gcc-7" 11 | os.environ["CXX"] = "gcc-7" 12 | 13 | USE_NINJA = os.getenv('USE_NINJA') == '1' 14 | 15 | setup( 16 | name='standard_rasterize_cuda', 17 | ext_modules=[ 18 | CUDAExtension('standard_rasterize_cuda', [ 19 | 'standard_rasterize_cuda.cpp', 20 | 'standard_rasterize_cuda_kernel.cu', 21 | ]) 22 | ], 23 | cmdclass={'build_ext': BuildExtension.with_options(use_ninja=USE_NINJA)} 24 | ) 25 | -------------------------------------------------------------------------------- /third_part/decalib/utils/rasterizer/standard_rasterize_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | #include <vector> 3 | #include <iostream> 4 | 5 | std::vector<at::Tensor> forward_rasterize_cuda( 6 | at::Tensor face_vertices, 7 | at::Tensor depth_buffer, 8 | at::Tensor triangle_buffer, 9 | at::Tensor baryw_buffer, 10 | int h, 11 | int w); 12 | 13 | std::vector<at::Tensor> standard_rasterize( 14 | at::Tensor face_vertices, 15 | at::Tensor depth_buffer, 16 | at::Tensor triangle_buffer, 17 | at::Tensor baryw_buffer, 18 | int height, int width 19 | ) { 20 | return forward_rasterize_cuda(face_vertices, depth_buffer, triangle_buffer, baryw_buffer, height, width); 21 | } 22 | 23 | std::vector<at::Tensor> forward_rasterize_colors_cuda( 24 | at::Tensor face_vertices, 25 | at::Tensor face_colors, 26 | at::Tensor depth_buffer, 27 | at::Tensor triangle_buffer, 28 | at::Tensor images, 29 | int h, 30 | int w); 31 | 32 | std::vector<at::Tensor> standard_rasterize_colors( 33 | at::Tensor face_vertices, 34 | at::Tensor face_colors, 35 | at::Tensor depth_buffer, 36 | at::Tensor triangle_buffer, 37 | at::Tensor images, 38 | int height, int width 39 | ) { 40 | return forward_rasterize_colors_cuda(face_vertices, face_colors, depth_buffer, triangle_buffer, images, height, width); 41 | } 42 | 43 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 44 | m.def("standard_rasterize", &standard_rasterize, "RASTERIZE (CUDA)"); 45 | m.def("standard_rasterize_colors", &standard_rasterize_colors, "RASTERIZE COLORS (CUDA)"); 46 | } 47 | 48 | // TODO: backward -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import data as Dataset 4 | 5 | from configs.config import Config 6 | from utils.logging import init_logging, make_logging_dir 7 | from utils.trainer import
get_model_optimizer_and_scheduler_with_pretrain, set_random_seed, get_trainer, get_model_optimizer_and_scheduler 8 | from utils.distributed import init_dist 9 | from utils.distributed import master_only_print as print 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser(description='Training') 14 | parser.add_argument('--config', required=True) 15 | parser.add_argument('--name', required=True) 16 | parser.add_argument('--checkpoints_dir', default='result', help='Dir for saving logs and models.') 17 | parser.add_argument('--seed', type=int, default=0, help='Random seed.') 18 | parser.add_argument('--which_iter', type=int, default=None) 19 | parser.add_argument('--no_resume', action='store_true') 20 | parser.add_argument('--local_rank', type=int, default=0) 21 | parser.add_argument('--single_gpu', action='store_true') 22 | parser.add_argument('--debug', action='store_true') 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def main(): 28 | # get training options 29 | args = parse_args() 30 | set_random_seed(args.seed) 31 | 32 | opt = Config(args.config, args, is_train=True) 33 | 34 | if not args.single_gpu: 35 | opt.local_rank = args.local_rank 36 | init_dist(opt.local_rank) 37 | opt.device = opt.local_rank 38 | print('Distributed DataParallel Training.') 39 | else: 40 | print('Single GPU Training.') 41 | opt.device = 'cuda' 42 | opt.local_rank = 0 43 | opt.distributed = False 44 | opt.data.train.distributed = False 45 | opt.data.val.distributed = False 46 | 47 | # create a visualizer 48 | date_uid, logdir = init_logging(opt) 49 | opt.logdir = logdir 50 | make_logging_dir(logdir, date_uid) 51 | os.system(f'cp {args.config} {opt.logdir}') 52 | # create a dataset 53 | val_dataset, train_dataset = Dataset.get_train_val_dataloader(opt.data) 54 | 55 | # create a model 56 | net_G, net_G_ema, opt_G, sch_G = get_model_optimizer_and_scheduler_with_pretrain(opt) 57 | 58 | trainer = get_trainer(opt, net_G, net_G_ema, opt_G, sch_G, train_dataset) 59 | current_epoch, current_iteration = trainer.load_checkpoint(opt, args.which_iter) 60 | 61 | # training flag 62 | if args.debug: 63 | trainer.test_everything(train_dataset, val_dataset, current_epoch, current_iteration) 64 | exit() 65 | 66 | # Start training. 
67 | for epoch in range(current_epoch, opt.max_epoch): 68 | print('Epoch {} ...'.format(epoch)) 69 | if not args.single_gpu: 70 | train_dataset.sampler.set_epoch(current_epoch) 71 | trainer.start_of_epoch(current_epoch) 72 | for it, data in enumerate(train_dataset): 73 | data = trainer.start_of_iteration(data, current_iteration) 74 | trainer.optimize_parameters(data) 75 | current_iteration += 1 76 | trainer.end_of_iteration(data, current_epoch, current_iteration) 77 | 78 | if current_iteration >= opt.max_iter: 79 | print('Done with training!!!') 80 | break 81 | current_epoch += 1 82 | trainer.end_of_epoch(data, val_dataset, current_epoch, current_iteration) 83 | trainer.test(val_dataset, output_dir=os.path.join(logdir, 'evaluation'), test_limit=10) 84 | 85 | 86 | if __name__ == '__main__': 87 | main() 88 | 89 | 90 | -------------------------------------------------------------------------------- /utils/common.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | from torchvision import transforms 6 | 7 | 8 | def tensor2map(var): 9 | # if len(var.shape) == 4: 10 | # var = var[0] 11 | if torch.is_tensor(var) and torch.max(var) < 1: 12 | mask = np.argmax(var.data.cpu().numpy(), axis=0) 13 | elif torch.is_tensor(var): 14 | mask = var.data.cpu().long().numpy() 15 | else: 16 | if len(var.shape) == 3: 17 | var = var[0] 18 | mask = var 19 | colors = get_colors() 20 | mask_image = np.ones(shape=(mask.shape[0], mask.shape[1], 3)) 21 | for class_idx in np.unique(mask): 22 | mask_image[mask == class_idx] = colors[class_idx] 23 | mask_image = mask_image.astype('uint8') 24 | return Image.fromarray(mask_image) 25 | 26 | 27 | def tensor2sketch(var): 28 | im = var[0].cpu().detach().numpy() 29 | im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) 30 | im = (im * 255).astype(np.uint8) 31 | return Image.fromarray(im) 32 | 33 | 34 | # Visualization utils 35 | def get_colors(): 36 | # currently support up to 19 classes (for the celebs-hq-mask dataset) 37 | colors = [[0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], [0, 255, 255], 38 | [255, 204, 204], [102, 51, 0], [255, 0, 0], [102, 204, 0], [255, 255, 0], [0, 0, 153], [0, 0, 204], 39 | [255, 51, 153], [0, 204, 204], [0, 51, 0], [255, 153, 51], [0, 204, 0]] 40 | return colors 41 | 42 | 43 | def tensor2img(var): 44 | if len(var.shape) == 4: 45 | var = var[0] 46 | if not torch.is_tensor(var): 47 | return Image.fromarray(var) 48 | # var = var.clamp_(min=-1, max=1) 49 | var = var.cpu().detach().transpose(0, 2).transpose(0, 1).numpy() 50 | var = ((var + 1) / 2) 51 | var[var < 0] = 0 52 | var[var > 1] = 1 53 | var = var * 255 54 | return Image.fromarray(var.astype('uint8')) 55 | 56 | 57 | def tensor2img_np(var): 58 | if len(var.shape) == 4: 59 | var = var[0] 60 | if not torch.is_tensor(var): 61 | return Image.fromarray(var) 62 | # var = var.clamp_(min=-1, max=1) 63 | var = var.cpu().detach().transpose(0, 2).transpose(0, 1).numpy() 64 | var = ((var + 1) / 2) 65 | var[var < 0] = 0 66 | var[var > 1] = 1 67 | var = var * 255 68 | return var 69 | 70 | 71 | def tensor2grayimg(var): 72 | assert len(var.shape) == 2 73 | if torch.is_tensor(var): 74 | var = var.cpu().detach().numpy() 75 | # var = ((var + 1) / 2) 76 | var[var < 0] = 0 77 | var[var > 1] = 1 78 | var = var * 255 79 | return Image.fromarray(var.astype('uint8'), 'L') 80 | 81 | 82 | def img2tensor(img): 83 | loader = transforms.Compose([ 84 | transforms.ToTensor(), 85 | 
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True), 86 | ]) 87 | tensor = loader(img) 88 | return tensor 89 | 90 | 91 | def numpy2img(var): 92 | return Image.fromarray(var.astype('uint8')) 93 | 94 | 95 | def write2video(results_dir, *video_list): 96 | cat_video = None 97 | 98 | for video in video_list: 99 | video_numpy = video[:, :3, :, :].cpu().float().detach().numpy() 100 | video_numpy = (np.transpose(video_numpy, (0, 2, 3, 1)) + 1) / 2.0 * 255.0 101 | video_numpy = video_numpy.astype(np.uint8) 102 | cat_video = np.concatenate([cat_video, video_numpy], 2) if cat_video is not None else video_numpy 103 | 104 | image_array = [] 105 | for i in range(cat_video.shape[0]): 106 | image_array.append(cat_video[i]) 107 | 108 | out_name = results_dir + '.mp4' 109 | _, height, width, layers = cat_video.shape 110 | size = (width, height) 111 | out = cv2.VideoWriter(out_name, cv2.VideoWriter_fourcc(*'mp4v'), 15, size) 112 | 113 | for i in range(len(image_array)): 114 | out.write(image_array[i][:, :, ::-1]) 115 | out.release() 116 | -------------------------------------------------------------------------------- /utils/cudnn.py: -------------------------------------------------------------------------------- 1 | import torch.backends.cudnn as cudnn 2 | 3 | from utils.distributed import master_only_print as print 4 | 5 | 6 | def init_cudnn(deterministic, benchmark): 7 | r"""Initialize the cudnn module. The two things to consider is whether to 8 | use cudnn benchmark and whether to use cudnn deterministic. If cudnn 9 | benchmark is set, then the cudnn deterministic is automatically false. 10 | 11 | Args: 12 | deterministic (bool): Whether to use cudnn deterministic. 13 | benchmark (bool): Whether to use cudnn benchmark. 14 | """ 15 | cudnn.deterministic = deterministic 16 | cudnn.benchmark = benchmark 17 | print('cudnn benchmark: {}'.format(benchmark)) 18 | print('cudnn deterministic: {}'.format(deterministic)) 19 | -------------------------------------------------------------------------------- /utils/distributed.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import torch 3 | import torch.distributed as dist 4 | 5 | 6 | def init_dist(local_rank, backend='nccl', **kwargs): 7 | r"""Initialize distributed training""" 8 | if dist.is_available(): 9 | if dist.is_initialized(): 10 | return torch.cuda.current_device() 11 | torch.cuda.set_device(local_rank) 12 | dist.init_process_group(backend=backend, init_method='env://', **kwargs) 13 | 14 | 15 | def get_rank(): 16 | r"""Get rank of the thread.""" 17 | rank = 0 18 | if dist.is_available(): 19 | if dist.is_initialized(): 20 | rank = dist.get_rank() 21 | return rank 22 | 23 | 24 | def get_world_size(): 25 | r"""Get world size. 
How many GPUs are available in this job.""" 26 | world_size = 1 27 | if dist.is_available(): 28 | if dist.is_initialized(): 29 | world_size = dist.get_world_size() 30 | return world_size 31 | 32 | 33 | def master_only(func): 34 | r"""Apply this function only to the master GPU.""" 35 | @functools.wraps(func) 36 | def wrapper(*args, **kwargs): 37 | r"""Simple function wrapper for the master function""" 38 | if get_rank() == 0: 39 | return func(*args, **kwargs) 40 | else: 41 | return None 42 | return wrapper 43 | 44 | 45 | def is_master(): 46 | r"""check if current process is the master""" 47 | return get_rank() == 0 48 | 49 | 50 | @master_only 51 | def master_only_print(*args): 52 | r"""master-only print""" 53 | print(*args) 54 | 55 | 56 | def dist_reduce_tensor(tensor): 57 | r""" Reduce to rank 0 """ 58 | world_size = get_world_size() 59 | if world_size < 2: 60 | return tensor 61 | with torch.no_grad(): 62 | dist.reduce(tensor, dst=0) 63 | if get_rank() == 0: 64 | tensor /= world_size 65 | return tensor 66 | 67 | 68 | def dist_all_reduce_tensor(tensor): 69 | r""" Reduce to all ranks """ 70 | world_size = get_world_size() 71 | if world_size < 2: 72 | return tensor 73 | with torch.no_grad(): 74 | dist.all_reduce(tensor) 75 | tensor.div_(world_size) 76 | return tensor 77 | 78 | 79 | def dist_all_gather_tensor(tensor): 80 | r""" gather to all ranks """ 81 | world_size = get_world_size() 82 | if world_size < 2: 83 | return [tensor] 84 | tensor_list = [ 85 | torch.ones_like(tensor) for _ in range(dist.get_world_size())] 86 | with torch.no_grad(): 87 | dist.all_gather(tensor_list, tensor) 88 | return tensor_list 89 | -------------------------------------------------------------------------------- /utils/init_weight.py: -------------------------------------------------------------------------------- 1 | from torch.nn import init 2 | 3 | 4 | def weights_init(init_type='normal', gain=0.02, bias=None): 5 | r"""Initialize weights in the network. 6 | 7 | Args: 8 | init_type (str): The name of the initialization scheme. 9 | gain (float): The parameter that is required for the initialization 10 | scheme. 11 | bias (object): If not ``None``, specifies the initialization parameter 12 | for bias. 13 | 14 | Returns: 15 | (obj): init function to be applied. 16 | """ 17 | 18 | def init_func(m): 19 | r"""Init function 20 | 21 | Args: 22 | m: module to be weight initialized. 
23 | """ 24 | class_name = m.__class__.__name__ 25 | if hasattr(m, 'weight') and ( 26 | class_name.find('Conv') != -1 or 27 | class_name.find('Linear') != -1 or 28 | class_name.find('Embedding') != -1): 29 | if init_type == 'normal': 30 | init.normal_(m.weight.data, 0.0, gain) 31 | elif init_type == 'xavier': 32 | init.xavier_normal_(m.weight.data, gain=gain) 33 | elif init_type == 'xavier_uniform': 34 | init.xavier_uniform_(m.weight.data, gain=1.0) 35 | elif init_type == 'kaiming': 36 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 37 | elif init_type == 'orthogonal': 38 | init.orthogonal_(m.weight.data, gain=gain) 39 | elif init_type == 'none': 40 | m.reset_parameters() 41 | else: 42 | raise NotImplementedError( 43 | 'initialization method [%s] is ' 44 | 'not implemented' % init_type) 45 | if hasattr(m, 'bias') and m.bias is not None: 46 | if bias is not None: 47 | bias_type = getattr(bias, 'type', 'normal') 48 | if bias_type == 'normal': 49 | bias_gain = getattr(bias, 'gain', 0.5) 50 | init.normal_(m.bias.data, 0.0, bias_gain) 51 | else: 52 | raise NotImplementedError( 53 | 'initialization method [%s] is ' 54 | 'not implemented' % bias_type) 55 | else: 56 | init.constant_(m.bias.data, 0.0) 57 | return init_func 58 | -------------------------------------------------------------------------------- /utils/landmark.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def get_landmark_bbox(lm, scale=1): 5 | l_eye_id = [36, 42] 6 | r_eye_id = [42, 48] 7 | nose_id = [27, 36] 8 | mouth_id = [48, 68] 9 | p = 8 10 | bbox = [] 11 | for _i, box_id in enumerate([mouth_id, l_eye_id, r_eye_id, nose_id]): 12 | box_lm = lm[:, box_id[0]:box_id[1]] 13 | ly, ry = torch.min(box_lm[:, :, 0], dim=1)[0], torch.max(box_lm[:, :, 0], dim=1)[0] 14 | lx, rx = torch.min(box_lm[:, :, 1], dim=1)[0], torch.max(box_lm[:, :, 1], dim=1)[0] # shape: [b] 15 | lx, rx, ly, ry = (lx * scale).long(), (rx * scale).long(), (ly * scale).long(), (ry * scale).long() 16 | if _i == 1 or _i == 2: 17 | p = 15 18 | lx, rx, ly, ry = lx - p, rx + p, ly - p, ry + p 19 | lx, rx, ly, ry = lx.unsqueeze(1), rx.unsqueeze(1), ly.unsqueeze(1), ry.unsqueeze(1) 20 | bbox.append(torch.cat([ly, lx, ry, rx], dim=1)) 21 | return bbox 22 | -------------------------------------------------------------------------------- /utils/logging.py: -------------------------------------------------------------------------------- 1 | import os 2 | import datetime 3 | from PIL import Image 4 | from utils.meters import set_summary_writer 5 | from utils.distributed import master_only_print as print 6 | from utils.distributed import master_only 7 | 8 | 9 | def get_date_uid(): 10 | """Generate a unique id based on date. 11 | Returns: 12 | str: Return uid string, e.g. '20171122171307111552'. 
13 | """ 14 | return str(datetime.datetime.now().strftime("%Y_%m%d_%H%M_%S")) 15 | 16 | 17 | def init_logging(opt): 18 | date_uid = get_date_uid() 19 | if opt.name is not None: 20 | logdir = os.path.join(opt.checkpoints_dir, opt.name) 21 | else: 22 | logdir = os.path.join(opt.checkpoints_dir, date_uid) 23 | opt.logdir = logdir 24 | return date_uid, logdir 25 | 26 | @master_only 27 | def make_logging_dir(logdir, date_uid): 28 | r"""Create the logging directory 29 | 30 | Args: 31 | logdir (str): Log directory name 32 | """ 33 | 34 | 35 | print('Make folder {}'.format(logdir)) 36 | os.makedirs(logdir, exist_ok=True) 37 | tensorboard_dir = os.path.join(logdir, 'tensorboard') 38 | image_dir = os.path.join(logdir, 'image') 39 | eval_dir = os.path.join(logdir, 'evaluation') 40 | os.makedirs(tensorboard_dir, exist_ok=True) 41 | os.makedirs(image_dir, exist_ok=True) 42 | os.makedirs(eval_dir, exist_ok=True) 43 | 44 | set_summary_writer(tensorboard_dir) 45 | loss_log_name = os.path.join(logdir, 'loss_log.txt') 46 | with open(loss_log_name, "a") as log_file: 47 | log_file.write('================ Training Loss (%s) ================\n' % date_uid) 48 | 49 | 50 | def tensor2im(var, vmin=-1, vmax=1): 51 | # var shape: (3, H, W) 52 | if len(var.shape) == 4: 53 | var = var[0] 54 | var = var.cpu().detach().transpose(0, 2).transpose(0, 1).numpy() 55 | var = ((var - vmin) / (vmax - vmin)) 56 | var[var < 0] = 0 57 | var[var > 1] = 1 58 | var = var * 255 59 | return Image.fromarray(var.astype('uint8')) -------------------------------------------------------------------------------- /utils/lpips.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import numpy as np 4 | from imageio import imread 5 | 6 | import torch 7 | from third_part.PerceptualSimilarity.models import dist_model as dm 8 | from utils.distributed import master_only_print as print 9 | 10 | 11 | def get_image_list(flist): 12 | if isinstance(flist, list): 13 | return flist 14 | 15 | # flist: image file path, image directory path, text file flist path 16 | if isinstance(flist, str): 17 | if os.path.isdir(flist): 18 | flist = list(glob.glob(flist + '/*.jpg')) + list(glob.glob(flist + '/*.png')) 19 | flist.sort() 20 | return flist 21 | 22 | if os.path.isfile(flist): 23 | try: 24 | return np.genfromtxt(flist, dtype=np.str) 25 | except: 26 | return [flist] 27 | print('can not read files from %s return empty list' % flist) 28 | return [] 29 | 30 | 31 | def preprocess_path_for_deform_task(gt_path, distorted_path): 32 | distorted_image_list = sorted(get_image_list(distorted_path)) 33 | gt_list = [] 34 | distorated_list = [] 35 | 36 | for distorted_image in distorted_image_list: 37 | image = os.path.basename(distorted_image) 38 | image = image.split('_2_')[-1] 39 | image = image.split('_vis')[0] + '.jpg' 40 | gt_image = os.path.join(gt_path, image) 41 | if not os.path.isfile(gt_image): 42 | gt_image = gt_image.replace('.jpg', '.png') 43 | gt_list.append(gt_image) 44 | distorated_list.append(distorted_image) 45 | return gt_list, distorated_list 46 | 47 | 48 | class LPIPS(): 49 | def __init__(self, use_gpu=True): 50 | self.model = dm.DistModel() 51 | self.model.initialize(model='net-lin', net='alex', use_gpu=use_gpu) 52 | self.use_gpu = use_gpu 53 | 54 | def __call__(self, image_1, image_2): 55 | """ 56 | image_1: images with size (n, 3, w, h) with value [-1, 1] 57 | image_2: images with size (n, 3, w, h) with value [-1, 1] 58 | """ 59 | result = self.model.forward(image_1, image_2) 60 | 
return result 61 | 62 | def calculate_from_disk(self, gt_path, distorted_path, batch_size=64, verbose=False, for_deformation=True): 63 | # if sort: 64 | if for_deformation: 65 | files_1, files_2 = preprocess_path_for_deform_task(gt_path, distorted_path) 66 | else: 67 | files_1 = sorted(get_image_list(gt_path)) 68 | files_2 = sorted(get_image_list(distorted_path)) 69 | 70 | new_files_1, new_files_2 = [], [] 71 | for item1, item2 in zip(files_1, files_2): 72 | if os.path.isfile(item1) and os.path.isfile(item2): 73 | new_files_1.append(item1) 74 | new_files_2.append(item2) 75 | else: 76 | print(item2) 77 | imgs_1 = np.array([imread(str(fn)).astype(np.float32) / 127.5 - 1 for fn in new_files_1]) 78 | imgs_2 = np.array([imread(str(fn)).astype(np.float32) / 127.5 - 1 for fn in new_files_2]) 79 | 80 | # Bring images to shape (B, 3, H, W) 81 | imgs_1 = imgs_1.transpose((0, 3, 1, 2)) 82 | imgs_2 = imgs_2.transpose((0, 3, 1, 2)) 83 | 84 | result = [] 85 | 86 | d0 = imgs_1.shape[0] 87 | if batch_size > d0: 88 | print(('Warning: batch size is bigger than the data size. ' 89 | 'Setting batch size to data size')) 90 | batch_size = d0 91 | 92 | n_batches = d0 // batch_size 93 | n_used_imgs = n_batches * batch_size 94 | 95 | # imgs_1_arr = np.empty((n_used_imgs, self.dims)) 96 | # imgs_2_arr = np.empty((n_used_imgs, self.dims)) 97 | for i in range(n_batches): 98 | if verbose: 99 | print('\rPropagating batch %d/%d' % (i + 1, n_batches)) 100 | # end='', flush=True) 101 | start = i * batch_size 102 | end = start + batch_size 103 | 104 | img_1_batch = torch.from_numpy(imgs_1[start:end]).type(torch.FloatTensor) 105 | img_2_batch = torch.from_numpy(imgs_2[start:end]).type(torch.FloatTensor) 106 | 107 | if self.use_gpu: 108 | img_1_batch = img_1_batch.cuda() 109 | img_2_batch = img_2_batch.cuda() 110 | 111 | result.append(self.model.forward(img_1_batch, img_2_batch)) 112 | 113 | distance = np.average(result) 114 | print('lpips: %.3f' % distance) 115 | return distance 116 | -------------------------------------------------------------------------------- /utils/meters.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.tensorboard import SummaryWriter 5 | from torch.utils.tensorboard.summary import hparams 6 | 7 | 8 | from utils.distributed import master_only 9 | from utils.distributed import master_only_print as print 10 | 11 | LOG_WRITER = None 12 | LOG_DIR = None 13 | 14 | 15 | @torch.no_grad() 16 | def sn_reshape_weight_to_matrix(weight): 17 | r"""Reshape weight to obtain the matrix form. 18 | 19 | Args: 20 | weight (Parameters): pytorch layer parameter tensor. 21 | """ 22 | weight_mat = weight 23 | height = weight_mat.size(0) 24 | return weight_mat.reshape(height, -1) 25 | 26 | 27 | @torch.no_grad() 28 | def get_weight_stats(mod, cfg, loss_id): 29 | r"""Get weight state 30 | 31 | Args: 32 | mod: Pytorch module 33 | cfg: Configuration object 34 | loss_id: Needed when using AMP. 35 | """ 36 | loss_scale = 1.0 37 | if cfg.trainer.amp == 'O1' or cfg.trainer.amp == 'O2': 38 | # AMP rescales the gradient so we have to undo it. 39 | loss_scale = amp._amp_state.loss_scalers[loss_id].loss_scale() 40 | if mod.weight_orig.grad is not None: 41 | grad_norm = mod.weight_orig.grad.data.norm().item() / float(loss_scale) 42 | else: 43 | grad_norm = 0. 
44 | weight_norm = mod.weight_orig.data.norm().item() 45 | weight_mat = sn_reshape_weight_to_matrix(mod.weight_orig) 46 | sigma = torch.sum(mod.weight_u * torch.mv(weight_mat, mod.weight_v)) 47 | return grad_norm, weight_norm, sigma 48 | 49 | 50 | @master_only 51 | def set_summary_writer(log_dir): 52 | r"""Set summary writer 53 | 54 | Args: 55 | log_dir (str): Log directory. 56 | """ 57 | global LOG_DIR, LOG_WRITER 58 | LOG_DIR = log_dir 59 | LOG_WRITER = SummaryWriter(log_dir=log_dir) 60 | 61 | 62 | @master_only 63 | def write_summary(name, summary, step, hist=False): 64 | """Utility function for write summary to log_writer. 65 | """ 66 | global LOG_WRITER 67 | lw = LOG_WRITER 68 | if lw is None: 69 | raise Exception("Log writer not set.") 70 | if hist: 71 | lw.add_histogram(name, summary, step) 72 | else: 73 | lw.add_scalar(name, summary, step) 74 | 75 | 76 | @master_only 77 | def add_hparams(hparam_dict=None, metric_dict=None): 78 | r"""Add a set of hyperparameters to be compared in tensorboard. 79 | 80 | Args: 81 | hparam_dict (dictionary): Each key-value pair in the dictionary is the 82 | name of the hyper parameter and it's corresponding value. 83 | The type of the value can be one of `bool`, `string`, `float`, 84 | `int`, or `None`. 85 | metric_dict (dictionary): Each key-value pair in the dictionary is the 86 | name of the metric and it's corresponding value. Note that the key 87 | used here should be unique in the tensorboard record. Otherwise the 88 | value you added by `add_scalar` will be displayed in hparam plugin. 89 | In most cases, this is unwanted. 90 | """ 91 | if type(hparam_dict) is not dict or type(metric_dict) is not dict: 92 | raise TypeError('hparam_dict and metric_dict should be dictionary.') 93 | global LOG_WRITER 94 | lw = LOG_WRITER 95 | 96 | exp, ssi, sei = hparams(hparam_dict, metric_dict) 97 | 98 | lw.file_writer.add_summary(exp) 99 | lw.file_writer.add_summary(ssi) 100 | lw.file_writer.add_summary(sei) 101 | 102 | 103 | class Meter(object): 104 | """Meter is to keep track of statistics along steps. 105 | Meters write values for purpose like printing average values. 106 | Meters can be flushed to log files (i.e. TensorBoard for now) 107 | regularly. 108 | 109 | Args: 110 | name (str): the name of meter 111 | """ 112 | 113 | @master_only 114 | def __init__(self, name): 115 | self.name = name 116 | self.values = [] 117 | 118 | @master_only 119 | def reset(self): 120 | r"""Reset the meter values""" 121 | self.values = [] 122 | 123 | @master_only 124 | def write(self, value): 125 | r"""Record the value""" 126 | self.values.append(value) 127 | 128 | @master_only 129 | def flush(self, step): 130 | r"""Write the value in the tensorboard. 131 | 132 | Args: 133 | step (int): Epoch or iteration number. 134 | """ 135 | if not all(math.isfinite(x) for x in self.values): 136 | print("meter {} contained a nan or inf.".format(self.name)) 137 | filtered_values = list(filter(lambda x: math.isfinite(x), self.values)) 138 | if float(len(filtered_values)) != 0: 139 | value = float(sum(filtered_values)) / float(len(filtered_values)) 140 | write_summary(self.name, value, step) 141 | self.reset() 142 | 143 | @master_only 144 | def write_image(self, img_grid, step): 145 | r"""Write the value in the tensorboard. 146 | 147 | Args: 148 | img_grid: 149 | step (int): Epoch or iteration number. 
150 | """ 151 | global LOG_WRITER 152 | lw = LOG_WRITER 153 | if lw is None: 154 | raise Exception("Log writer not set.") 155 | lw.add_image("Visualizations", img_grid, step) 156 | -------------------------------------------------------------------------------- /utils/video_preprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTalker/StyleHEAT/9cb0c7cac00c79f901c3061850693b110f88c6ea/utils/video_preprocess/__init__.py -------------------------------------------------------------------------------- /utils/video_preprocess/align_face.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | # import dlib 4 | import scipy 5 | import PIL 6 | from PIL import Image 7 | from torchvision import transforms 8 | 9 | 10 | def align_face_single(img, lm, output_size=1024): 11 | """ 12 | :param img: PIL Image 13 | :return: PIL Image 14 | """ 15 | 16 | lm_chin = lm[0: 17] # left-right 17 | lm_eyebrow_left = lm[17: 22] # left-right 18 | lm_eyebrow_right = lm[22: 27] # left-right 19 | lm_nose = lm[27: 31] # top-down 20 | lm_nostrils = lm[31: 36] # top-down 21 | lm_eye_left = lm[36: 42] # left-clockwise 22 | lm_eye_right = lm[42: 48] # left-clockwise 23 | lm_mouth_outer = lm[48: 60] # left-clockwise 24 | lm_mouth_inner = lm[60: 68] # left-clockwise 25 | 26 | # Calculate auxiliary vectors. 27 | eye_left = np.mean(lm_eye_left, axis=0) 28 | eye_right = np.mean(lm_eye_right, axis=0) 29 | eye_avg = (eye_left + eye_right) * 0.5 30 | eye_to_eye = eye_right - eye_left 31 | mouth_left = lm_mouth_outer[0] 32 | mouth_right = lm_mouth_outer[6] 33 | mouth_avg = (mouth_left + mouth_right) * 0.5 34 | eye_to_mouth = mouth_avg - eye_avg 35 | 36 | # Choose oriented crop rectangle. 37 | x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1] 38 | x /= np.hypot(*x) 39 | x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8) 40 | y = np.flipud(x) * [-1, 1] 41 | c = eye_avg + eye_to_mouth * 0.1 42 | quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]) 43 | qsize = np.hypot(*x) * 2 44 | 45 | transform_size = 1024 46 | enable_padding = False 47 | 48 | # Shrink. 49 | shrink = int(np.floor(qsize / output_size * 0.5)) 50 | if shrink > 1: 51 | rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink))) 52 | img = img.resize(rsize, PIL.Image.ANTIALIAS) 53 | quad /= shrink 54 | qsize /= shrink 55 | 56 | # Crop. 57 | border = max(int(np.rint(qsize * 0.1)), 3) 58 | crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), 59 | int(np.ceil(max(quad[:, 1])))) 60 | crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), 61 | min(crop[3] + border, img.size[1])) 62 | if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]: 63 | img = img.crop(crop) 64 | quad -= crop[0:2] 65 | 66 | # Pad. 
67 | pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), 68 | int(np.ceil(max(quad[:, 1])))) 69 | pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), 70 | max(pad[3] - img.size[1] + border, 0)) 71 | if enable_padding and max(pad) > border - 4: 72 | pad = np.maximum(pad, int(np.rint(qsize * 0.3))) 73 | img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect') 74 | h, w, _ = img.shape 75 | y, x, _ = np.ogrid[:h, :w, :1] 76 | mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w - 1 - x) / pad[2]), 77 | 1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h - 1 - y) / pad[3])) 78 | blur = qsize * 0.02 79 | img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0) 80 | img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0) 81 | img = PIL.Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB') 82 | quad += pad[:2] 83 | 84 | # Transform. 85 | img = img.transform((transform_size, transform_size), PIL.Image.QUAD, (quad + 0.5).flatten(), PIL.Image.BILINEAR) 86 | if output_size < transform_size: 87 | img = img.resize((output_size, output_size), PIL.Image.ANTIALIAS) 88 | 89 | # Save aligned image. 90 | return img 91 | 92 | 93 | def align_image(images, lms): 94 | """ 95 | Input: images PIL list 96 | lms batch tensor 97 | """ 98 | transform = transforms.Compose([ 99 | transforms.ToTensor(), 100 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True), 101 | ]) 102 | output_images = [] 103 | for i, image in enumerate(images): 104 | lm = lms[i] 105 | align = align_face_single(image, lm, output_size=256) 106 | output_images.append(align) 107 | 108 | output_images = [transform(i) for i in output_images] 109 | output_images = torch.stack(output_images).float() 110 | return output_images 111 | 112 | 113 | def align_image_pil(images, lms): 114 | """ 115 | Input: images PIL list 116 | lms batch tensor 117 | """ 118 | output_images = [] 119 | for i, image in enumerate(images): 120 | lm = lms[i] 121 | align = align_face_single(image, lm, output_size=256) 122 | output_images.append(align) 123 | return output_images 124 | -------------------------------------------------------------------------------- /utils/video_preprocess/extract_3dmm.py: -------------------------------------------------------------------------------- 1 | """This script is the test script for Deep3DFaceRecon_pytorch 2 | Change most absolute path to relative path 3 | """ 4 | 5 | import os 6 | import sys 7 | # sys.path.append() 8 | from third_part.Deep3DFaceRecon_pytorch.options.test_options import TestOptions 9 | # from data import create_dataset 10 | from third_part.Deep3DFaceRecon_pytorch.models import create_model 11 | from third_part.Deep3DFaceRecon_pytorch.util.visualizer import MyVisualizer 12 | from third_part.Deep3DFaceRecon_pytorch.util.preprocess import align_img 13 | from PIL import Image 14 | import numpy as np 15 | from third_part.Deep3DFaceRecon_pytorch.util.load_mats import load_lm3d 16 | import torch 17 | from configs.path import PRETRAINED_MODELS_PATH 18 | 19 | 20 | class Extract3dmm: 21 | 22 | def __init__(self): 23 | bfm_path = PRETRAINED_MODELS_PATH['BFM'] 24 | deep3d_path = PRETRAINED_MODELS_PATH['3DMM'] 25 | deep3d_path = os.path.dirname(deep3d_path) 26 | deep3d_dir = os.path.dirname(deep3d_path) 27 | deep3d_name = os.path.basename(deep3d_path) 28 | cmd = f'--checkpoints_dir {deep3d_dir} ' \ 29 | 
f'--bfm_folder {bfm_path} --name={deep3d_name} ' \ 30 | f'--epoch=20 --img_folder=temp' 31 | 32 | opt = TestOptions(cmd_line=cmd).parse() # get test options 33 | 34 | self.model = create_model(opt) 35 | self.model.setup(opt) 36 | self.model.device = 'cuda' 37 | self.model.parallelize() 38 | self.model.eval() 39 | self.lm3d_std = load_lm3d(opt.bfm_folder) 40 | 41 | def image_transform(self, images, lm): 42 | # W, H = images.size 43 | W, H = 256, 256 44 | imsize = 256 # Note this hyper-param is key for downloading optical model 45 | images = images.resize((imsize, imsize)) 46 | # lm = lm * imsize / W # lm coordinate is corresponding to the image size 47 | # lm = lm.copy() # Note that lm has been extracted at the size of 256 48 | 49 | if np.mean(lm) == -1: 50 | lm = (self.lm3d_std[:, :2] + 1) / 2. 51 | lm = np.concatenate( 52 | [lm[:, :1] * W, lm[:, 1:2] * H], 1 53 | ) 54 | else: 55 | lm[:, -1] = H - 1 - lm[:, -1] 56 | 57 | trans_params, img, lm, _ = align_img(images, lm, self.lm3d_std) 58 | img = torch.tensor(np.array(img) / 255., dtype=torch.float32).permute(2, 0, 1) 59 | lm = torch.tensor(lm) 60 | trans_params = np.array([float(item) for item in np.hsplit(trans_params, 5)]) 61 | trans_params = torch.tensor(trans_params.astype(np.float32)) 62 | return img, lm, trans_params 63 | 64 | def get_3dmm(self, images_pil, lms): 65 | """ 66 | :param images: PIL list 67 | :return: 68 | """ 69 | images = [] 70 | trans_params = [] 71 | for i, img in enumerate(images_pil): 72 | lm = lms[i] 73 | img, lm, p = self.image_transform(img, lm) 74 | images.append(img) 75 | trans_params.append(p) 76 | 77 | images = torch.stack(images) 78 | trans_params = torch.stack(trans_params) 79 | 80 | batch_size = 20 81 | num_batch = images.shape[0] // batch_size + 1 82 | pred_coeffs = [] 83 | for _i in range(num_batch): 84 | _images = images[_i * batch_size: (_i+1) * batch_size] 85 | if len(_images) == 0: 86 | break 87 | data_input = { 88 | 'imgs': _images, 89 | } 90 | self.model.set_input(data_input) 91 | with torch.no_grad(): 92 | self.model.test() 93 | pred_coeff = {key: self.model.pred_coeffs_dict[key] for key in self.model.pred_coeffs_dict} 94 | pred_coeff = torch.cat([ 95 | pred_coeff['id'], 96 | pred_coeff['exp'], 97 | pred_coeff['tex'], 98 | pred_coeff['angle'], 99 | pred_coeff['gamma'], 100 | pred_coeff['trans']], 1 101 | ) 102 | _trans_params = np.array(trans_params[_i * batch_size: (_i+1) * batch_size]) 103 | _, _, ratio, t0, t1 = np.hsplit(_trans_params, 5) 104 | crop_param = np.concatenate([ratio, t0, t1], 1) 105 | pred_coeff = np.concatenate([pred_coeff.cpu().numpy(), crop_param], 1) 106 | 107 | pred_coeffs.append(pred_coeff) 108 | 109 | coeff_3dmm = np.concatenate(pred_coeffs, 0) 110 | 111 | # extract 73 feature from 260 112 | # id_coeff = coeff_3dmm[:,:80] #identity 113 | ex_coeff = coeff_3dmm[:, 80:144] # expression 114 | # tex_coeff = coeff_3dmm[:,144:224] #texture 115 | angles = coeff_3dmm[:, 224:227] # euler angles for pose 116 | # gamma = coeff_3dmm[:,227:254] #lighting 117 | translation = coeff_3dmm[:, 254:257] # translation 118 | crop = coeff_3dmm[:, 257:300] # crop param 119 | coeff_3dmm = np.concatenate([ex_coeff, angles, translation, crop], 1) 120 | return coeff_3dmm 121 | 122 | 123 | -------------------------------------------------------------------------------- /utils/video_preprocess/extract_landmark.py: -------------------------------------------------------------------------------- 1 | import face_alignment 2 | import numpy as np 3 | import torch 4 | 5 | 6 | detector = 
face_alignment.FaceAlignment(face_alignment.LandmarksType._2D) 7 | 8 | mean_landmark = np.array([[264., 460.], 9 | [264., 540.], 10 | [284., 599.], 11 | [294., 659.], 12 | [313., 729.], 13 | [343., 798.], 14 | [383., 838.], 15 | [423., 878.], 16 | [512., 917.], 17 | [602., 897.], 18 | [661., 848.], 19 | [711., 798.], 20 | [751., 739.], 21 | [770., 659.], 22 | [790., 590.], 23 | [800., 520.], 24 | [810., 440.], 25 | [284., 401.], 26 | [313., 391.], 27 | [353., 391.], 28 | [393., 411.], 29 | [423., 431.], 30 | [572., 431.], 31 | [611., 411.], 32 | [651., 401.], 33 | [701., 401.], 34 | [741., 421.], 35 | [502., 510.], 36 | [492., 570.], 37 | [492., 619.], 38 | [492., 659.], 39 | [462., 669.], 40 | [472., 679.], 41 | [502., 689.], 42 | [532., 679.], 43 | [552., 669.], 44 | [343., 480.], 45 | [363., 470.], 46 | [403., 470.], 47 | [433., 490.], 48 | [403., 500.], 49 | [363., 500.], 50 | [582., 500.], 51 | [611., 480.], 52 | [651., 480.], 53 | [681., 490.], 54 | [651., 510.], 55 | [611., 510.], 56 | [423., 748.], 57 | [443., 739.], 58 | [482., 729.], 59 | [502., 729.], 60 | [522., 729.], 61 | [572., 739.], 62 | [611., 748.], 63 | [572., 788.], 64 | [542., 808.], 65 | [502., 818.], 66 | [472., 808.], 67 | [443., 788.], 68 | [423., 748.], 69 | [482., 748.], 70 | [502., 748.], 71 | [532., 748.], 72 | [602., 748.], 73 | [532., 768.], 74 | [502., 778.], 75 | [472., 778.]]) 76 | 77 | mean_landmark /= 4 78 | 79 | def get_landmark(images): 80 | """ 81 | :param images: PIL list 82 | :return: numpy list 83 | """ 84 | # if isinstance(images, list): 85 | # lm = [] 86 | # for i in images: 87 | # lm.append(detector.get_landmarks_from_image(np.array(images))[0]) 88 | # else: 89 | lms_np = [] 90 | for image in images: 91 | try: 92 | lm = detector.get_landmarks_from_image(np.array(image))[0] 93 | lms_np.append(lm) 94 | except: 95 | lm = mean_landmark 96 | lms_np.append(lm) 97 | lms_np = np.stack(lms_np) # B, 68, 2 98 | return lms_np 99 | --------------------------------------------------------------------------------
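The video preprocessing utilities listed above (utils/video_preprocess/extract_landmark.py, align_face.py, and extract_3dmm.py) form a pipeline: detect 68-point landmarks, align and crop the face, then regress 3DMM coefficients. The snippet below is a minimal sketch of how they might be chained and is not part of the repository: the frame paths are hypothetical, the frames are assumed to already be 256x256 face crops (the scale at which Extract3dmm.get_3dmm expects its landmarks), and the pretrained Deep3DFaceRecon/BFM weights referenced by configs/path.py are assumed to be downloaded.

```python
# Illustrative sketch only (not part of the repository).
# Assumptions: hypothetical frame paths; frames are already 256x256 face crops,
# so detected landmarks match the scale get_3dmm expects; pretrained 3DMM/BFM
# weights configured in configs/path.py are in place.
from PIL import Image

from utils.video_preprocess.extract_landmark import get_landmark
from utils.video_preprocess.align_face import align_image
from utils.video_preprocess.extract_3dmm import Extract3dmm

frame_paths = ['frames/0000.jpg', 'frames/0001.jpg']  # hypothetical paths
frames = [Image.open(p).convert('RGB') for p in frame_paths]

lms = get_landmark(frames)             # (B, 68, 2); falls back to mean_landmark if detection fails
aligned = align_image(frames, lms)     # (B, 3, 256, 256) tensor in [-1, 1], FFHQ-style alignment
extractor = Extract3dmm()              # wraps Deep3DFaceRecon_pytorch
coeffs = extractor.get_3dmm(frames, lms)  # (B, 73): 64 expression + 3 angles + 3 translation + 3 crop params
```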