├── README.md ├── assets ├── pipeline.png └── teaser.png ├── inference.sh ├── inference ├── bisenet.py ├── face_alignment.py ├── face_blending.py ├── get_mask.py ├── inference_body.py ├── inference_face_inpainting.py ├── insightface_func │ ├── .ipynb_checkpoints │ │ ├── face_detect_crop_single-checkpoint.py │ │ └── face_detect_crop_single_smooth-checkpoint.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── face_detect_crop_single.cpython-38.pyc │ │ └── face_detect_crop_single_smooth.cpython-38.pyc │ ├── face_detect_crop_multi.py │ ├── face_detect_crop_single.py │ ├── face_detect_crop_single_smooth.py │ └── utils │ │ ├── .ipynb_checkpoints │ │ └── face_align_ffhqandnewarc-checkpoint.py │ │ ├── __pycache__ │ │ └── face_align_ffhqandnewarc.cpython-38.pyc │ │ └── face_align_ffhqandnewarc.py ├── models │ ├── __pycache__ │ │ ├── attention.cpython-38.pyc │ │ ├── controlnet.cpython-38.pyc │ │ ├── controlnet_attention.cpython-38.pyc │ │ ├── controlnet_unet_blocks.cpython-38.pyc │ │ ├── pipeline_animatediff_videocontrolnet.cpython-38.pyc │ │ ├── pipeline_animatediff_videocontrolnet_threestage.cpython-38.pyc │ │ ├── pipeline_animatediff_videocontrolnet_twostage.cpython-38.pyc │ │ ├── pipeline_animatediff_videocontrolnet_twounet_add.cpython-38.pyc │ │ ├── pipeline_controlvideo.cpython-38.pyc │ │ ├── pipeline_controlvideo_AR.cpython-38.pyc │ │ ├── pipeline_imageunet_animatediff_twostage.cpython-38.pyc │ │ ├── resnet.cpython-38.pyc │ │ ├── stable_diffusion_controlnet_img2img.cpython-38.pyc │ │ ├── unet.cpython-38.pyc │ │ ├── unet_blocks.cpython-38.pyc │ │ └── util.cpython-38.pyc │ ├── attention.py │ ├── controlnet.py │ ├── controlnet_attention.py │ ├── controlnet_unet_blocks.py │ ├── pipeline_controlvideo.py │ ├── resnet.py │ ├── unet.py │ ├── unet_blocks.py │ └── util.py ├── parsing_model │ ├── __pycache__ │ │ ├── model.cpython-38.pyc │ │ └── resnet.cpython-38.pyc │ ├── model.py │ └── resnet.py ├── pipelines │ ├── __pycache__ │ │ └── pipeline_stable_diffusion_controlnet_inpaint.cpython-38.pyc │ └── pipeline_stable_diffusion_controlnet_inpaint.py ├── res │ └── cp │ │ └── 79999_iter.pth ├── resnet.py ├── samples │ ├── appearance │ │ ├── body.png │ │ └── head.png │ └── poses │ │ └── seth1 │ │ ├── 000001.png │ │ ├── 000002.png │ │ ├── 000003.png │ │ ├── 000004.png │ │ ├── 000005.png │ │ ├── 000006.png │ │ ├── 000007.png │ │ ├── 000008.png │ │ ├── 000009.png │ │ ├── 000010.png │ │ ├── 000011.png │ │ ├── 000012.png │ │ ├── 000013.png │ │ ├── 000014.png │ │ ├── 000015.png │ │ ├── 000016.png │ │ ├── 000017.png │ │ ├── 000018.png │ │ ├── 000019.png │ │ ├── 000020.png │ │ ├── 000021.png │ │ ├── 000022.png │ │ ├── 000023.png │ │ ├── 000024.png │ │ ├── 000025.png │ │ ├── 000026.png │ │ ├── 000027.png │ │ ├── 000028.png │ │ ├── 000029.png │ │ ├── 000030.png │ │ ├── 000031.png │ │ ├── 000032.png │ │ ├── 000033.png │ │ ├── 000034.png │ │ ├── 000035.png │ │ ├── 000036.png │ │ ├── 000037.png │ │ ├── 000038.png │ │ ├── 000039.png │ │ ├── 000040.png │ │ ├── 000041.png │ │ ├── 000042.png │ │ ├── 000043.png │ │ ├── 000044.png │ │ ├── 000045.png │ │ ├── 000046.png │ │ ├── 000047.png │ │ ├── 000048.png │ │ ├── 000049.png │ │ ├── 000050.png │ │ ├── 000051.png │ │ ├── 000052.png │ │ ├── 000053.png │ │ ├── 000054.png │ │ ├── 000055.png │ │ ├── 000056.png │ │ ├── 000057.png │ │ ├── 000058.png │ │ ├── 000059.png │ │ ├── 000060.png │ │ ├── 000061.png │ │ ├── 000062.png │ │ ├── 000063.png │ │ ├── 000064.png │ │ ├── 000065.png │ │ ├── 000066.png │ │ ├── 000067.png │ │ ├── 000068.png │ │ ├── 000069.png │ │ 
├── 000070.png │ │ ├── 000071.png │ │ ├── 000072.png │ │ ├── 000073.png │ │ ├── 000074.png │ │ ├── 000075.png │ │ ├── 000076.png │ │ ├── 000077.png │ │ ├── 000078.png │ │ ├── 000079.png │ │ ├── 000080.png │ │ ├── 000081.png │ │ ├── 000082.png │ │ ├── 000083.png │ │ ├── 000084.png │ │ ├── 000085.png │ │ ├── 000086.png │ │ ├── 000087.png │ │ ├── 000088.png │ │ ├── 000089.png │ │ ├── 000090.png │ │ ├── 000091.png │ │ ├── 000092.png │ │ ├── 000093.png │ │ ├── 000094.png │ │ ├── 000095.png │ │ ├── 000096.png │ │ ├── 000097.png │ │ ├── 000098.png │ │ ├── 000099.png │ │ ├── 000100.png │ │ ├── 000101.png │ │ ├── 000102.png │ │ ├── 000103.png │ │ ├── 000104.png │ │ ├── 000105.png │ │ ├── 000106.png │ │ ├── 000107.png │ │ ├── 000108.png │ │ ├── 000109.png │ │ ├── 000110.png │ │ ├── 000111.png │ │ ├── 000112.png │ │ ├── 000113.png │ │ ├── 000114.png │ │ ├── 000115.png │ │ ├── 000116.png │ │ ├── 000117.png │ │ ├── 000118.png │ │ └── 000119.png └── util │ ├── .ipynb_checkpoints │ ├── norm-checkpoint.py │ ├── reverse2original-checkpoint.py │ ├── reverse2original_hzy-checkpoint.py │ ├── reverse2original_swapmask-checkpoint.py │ └── videoswap-checkpoint.py │ ├── __pycache__ │ ├── add_watermark.cpython-38.pyc │ ├── logo_class.cpython-38.pyc │ ├── norm.cpython-38.pyc │ ├── plot.cpython-38.pyc │ ├── reverse2original.cpython-38.pyc │ ├── reverse2original_hzy.cpython-38.pyc │ ├── reverse2original_swapmask.cpython-38.pyc │ ├── util.cpython-38.pyc │ └── videoswap.cpython-38.pyc │ ├── add_watermark.py │ ├── html.py │ ├── image_pool.py │ ├── json_config.py │ ├── logo_class.py │ ├── norm.py │ ├── plot.py │ ├── reverse2original.py │ ├── reverse2original_hzy.py │ ├── reverse2original_swapmask.py │ ├── save_heatmap.py │ ├── util.py │ ├── videoswap.py │ ├── videoswap_multispecific.py │ ├── videoswap_specific.py │ └── visualizer.py ├── preprocess ├── 1_split.py ├── 3_get_body_json.py ├── 3_get_head_json.py ├── README.md ├── assets │ └── ref_advisor.png ├── bisenet.py ├── face_alignment.py ├── get_mask.py ├── insightface_func │ ├── .ipynb_checkpoints │ │ ├── face_detect_crop_single-checkpoint.py │ │ └── face_detect_crop_single_smooth-checkpoint.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── face_detect_crop_single.cpython-38.pyc │ │ └── face_detect_crop_single_smooth.cpython-38.pyc │ ├── face_detect_crop_multi.py │ ├── face_detect_crop_single.py │ ├── face_detect_crop_single_smooth.py │ └── utils │ │ ├── .ipynb_checkpoints │ │ └── face_align_ffhqandnewarc-checkpoint.py │ │ ├── __pycache__ │ │ └── face_align_ffhqandnewarc.cpython-38.pyc │ │ └── face_align_ffhqandnewarc.py ├── models │ ├── __pycache__ │ │ ├── attention.cpython-38.pyc │ │ ├── controlnet.cpython-38.pyc │ │ ├── controlnet_attention.cpython-38.pyc │ │ ├── controlnet_unet_blocks.cpython-38.pyc │ │ ├── pipeline_animatediff_videocontrolnet.cpython-38.pyc │ │ ├── pipeline_animatediff_videocontrolnet_threestage.cpython-38.pyc │ │ ├── pipeline_animatediff_videocontrolnet_twostage.cpython-38.pyc │ │ ├── pipeline_animatediff_videocontrolnet_twounet_add.cpython-38.pyc │ │ ├── pipeline_controlvideo.cpython-38.pyc │ │ ├── pipeline_controlvideo_AR.cpython-38.pyc │ │ ├── pipeline_imageunet_animatediff_twostage.cpython-38.pyc │ │ ├── resnet.cpython-38.pyc │ │ ├── stable_diffusion_controlnet_img2img.cpython-38.pyc │ │ ├── unet.cpython-38.pyc │ │ ├── unet_blocks.cpython-38.pyc │ │ └── util.cpython-38.pyc │ ├── attention.py │ ├── controlnet.py │ ├── controlnet_attention.py │ ├── controlnet_unet_blocks.py │ ├── pipeline_controlvideo.py │ ├── 
resnet.py │ ├── unet.py │ ├── unet_blocks.py │ └── util.py └── res │ └── cp │ └── 79999_iter.pth ├── requirements.txt ├── train ├── pipelines │ └── pipeline_stable_diffusion_controlnet_inpaint.py ├── train_body.py └── train_head.py ├── train_body.sh └── train_head.sh /assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/assets/pipeline.png -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/assets/teaser.png -------------------------------------------------------------------------------- /inference.sh: -------------------------------------------------------------------------------- 1 | cd inference 2 | 3 | export CUDA_VISIBLE_DEVICES=4 4 | 5 | ## Please fill the parameters here 6 | # path to the body model folder 7 | body_weight_dir=./checkpoints/seth/body 8 | # path to the head model folder 9 | head_weight_dir=./checkpoints/seth/head 10 | # path to the input poses 11 | body_input_dir=./samples/poses/seth1 12 | # path to the reference body appearance 13 | body_prompt_img_pth=./samples/appearance/body.png 14 | # path to the reference head appearance 15 | head_prompt_img_pth=./samples/appearance/head.png 16 | 17 | # pipe_path=runwayml/stable-diffusion-v1-5 18 | # pipe_inpainting_path=runwayml/stable-diffusion-inpainting 19 | 20 | body_unet_path=${body_weight_dir}/unet 21 | body_controlent_path=${body_weight_dir}/controlnet 22 | body_cfg=7.5 23 | body_condition_scale=2. 24 | 25 | head_unet_pth=${head_weight_dir}/unet 26 | head_controlnet_path=${head_weight_dir}/controlnet 27 | head_cfg=3.5 28 | head_condition_scale=1. 
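# Pipeline overview: inference_body.py renders body frames from the input pose sequence;
# face_alignment.py and get_mask.py crop and mask the face region of each rendered frame;
# inference_face_inpainting.py re-generates the aligned face crops with the head model;
# face_blending.py warps the refined faces back into the body frames, and the two ffmpeg
# calls assemble the intermediate and final frames into videos. The CFG and condition-scale
# values above are the settings used for the provided sample and may need tuning for other subjects.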
29 | 30 | save_root=./samples/output 31 | 32 | 33 | fps=30 34 | 35 | body_save_dir=${save_root}/body_output 36 | body_headcrop_root=${body_save_dir}_meshcrop 37 | head_dir=${body_headcrop_root}/raw_aligned 38 | head_mask_dir=${body_headcrop_root}/raw_aligned_mask 39 | head_input_dir=${body_headcrop_root}/aligned 40 | head_matrix_dir=${body_headcrop_root}/matrix 41 | head_save_dir=${body_headcrop_root}/face_output 42 | 43 | final_save_dir=${save_root}/final_output 44 | 45 | # inference on body 46 | python inference_body.py \ 47 | --unet_path $body_unet_path \ 48 | --controlnet_path $body_controlent_path \ 49 | --input_dir $body_input_dir \ 50 | --save_dir $body_save_dir \ 51 | --prompt_img_pth $body_prompt_img_pth \ 52 | --CFG $body_cfg \ 53 | --condition_scale $body_condition_scale \ 54 | # --pipe_path $pipe_path 55 | 56 | ffmpeg -r 30 -i $body_save_dir/%06d.png -q:v 0 -pix_fmt yuv420p $body_save_dir.mp4 57 | 58 | # face alignment 59 | python face_alignment.py \ 60 | --imgdir_pth $body_input_dir \ 61 | --raw_imgdir_pth $body_save_dir \ 62 | --results_dir $body_headcrop_root \ 63 | --crop_size 512 64 | 65 | # get face mask 66 | python get_mask.py \ 67 | --input_pth $head_dir \ 68 | --mask_pth $head_mask_dir 69 | 70 | 71 | # inference on face inpainting 72 | python inference_face_inpainting.py \ 73 | --unet_path $head_unet_pth \ 74 | --controlnet_path $head_controlnet_path \ 75 | --input_dir $head_input_dir \ 76 | --face_dir $head_dir \ 77 | --mask_dir $head_mask_dir \ 78 | --save_dir $head_save_dir \ 79 | --prompt_img_pth $head_prompt_img_pth \ 80 | --CFG $head_cfg \ 81 | --condition_scale $head_condition_scale \ 82 | --batch_size 30 \ 83 | # --pipe_pth $pipe_inpainting_path 84 | 85 | 86 | # face blending 87 | python face_blending.py \ 88 | --body_dir $body_save_dir \ 89 | --face_dir $head_save_dir \ 90 | --matrix_dir $head_matrix_dir \ 91 | --save_dir $final_save_dir \ 92 | --crop_size 512 93 | 94 | 95 | ffmpeg -r 30 -i $final_save_dir/%06d.png -q:v 0 -pix_fmt yuv420p $final_save_dir.mp4 -------------------------------------------------------------------------------- /inference/face_alignment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import numpy as np 5 | from tqdm import tqdm 6 | from insightface_func.face_detect_crop_single import Face_detect_crop 7 | 8 | import argparse 9 | 10 | def align(img, M, crop_size): 11 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 12 | return align_img 13 | 14 | def _totensor(array): 15 | tensor = torch.from_numpy(array) 16 | img = tensor.transpose(0, 1).transpose(0, 2).contiguous() 17 | return img.float().div(255) 18 | 19 | 20 | def imgdir_align(imgdir_path, raw_imgdir_pth, detect_model, results_dir='./temp_results', crop_size=224): 21 | os.makedirs(results_dir, exist_ok=True) 22 | os.makedirs(os.path.join(results_dir, 'aligned'), exist_ok=True) 23 | os.makedirs(os.path.join(results_dir, 'raw_aligned'), exist_ok=True) 24 | os.makedirs(os.path.join(results_dir, 'matrix'), exist_ok=True) 25 | 26 | for file_name in tqdm(os.listdir(imgdir_path)): 27 | img_pth = os.path.join(imgdir_path, file_name) 28 | img = cv2.imread(img_pth) 29 | 30 | raw_img_pth = os.path.join(raw_imgdir_pth, file_name[:-4]+'.jpg') 31 | raw_file_name = file_name[:-4]+'.jpg' if os.path.exists(raw_img_pth) else file_name[:-4]+'.png' 32 | raw_img_pth = os.path.join(raw_imgdir_pth, raw_file_name) 33 | if not os.path.exists(raw_img_pth): 34 | continue 35 | raw_img = 
cv2.imread(raw_img_pth) 36 | 37 | if img is not None: 38 | detect_results = detect_model.get(img,crop_size) 39 | 40 | if detect_results is not None: 41 | 42 | img_align_crop_list = detect_results[0] 43 | img_mat_list = detect_results[1] 44 | 45 | for img_align_crop in img_align_crop_list: 46 | cv2.imwrite(os.path.join(results_dir, 'aligned', file_name[:-4]+'.jpg'), img_align_crop) 47 | 48 | raw_img_align_crop = align(raw_img, img_mat_list[0], crop_size) 49 | cv2.imwrite(os.path.join(results_dir, 'raw_aligned', raw_file_name), raw_img_align_crop) 50 | np.save(os.path.join(results_dir, 'matrix', file_name.split('.')[0]), img_mat_list[0]) 51 | break 52 | 53 | else: 54 | print('not detected in {}'.format(img_pth)) 55 | if not os.path.exists(results_dir): 56 | os.mkdir(results_dir) 57 | 58 | else: 59 | pass 60 | 61 | 62 | 63 | 64 | 65 | if __name__ == '__main__': 66 | 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument( "--imgdir_pth", type=str, default=None, required=True ) 69 | parser.add_argument( "--raw_imgdir_pth", type=str, default=None, required=True ) 70 | parser.add_argument( "--results_dir", type=str, default=None, required=True ) 71 | parser.add_argument( "--crop_size", type=int, default=512 ) 72 | 73 | args = parser.parse_args() 74 | 75 | 76 | imgdir_pth = args.imgdir_pth 77 | raw_imgdir_pth = args.raw_imgdir_pth 78 | 79 | crop_size=args.crop_size 80 | if crop_size == 512: 81 | mode = 'ffhq' 82 | else: 83 | mode = 'None' 84 | 85 | app = Face_detect_crop(name='antelope', root='./insightface_func/models') 86 | app.prepare(ctx_id= 0, det_thresh=0.6, det_size=(640,640),mode=mode) 87 | 88 | results_dir = args.results_dir 89 | 90 | imgdir_align(imgdir_pth, raw_imgdir_pth, app, crop_size=crop_size, results_dir=results_dir) -------------------------------------------------------------------------------- /inference/face_blending.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import numpy as np 5 | from tqdm import tqdm 6 | from util.reverse2original import reverse2wholeimage 7 | from util.add_watermark import watermark_image 8 | from util.norm import SpecificNorm 9 | from parsing_model.model import BiSeNet 10 | 11 | import argparse 12 | 13 | 14 | def _totensor(array): 15 | tensor = torch.from_numpy(array) 16 | img = tensor.transpose(0, 1).transpose(0, 2).contiguous() 17 | return img.float().div(255) 18 | 19 | def imgdir_inverse_align_bylist( 20 | body_dir, 21 | face_dir, 22 | matrix_dir, 23 | save_dir, # for save dir 24 | crop_size=224, 25 | use_mask =False 26 | ): 27 | spNorm =SpecificNorm() 28 | if use_mask: 29 | n_classes = 19 30 | net = BiSeNet(n_classes=n_classes) 31 | net.cuda() 32 | save_pth = os.path.join('./parsing_model/checkpoint', '79999_iter.pth') 33 | net.load_state_dict(torch.load(save_pth)) 34 | net.eval() 35 | else: 36 | net = None 37 | 38 | os.makedirs(save_dir, exist_ok=True) 39 | 40 | length = len(os.listdir(face_dir)) 41 | 42 | for frame_idx in tqdm(range(1,length+1)): 43 | body_pth = os.path.join(body_dir, '{:06d}.png'.format(frame_idx)) 44 | face_pth = os.path.join(face_dir, '{:06d}.png'.format(frame_idx)) 45 | matrix_pth = os.path.join(matrix_dir, '{:06d}.npy'.format(frame_idx)) 46 | 47 | 48 | body = cv2.imread(body_pth) 49 | img = body 50 | 51 | m = np.load(matrix_pth) 52 | img_mat_list = [m] 53 | 54 | aligned_pth=face_pth 55 | img_align_crop = cv2.imread(aligned_pth) 56 | img_align_crop = cv2.resize(img_align_crop, (512,512)) 57 | # BGR TO RGB 58 | 
img_align_crop_tenor = _totensor(cv2.cvtColor(img_align_crop,cv2.COLOR_BGR2RGB))[None,...].cuda() 59 | img_align_crop_tenor_list = [img_align_crop_tenor] 60 | 61 | edit_align = cv2.imread(face_pth) 62 | # BGR TO RGB 63 | edit_align = cv2.resize(edit_align, (crop_size,crop_size)) 64 | 65 | edit_align_tenor = _totensor(cv2.cvtColor(edit_align,cv2.COLOR_BGR2RGB))[None,...].cuda() 66 | edit_align_tenor = edit_align_tenor.squeeze(0) 67 | edit_result_list = [edit_align_tenor] 68 | 69 | reverse2wholeimage(img_align_crop_tenor_list, edit_result_list, img_mat_list, crop_size, img, \ 70 | os.path.join(save_dir, '{:06d}.png'.format(frame_idx)),pasring_model=net, use_mask=use_mask, norm = spNorm) 71 | 72 | 73 | if __name__ == '__main__': 74 | parser = argparse.ArgumentParser(description="Simple example of a ControlNet training script.") 75 | parser.add_argument( "--body_dir", type=str, default=None, required=True ) 76 | parser.add_argument( "--face_dir", type=str, default=None, required=True ) 77 | parser.add_argument( "--matrix_dir", type=str, default=None, required=True ) 78 | parser.add_argument( "--save_dir", type=str, default=None, required=True ) 79 | parser.add_argument( "--crop_size", type=int, default=512,) 80 | 81 | args = parser.parse_args() 82 | 83 | crop_size=args.crop_size 84 | if crop_size == 512: 85 | mode = 'ffhq' 86 | else: 87 | mode = 'None' 88 | 89 | save_dir = args.save_dir 90 | 91 | imgdir_inverse_align_bylist( 92 | body_dir=args.body_dir, 93 | face_dir=args.face_dir, 94 | matrix_dir=args.matrix_dir, 95 | save_dir=save_dir, # for save dir 96 | crop_size=crop_size, use_mask=False) 97 | -------------------------------------------------------------------------------- /inference/get_mask.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | # from logger import setup_logger 5 | from bisenet import BiSeNet 6 | 7 | import torch 8 | 9 | import os 10 | import os.path as osp 11 | import numpy as np 12 | from PIL import Image 13 | import torchvision.transforms as transforms 14 | import cv2 15 | 16 | import glob 17 | import json 18 | 19 | from tqdm import tqdm 20 | 21 | import argparse 22 | 23 | # [0, 'background', 1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye', 24 | # 6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r', 10 'nose', 11 'mouth', 12 'u_lip', 25 | # 13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat'] 26 | 27 | def dilate(img, reverse=False): 28 | img = torch.from_numpy(img) 29 | mask = torch.ones_like(img) 30 | 31 | parsing = img 32 | mask = mask - ((parsing == 0).float()) 33 | mask = mask - ((parsing == 14).float()) 34 | mask = mask - ((parsing == 15).float()) 35 | mask = mask - ((parsing == 16).float()) 36 | mask = mask - ((parsing == 17).float()) 37 | mask = mask - ((parsing == 18).float()) 38 | 39 | kernel = np.ones((3,3), dtype=np.uint8) # origin maybe 40 | mask_numpy = mask.numpy() 41 | mask_numpy = cv2.dilate(mask_numpy, kernel, iterations=1) 42 | if reverse: 43 | mask_numpy = 1-mask_numpy 44 | mask_numpy = 255*mask_numpy 45 | 46 | return mask_numpy 47 | 48 | def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg', ifdilate=True, reverse=False): 49 | im = np.array(im) 50 | vis_im = im.copy().astype(np.uint8) 51 | vis_parsing_anno = parsing_anno.copy().astype(np.uint8) 52 | vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST) 53 | 54 | if ifdilate: 55 | 
vis_parsing_anno = dilate(vis_parsing_anno, reverse=reverse) 56 | 57 | # Save result or not 58 | if save_im: 59 | cv2.imwrite(save_path[:-4] +'.png', vis_parsing_anno) 60 | 61 | 62 | def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth', ifdilate=True, reverse=False): 63 | 64 | if not os.path.exists(respth): 65 | os.makedirs(respth) 66 | 67 | n_classes = 19 68 | net = BiSeNet(n_classes=n_classes) 69 | net.cuda() 70 | save_pth = osp.join('res/cp', cp) 71 | net.load_state_dict(torch.load(save_pth)) 72 | net.eval() 73 | 74 | to_tensor = transforms.Compose([ 75 | transforms.ToTensor(), 76 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 77 | ]) 78 | pathes = glob.glob( os.path.join(dspth,'*.jpg') ) + glob.glob( os.path.join(dspth,'*.png') ) 79 | with torch.no_grad(): 80 | for image_path in tqdm(pathes): 81 | image_path = os.path.basename(image_path) 82 | # print(image_path) 83 | img = Image.open(osp.join(dspth, image_path)) 84 | image = img.resize((512, 512), Image.BILINEAR) 85 | img = to_tensor(image) 86 | img = torch.unsqueeze(img, 0) 87 | img = img.cuda() 88 | out = net(img)[0] 89 | parsing = out.squeeze(0).cpu().numpy().argmax(0) 90 | 91 | 92 | vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path), ifdilate=ifdilate, reverse=reverse) 93 | 94 | if __name__ == "__main__": 95 | parser = argparse.ArgumentParser(description="Simple example of a ControlNet training script.") 96 | parser.add_argument( "--input_pth", type=str, default=None, required=True ) 97 | parser.add_argument( "--mask_pth", type=str, default=None, required=True ) 98 | 99 | args = parser.parse_args() 100 | 101 | ifdilate=True 102 | reverse=True 103 | 104 | respth=args.mask_pth 105 | dspth=args.input_pth 106 | 107 | evaluate(respth=respth, dspth=dspth, cp='79999_iter.pth', ifdilate=ifdilate, reverse=reverse) 108 | 109 | -------------------------------------------------------------------------------- /inference/inference_body.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | 5 | from PIL import Image 6 | 7 | from diffusers import DDIMScheduler, AutoencoderKL 8 | from transformers import CLIPTextModel, CLIPTokenizer 9 | from models.pipeline_controlvideo import ControlVideoPipeline 10 | from models.util import save_image 11 | from models.unet import UNet3DConditionModel 12 | from models.controlnet import ControlNetModel3D 13 | 14 | from glob import glob 15 | 16 | from transformers import CLIPTokenizer, CLIPProcessor, CLIPVisionModel 17 | 18 | 19 | import argparse 20 | 21 | # Load CLIP Image Encoder 22 | clip_encoder = CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32").cuda() 23 | clip_encoder.requires_grad_(False) 24 | clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32") 25 | 26 | 27 | def encode_img_clip(image): 28 | inputs = clip_processor(images=image, return_tensors="pt") 29 | inputs = {k: v.cuda() for k, v in inputs.items()} 30 | clip_image_embeddings = clip_encoder(**inputs).last_hidden_state.cuda() 31 | return clip_image_embeddings 32 | 33 | 34 | def get_args(): 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument("--height", type=int, default=512, help="Height of synthesized video, and should be a multiple of 32") 37 | parser.add_argument("--width", type=int, default=512, help="Width of synthesized video, and should be a multiple of 32") 38 | parser.add_argument("--smoother_steps", nargs='+', 
default=[19, 20], type=int, help="Timesteps at which using interleaved-frame smoother") 39 | parser.add_argument("--is_long_video", action='store_true', help="Whether to use hierarchical sampler to produce long video") 40 | parser.add_argument("--seed", type=int, default=42, help="Random seed of generator") 41 | 42 | 43 | parser.add_argument( 44 | "--pipe_path", 45 | type=str, 46 | default='runwayml/stable-diffusion-v1-5', 47 | ) 48 | parser.add_argument( "--unet_path", type=str, default=None, required=True ) 49 | parser.add_argument( "--controlnet_path", type=str, default=None, required=True ) 50 | parser.add_argument( "--input_dir", type=str, default=None, required=True ) 51 | parser.add_argument( "--save_dir", type=str, default=None, required=True ) 52 | parser.add_argument( "--prompt_img_pth", type=str, default=None, required=True ) 53 | parser.add_argument( "--CFG", type=float, default=1.5 ) 54 | parser.add_argument( "--condition_scale", type=float, default=1. ) 55 | 56 | parser.add_argument("--num_steps", type=int, default=20) 57 | 58 | parser.add_argument("--ws", type=int, default=16) 59 | parser.add_argument("--os", type=int, default=4) 60 | 61 | args = parser.parse_args() 62 | return args 63 | 64 | if __name__ == "__main__": 65 | args = get_args() 66 | 67 | device = "cuda" 68 | sd_path = args.pipe_path 69 | 70 | # Height and width should be a multiple of 32 71 | args.height = (args.height // 32) * 32 72 | args.width = (args.width // 32) * 32 73 | 74 | controlnet_pth = args.controlnet_path 75 | unet_pth = args.unet_path 76 | 77 | tokenizer = CLIPTokenizer.from_pretrained(sd_path, subfolder="tokenizer") 78 | text_encoder = CLIPTextModel.from_pretrained(sd_path, subfolder="text_encoder").to(dtype=torch.float16) 79 | vae = AutoencoderKL.from_pretrained(sd_path, subfolder="vae").to(dtype=torch.float16) 80 | unet = UNet3DConditionModel.from_pretrained_2d(unet_pth).to(dtype=torch.float16) 81 | controlnet = ControlNetModel3D.from_pretrained_2d(controlnet_pth).to(dtype=torch.float16) 82 | 83 | 84 | scheduler=DDIMScheduler.from_pretrained(sd_path, subfolder="scheduler") 85 | 86 | pipe = ControlVideoPipeline( 87 | vae=vae, text_encoder=text_encoder, tokenizer=tokenizer, unet=unet, 88 | controlnet=controlnet, scheduler=scheduler, 89 | ) 90 | pipe.enable_vae_slicing() 91 | pipe.enable_xformers_memory_efficient_attention() 92 | pipe.to(device) 93 | 94 | generator = torch.Generator(device="cuda") 95 | generator.manual_seed(args.seed) 96 | 97 | input_dir = args.input_dir 98 | prompt_img_pth = args.prompt_img_pth 99 | save_dir= args.save_dir 100 | 101 | prompt_img = Image.open(prompt_img_pth) 102 | prompt_embeds = encode_img_clip(prompt_img) 103 | 104 | 105 | img_pths = glob(input_dir+'/*.png') 106 | img_pths.sort() 107 | 108 | pil_annotation = [ Image.open(img) for img in img_pths] 109 | 110 | video_length = len(pil_annotation) 111 | 112 | # Step 3. 
inference 113 | sample = pipe.generate_long_video_slidingwindow_overlap( 114 | video_length=video_length, frames=pil_annotation, 115 | num_inference_steps=args.num_steps, smooth_steps=args.smoother_steps, window_size=args.ws, window_overlap=args.os, 116 | generator=generator, 117 | guidance_scale=args.CFG, 118 | prompt_embeds=prompt_embeds, 119 | controlnet_conditioning_scale=args.condition_scale, 120 | width=args.width, 121 | height=args.height, 122 | ).videos 123 | os.makedirs(save_dir, exist_ok=True) 124 | 125 | for idx, img_pth in enumerate(img_pths): 126 | img_name = os.path.basename(img_pth) 127 | save_pth = os.path.join(save_dir, img_name) 128 | output_image = sample[:,:,] 129 | save_image(sample[:,:,idx,:,:].unsqueeze(2), save_pth) -------------------------------------------------------------------------------- /inference/insightface_func/.ipynb_checkpoints/face_detect_crop_single-checkpoint.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 16:46:04 7 | Description: 8 | ''' 9 | from __future__ import division 10 | import collections 11 | import numpy as np 12 | import glob 13 | import os 14 | import os.path as osp 15 | import cv2 16 | from insightface.model_zoo import model_zoo 17 | from insightface_func.utils import face_align_ffhqandnewarc as face_align 18 | 19 | __all__ = ['Face_detect_crop', 'Face'] 20 | 21 | Face = collections.namedtuple('Face', [ 22 | 'bbox', 'kps', 'det_score', 'embedding', 'gender', 'age', 23 | 'embedding_norm', 'normed_embedding', 24 | 'landmark' 25 | ]) 26 | 27 | Face.__new__.__defaults__ = (None, ) * len(Face._fields) 28 | 29 | 30 | class Face_detect_crop: 31 | def __init__(self, name, root='~/.insightface_func/models'): 32 | self.models = {} 33 | root = os.path.expanduser(root) 34 | onnx_files = glob.glob(osp.join(root, name, '*.onnx')) 35 | onnx_files = sorted(onnx_files) 36 | for onnx_file in onnx_files: 37 | if onnx_file.find('_selfgen_')>0: 38 | #print('ignore:', onnx_file) 39 | continue 40 | model = model_zoo.get_model(onnx_file) 41 | if model.taskname not in self.models: 42 | print('find model:', onnx_file, model.taskname) 43 | self.models[model.taskname] = model 44 | else: 45 | print('duplicated model task type, ignore:', onnx_file, model.taskname) 46 | del model 47 | assert 'detection' in self.models 48 | self.det_model = self.models['detection'] 49 | 50 | 51 | def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640), mode ='None'): 52 | self.det_thresh = det_thresh 53 | self.mode = mode 54 | assert det_size is not None 55 | print('set det-size:', det_size) 56 | self.det_size = det_size 57 | for taskname, model in self.models.items(): 58 | if taskname=='detection': 59 | model.prepare(ctx_id, input_size=det_size) 60 | else: 61 | model.prepare(ctx_id) 62 | 63 | def get(self, img, crop_size, max_num=0): 64 | bboxes, kpss = self.det_model.detect(img, 65 | threshold=self.det_thresh, 66 | max_num=max_num, 67 | metric='default') 68 | if bboxes.shape[0] == 0: 69 | print('No face detected') 70 | return None 71 | # ret = [] 72 | # for i in range(bboxes.shape[0]): 73 | # bbox = bboxes[i, 0:4] 74 | # det_score = bboxes[i, 4] 75 | # kps = None 76 | # if kpss is not None: 77 | # kps = kpss[i] 78 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 79 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 80 | # for i in 
range(bboxes.shape[0]): 81 | # kps = None 82 | # if kpss is not None: 83 | # kps = kpss[i] 84 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 85 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 86 | 87 | det_score = bboxes[..., 4] 88 | 89 | # select the face with the hightest detection score 90 | best_index = np.argmax(det_score) 91 | 92 | kps = None 93 | if kpss is not None: 94 | kps = kpss[best_index] 95 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 96 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 97 | 98 | return [align_img], [M] 99 | -------------------------------------------------------------------------------- /inference/insightface_func/.ipynb_checkpoints/face_detect_crop_single_smooth-checkpoint.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 16:46:04 7 | Description: 8 | ''' 9 | from __future__ import division 10 | import collections 11 | import numpy as np 12 | import glob 13 | import os 14 | import os.path as osp 15 | import cv2 16 | from insightface.model_zoo import model_zoo 17 | from insightface_func.utils import face_align_ffhqandnewarc as face_align 18 | 19 | __all__ = ['Face_detect_crop', 'Face'] 20 | 21 | Face = collections.namedtuple('Face', [ 22 | 'bbox', 'kps', 'det_score', 'embedding', 'gender', 'age', 23 | 'embedding_norm', 'normed_embedding', 24 | 'landmark' 25 | ]) 26 | 27 | Face.__new__.__defaults__ = (None, ) * len(Face._fields) 28 | 29 | 30 | class Face_detect_crop: 31 | def __init__(self, name, root='~/.insightface_func/models'): 32 | self.models = {} 33 | root = os.path.expanduser(root) 34 | onnx_files = glob.glob(osp.join(root, name, '*.onnx')) 35 | onnx_files = sorted(onnx_files) 36 | for onnx_file in onnx_files: 37 | if onnx_file.find('_selfgen_')>0: 38 | #print('ignore:', onnx_file) 39 | continue 40 | model = model_zoo.get_model(onnx_file) 41 | if model.taskname not in self.models: 42 | print('find model:', onnx_file, model.taskname) 43 | self.models[model.taskname] = model 44 | else: 45 | print('duplicated model task type, ignore:', onnx_file, model.taskname) 46 | del model 47 | assert 'detection' in self.models 48 | self.det_model = self.models['detection'] 49 | 50 | 51 | def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640), mode ='None'): 52 | self.det_thresh = det_thresh 53 | self.mode = mode 54 | assert det_size is not None 55 | print('set det-size:', det_size) 56 | self.det_size = det_size 57 | for taskname, model in self.models.items(): 58 | if taskname=='detection': 59 | model.prepare(ctx_id, input_size=det_size) 60 | else: 61 | model.prepare(ctx_id) 62 | 63 | def get(self, img, crop_size, max_num=0): 64 | bboxes, kpss = self.det_model.detect(img, 65 | threshold=self.det_thresh, 66 | max_num=max_num, 67 | metric='default') 68 | if bboxes.shape[0] == 0: 69 | print('No face detected') 70 | return None 71 | 72 | det_score = bboxes[..., 4] 73 | 74 | # select the face with the hightest detection score 75 | best_index = np.argmax(det_score) 76 | 77 | kps = None 78 | if kpss is not None: 79 | kps = kpss[best_index] 80 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 81 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 82 | 83 | return [align_img], [M] 84 | 85 | def get_kps(self, img, crop_size, max_num=0): 
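# Detects faces in `img` and returns the 5-point landmarks (kps) of the highest-scoring
# detection, or None when no face is found; the caller can collect these landmarks over
# neighbouring frames and pass them to get_smooth_m(), which averages them before
# estimating the alignment matrix.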
86 | bboxes, kpss = self.det_model.detect(img, 87 | threshold=self.det_thresh, 88 | max_num=max_num, 89 | metric='default') 90 | if bboxes.shape[0] == 0: 91 | print('No face detected') 92 | return None 93 | 94 | det_score = bboxes[..., 4] 95 | 96 | # select the face with the hightest detection score 97 | best_index = np.argmax(det_score) 98 | # print(best_index) 99 | 100 | kps = None 101 | if kpss is not None: 102 | kps = kpss[best_index] 103 | return kps 104 | 105 | def smooth_kps(self, kpss): 106 | kps = np.mean(kpss, axis=0) 107 | return kps 108 | 109 | def get_smooth_m(self, img, crop_size, kpss): 110 | ''' 111 | kpss means kps with nearest frame 112 | ''' 113 | kps = self.smooth_kps(kpss) 114 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 115 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 116 | 117 | return [align_img], [M] -------------------------------------------------------------------------------- /inference/insightface_func/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/insightface_func/__init__.py -------------------------------------------------------------------------------- /inference/insightface_func/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/insightface_func/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /inference/insightface_func/__pycache__/face_detect_crop_single.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/insightface_func/__pycache__/face_detect_crop_single.cpython-38.pyc -------------------------------------------------------------------------------- /inference/insightface_func/__pycache__/face_detect_crop_single_smooth.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/insightface_func/__pycache__/face_detect_crop_single_smooth.cpython-38.pyc -------------------------------------------------------------------------------- /inference/insightface_func/face_detect_crop_multi.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 16:45:41 7 | Description: 8 | ''' 9 | from __future__ import division 10 | import collections 11 | import numpy as np 12 | import glob 13 | import os 14 | import os.path as osp 15 | import cv2 16 | from insightface.model_zoo import model_zoo 17 | from insightface_func.utils import face_align_ffhqandnewarc as face_align 18 | 19 | __all__ = ['Face_detect_crop', 'Face'] 20 | 21 | Face = collections.namedtuple('Face', [ 22 | 'bbox', 'kps', 'det_score', 'embedding', 'gender', 'age', 23 | 'embedding_norm', 'normed_embedding', 24 | 'landmark' 25 | ]) 26 | 27 | Face.__new__.__defaults__ = (None, ) * len(Face._fields) 28 | 29 | 30 | class Face_detect_crop: 31 | def __init__(self, name, 
root='~/.insightface_func/models'): 32 | self.models = {} 33 | root = os.path.expanduser(root) 34 | onnx_files = glob.glob(osp.join(root, name, '*.onnx')) 35 | onnx_files = sorted(onnx_files) 36 | for onnx_file in onnx_files: 37 | if onnx_file.find('_selfgen_')>0: 38 | #print('ignore:', onnx_file) 39 | continue 40 | model = model_zoo.get_model(onnx_file) 41 | if model.taskname not in self.models: 42 | print('find model:', onnx_file, model.taskname) 43 | self.models[model.taskname] = model 44 | else: 45 | print('duplicated model task type, ignore:', onnx_file, model.taskname) 46 | del model 47 | assert 'detection' in self.models 48 | self.det_model = self.models['detection'] 49 | 50 | 51 | def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640), mode ='None'): 52 | self.det_thresh = det_thresh 53 | self.mode = mode 54 | assert det_size is not None 55 | print('set det-size:', det_size) 56 | self.det_size = det_size 57 | for taskname, model in self.models.items(): 58 | if taskname=='detection': 59 | model.prepare(ctx_id, input_size=det_size) 60 | else: 61 | model.prepare(ctx_id) 62 | 63 | def get(self, img, crop_size, max_num=0): 64 | bboxes, kpss = self.det_model.detect(img, 65 | threshold=self.det_thresh, 66 | max_num=max_num, 67 | metric='default') 68 | if bboxes.shape[0] == 0: 69 | return None 70 | ret = [] 71 | # for i in range(bboxes.shape[0]): 72 | # bbox = bboxes[i, 0:4] 73 | # det_score = bboxes[i, 4] 74 | # kps = None 75 | # if kpss is not None: 76 | # kps = kpss[i] 77 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 78 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 79 | align_img_list = [] 80 | M_list = [] 81 | for i in range(bboxes.shape[0]): 82 | kps = None 83 | if kpss is not None: 84 | kps = kpss[i] 85 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 86 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 87 | align_img_list.append(align_img) 88 | M_list.append(M) 89 | 90 | # det_score = bboxes[..., 4] 91 | 92 | # best_index = np.argmax(det_score) 93 | 94 | # kps = None 95 | # if kpss is not None: 96 | # kps = kpss[best_index] 97 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 98 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 99 | 100 | return align_img_list, M_list 101 | -------------------------------------------------------------------------------- /inference/insightface_func/face_detect_crop_single.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 16:46:04 7 | Description: 8 | ''' 9 | from __future__ import division 10 | import collections 11 | import numpy as np 12 | import glob 13 | import os 14 | import os.path as osp 15 | import cv2 16 | from insightface.model_zoo import model_zoo 17 | from insightface_func.utils import face_align_ffhqandnewarc as face_align 18 | 19 | __all__ = ['Face_detect_crop', 'Face'] 20 | 21 | Face = collections.namedtuple('Face', [ 22 | 'bbox', 'kps', 'det_score', 'embedding', 'gender', 'age', 23 | 'embedding_norm', 'normed_embedding', 24 | 'landmark' 25 | ]) 26 | 27 | Face.__new__.__defaults__ = (None, ) * len(Face._fields) 28 | 29 | 30 | class Face_detect_crop: 31 | def __init__(self, name, root='~/.insightface_func/models'): 32 | self.models = {} 33 | root = os.path.expanduser(root) 34 | onnx_files = 
glob.glob(osp.join(root, name, '*.onnx')) 35 | onnx_files = sorted(onnx_files) 36 | for onnx_file in onnx_files: 37 | if onnx_file.find('_selfgen_')>0: 38 | #print('ignore:', onnx_file) 39 | continue 40 | model = model_zoo.get_model(onnx_file) 41 | if model.taskname not in self.models: 42 | print('find model:', onnx_file, model.taskname) 43 | self.models[model.taskname] = model 44 | else: 45 | print('duplicated model task type, ignore:', onnx_file, model.taskname) 46 | del model 47 | assert 'detection' in self.models 48 | self.det_model = self.models['detection'] 49 | 50 | 51 | def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640), mode ='None'): 52 | self.det_thresh = det_thresh 53 | self.mode = mode 54 | assert det_size is not None 55 | print('set det-size:', det_size) 56 | self.det_size = det_size 57 | for taskname, model in self.models.items(): 58 | if taskname=='detection': 59 | model.prepare(ctx_id, input_size=det_size) 60 | else: 61 | model.prepare(ctx_id) 62 | 63 | def get(self, img, crop_size, max_num=0): 64 | bboxes, kpss = self.det_model.detect(img, 65 | threshold=self.det_thresh, 66 | max_num=max_num, 67 | metric='default') 68 | if bboxes.shape[0] == 0: 69 | print('No face detected') 70 | return None 71 | # ret = [] 72 | # for i in range(bboxes.shape[0]): 73 | # bbox = bboxes[i, 0:4] 74 | # det_score = bboxes[i, 4] 75 | # kps = None 76 | # if kpss is not None: 77 | # kps = kpss[i] 78 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 79 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 80 | # for i in range(bboxes.shape[0]): 81 | # kps = None 82 | # if kpss is not None: 83 | # kps = kpss[i] 84 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 85 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 86 | 87 | det_score = bboxes[..., 4] 88 | 89 | # select the face with the hightest detection score 90 | best_index = np.argmax(det_score) 91 | 92 | kps = None 93 | if kpss is not None: 94 | kps = kpss[best_index] 95 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 96 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 97 | 98 | return [align_img], [M] 99 | -------------------------------------------------------------------------------- /inference/insightface_func/face_detect_crop_single_smooth.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 16:46:04 7 | Description: 8 | ''' 9 | from __future__ import division 10 | import collections 11 | import numpy as np 12 | import glob 13 | import os 14 | import os.path as osp 15 | import cv2 16 | from insightface.model_zoo import model_zoo 17 | from insightface_func.utils import face_align_ffhqandnewarc as face_align 18 | 19 | __all__ = ['Face_detect_crop', 'Face'] 20 | 21 | Face = collections.namedtuple('Face', [ 22 | 'bbox', 'kps', 'det_score', 'embedding', 'gender', 'age', 23 | 'embedding_norm', 'normed_embedding', 24 | 'landmark' 25 | ]) 26 | 27 | Face.__new__.__defaults__ = (None, ) * len(Face._fields) 28 | 29 | 30 | class Face_detect_crop: 31 | def __init__(self, name, root='~/.insightface_func/models'): 32 | self.models = {} 33 | root = os.path.expanduser(root) 34 | onnx_files = glob.glob(osp.join(root, name, '*.onnx')) 35 | onnx_files = sorted(onnx_files) 36 | for onnx_file in onnx_files: 37 | if 
onnx_file.find('_selfgen_')>0: 38 | #print('ignore:', onnx_file) 39 | continue 40 | model = model_zoo.get_model(onnx_file) 41 | if model.taskname not in self.models: 42 | print('find model:', onnx_file, model.taskname) 43 | self.models[model.taskname] = model 44 | else: 45 | print('duplicated model task type, ignore:', onnx_file, model.taskname) 46 | del model 47 | assert 'detection' in self.models 48 | self.det_model = self.models['detection'] 49 | 50 | 51 | def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640), mode ='None'): 52 | self.det_thresh = det_thresh 53 | self.mode = mode 54 | assert det_size is not None 55 | print('set det-size:', det_size) 56 | self.det_size = det_size 57 | for taskname, model in self.models.items(): 58 | if taskname=='detection': 59 | model.prepare(ctx_id, input_size=det_size) 60 | else: 61 | model.prepare(ctx_id) 62 | 63 | def get(self, img, crop_size, max_num=0): 64 | bboxes, kpss = self.det_model.detect(img, 65 | threshold=self.det_thresh, 66 | max_num=max_num, 67 | metric='default') 68 | if bboxes.shape[0] == 0: 69 | print('No face detected') 70 | return None 71 | 72 | det_score = bboxes[..., 4] 73 | 74 | # select the face with the hightest detection score 75 | best_index = np.argmax(det_score) 76 | 77 | kps = None 78 | if kpss is not None: 79 | kps = kpss[best_index] 80 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 81 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 82 | 83 | return [align_img], [M] 84 | 85 | def get_kps(self, img, crop_size, max_num=0): 86 | bboxes, kpss = self.det_model.detect(img, 87 | threshold=self.det_thresh, 88 | max_num=max_num, 89 | metric='default') 90 | if bboxes.shape[0] == 0: 91 | print('No face detected') 92 | return None 93 | 94 | det_score = bboxes[..., 4] 95 | 96 | # select the face with the hightest detection score 97 | best_index = np.argmax(det_score) 98 | # print(best_index) 99 | 100 | kps = None 101 | if kpss is not None: 102 | kps = kpss[best_index] 103 | return kps 104 | 105 | def smooth_kps(self, kpss): 106 | kps = np.mean(kpss, axis=0) 107 | return kps 108 | 109 | def get_smooth_m(self, img, crop_size, kpss): 110 | ''' 111 | kpss means kps with nearest frame 112 | ''' 113 | kps = self.smooth_kps(kpss) 114 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 115 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 116 | 117 | return [align_img], [M] -------------------------------------------------------------------------------- /inference/insightface_func/utils/.ipynb_checkpoints/face_align_ffhqandnewarc-checkpoint.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-15 19:42:42 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-15 20:01:47 7 | Description: 8 | ''' 9 | 10 | import cv2 11 | import numpy as np 12 | from skimage import transform as trans 13 | 14 | src1 = np.array([[51.642, 50.115], [57.617, 49.990], [35.740, 69.007], 15 | [51.157, 89.050], [57.025, 89.702]], 16 | dtype=np.float32) 17 | #<--left 18 | src2 = np.array([[45.031, 50.118], [65.568, 50.872], [39.677, 68.111], 19 | [45.177, 86.190], [64.246, 86.758]], 20 | dtype=np.float32) 21 | 22 | #---frontal 23 | src3 = np.array([[39.730, 51.138], [72.270, 51.138], [56.000, 68.493], 24 | [42.463, 87.010], [69.537, 87.010]], 25 | dtype=np.float32) 26 | 27 | #-->right 28 | src4 = np.array([[46.845, 50.872], [67.382, 50.118], [72.737, 
68.111], 29 | [48.167, 86.758], [67.236, 86.190]], 30 | dtype=np.float32) 31 | 32 | #-->right profile 33 | src5 = np.array([[54.796, 49.990], [60.771, 50.115], [76.673, 69.007], 34 | [55.388, 89.702], [61.257, 89.050]], 35 | dtype=np.float32) 36 | 37 | src = np.array([src1, src2, src3, src4, src5]) 38 | src_map = src 39 | 40 | ffhq_src = np.array([[192.98138, 239.94708], [318.90277, 240.1936], [256.63416, 314.01935], 41 | [201.26117, 371.41043], [313.08905, 371.15118]]) 42 | ffhq_src = np.expand_dims(ffhq_src, axis=0) 43 | 44 | large_ffhq_src = ffhq_src/1.5 + 110 45 | 46 | # arcface_src = np.array( 47 | # [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], 48 | # [41.5493, 92.3655], [70.7299, 92.2041]], 49 | # dtype=np.float32) 50 | 51 | # arcface_src = np.expand_dims(arcface_src, axis=0) 52 | 53 | # In[66]: 54 | 55 | 56 | # lmk is prediction; src is template 57 | def estimate_norm(lmk, image_size=112, mode='ffhq'): 58 | assert lmk.shape == (5, 2) 59 | tform = trans.SimilarityTransform() 60 | lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1) 61 | min_M = [] 62 | min_index = [] 63 | min_error = float('inf') 64 | if mode == 'ffhq': 65 | # assert image_size == 112 66 | src = ffhq_src * image_size / 512 67 | elif mode == 'large_ffhq': 68 | src = large_ffhq_src * image_size / 512 69 | else: 70 | src = src_map * image_size / 112 71 | for i in np.arange(src.shape[0]): 72 | tform.estimate(lmk, src[i]) 73 | M = tform.params[0:2, :] 74 | results = np.dot(M, lmk_tran.T) 75 | results = results.T 76 | error = np.sum(np.sqrt(np.sum((results - src[i])**2, axis=1))) 77 | # print(error) 78 | if error < min_error: 79 | min_error = error 80 | min_M = M 81 | min_index = i 82 | return min_M, min_index 83 | 84 | 85 | def norm_crop(img, landmark, image_size=112, mode='ffhq'): 86 | if mode == 'Both': 87 | M_None, _ = estimate_norm(landmark, image_size, mode = 'newarc') 88 | M_ffhq, _ = estimate_norm(landmark, image_size, mode='ffhq') 89 | warped_None = cv2.warpAffine(img, M_None, (image_size, image_size), borderValue=0.0) 90 | warped_ffhq = cv2.warpAffine(img, M_ffhq, (image_size, image_size), borderValue=0.0) 91 | return warped_ffhq, warped_None 92 | else: 93 | M, pose_index = estimate_norm(landmark, image_size, mode) 94 | warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0) 95 | return warped 96 | 97 | def square_crop(im, S): 98 | if im.shape[0] > im.shape[1]: 99 | height = S 100 | width = int(float(im.shape[1]) / im.shape[0] * S) 101 | scale = float(S) / im.shape[0] 102 | else: 103 | width = S 104 | height = int(float(im.shape[0]) / im.shape[1] * S) 105 | scale = float(S) / im.shape[1] 106 | resized_im = cv2.resize(im, (width, height)) 107 | det_im = np.zeros((S, S, 3), dtype=np.uint8) 108 | det_im[:resized_im.shape[0], :resized_im.shape[1], :] = resized_im 109 | return det_im, scale 110 | 111 | 112 | def transform(data, center, output_size, scale, rotation): 113 | scale_ratio = scale 114 | rot = float(rotation) * np.pi / 180.0 115 | #translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio) 116 | t1 = trans.SimilarityTransform(scale=scale_ratio) 117 | cx = center[0] * scale_ratio 118 | cy = center[1] * scale_ratio 119 | t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy)) 120 | t3 = trans.SimilarityTransform(rotation=rot) 121 | t4 = trans.SimilarityTransform(translation=(output_size / 2, 122 | output_size / 2)) 123 | t = t1 + t2 + t3 + t4 124 | M = t.params[0:2] 125 | cropped = cv2.warpAffine(data, 126 | M, (output_size, 
output_size), 127 | borderValue=0.0) 128 | return cropped, M 129 | 130 | 131 | def trans_points2d(pts, M): 132 | new_pts = np.zeros(shape=pts.shape, dtype=np.float32) 133 | for i in range(pts.shape[0]): 134 | pt = pts[i] 135 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32) 136 | new_pt = np.dot(M, new_pt) 137 | #print('new_pt', new_pt.shape, new_pt) 138 | new_pts[i] = new_pt[0:2] 139 | 140 | return new_pts 141 | 142 | 143 | def trans_points3d(pts, M): 144 | scale = np.sqrt(M[0][0] * M[0][0] + M[0][1] * M[0][1]) 145 | #print(scale) 146 | new_pts = np.zeros(shape=pts.shape, dtype=np.float32) 147 | for i in range(pts.shape[0]): 148 | pt = pts[i] 149 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32) 150 | new_pt = np.dot(M, new_pt) 151 | #print('new_pt', new_pt.shape, new_pt) 152 | new_pts[i][0:2] = new_pt[0:2] 153 | new_pts[i][2] = pts[i][2] * scale 154 | 155 | return new_pts 156 | 157 | 158 | def trans_points(pts, M): 159 | if pts.shape[1] == 2: 160 | return trans_points2d(pts, M) 161 | else: 162 | return trans_points3d(pts, M) 163 | 164 | -------------------------------------------------------------------------------- /inference/insightface_func/utils/__pycache__/face_align_ffhqandnewarc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/insightface_func/utils/__pycache__/face_align_ffhqandnewarc.cpython-38.pyc -------------------------------------------------------------------------------- /inference/insightface_func/utils/face_align_ffhqandnewarc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-15 19:42:42 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-15 20:01:47 7 | Description: 8 | ''' 9 | 10 | import cv2 11 | import numpy as np 12 | from skimage import transform as trans 13 | 14 | src1 = np.array([[51.642, 50.115], [57.617, 49.990], [35.740, 69.007], 15 | [51.157, 89.050], [57.025, 89.702]], 16 | dtype=np.float32) 17 | #<--left 18 | src2 = np.array([[45.031, 50.118], [65.568, 50.872], [39.677, 68.111], 19 | [45.177, 86.190], [64.246, 86.758]], 20 | dtype=np.float32) 21 | 22 | #---frontal 23 | src3 = np.array([[39.730, 51.138], [72.270, 51.138], [56.000, 68.493], 24 | [42.463, 87.010], [69.537, 87.010]], 25 | dtype=np.float32) 26 | 27 | #-->right 28 | src4 = np.array([[46.845, 50.872], [67.382, 50.118], [72.737, 68.111], 29 | [48.167, 86.758], [67.236, 86.190]], 30 | dtype=np.float32) 31 | 32 | #-->right profile 33 | src5 = np.array([[54.796, 49.990], [60.771, 50.115], [76.673, 69.007], 34 | [55.388, 89.702], [61.257, 89.050]], 35 | dtype=np.float32) 36 | 37 | src = np.array([src1, src2, src3, src4, src5]) 38 | src_map = src 39 | 40 | ffhq_src = np.array([[192.98138, 239.94708], [318.90277, 240.1936], [256.63416, 314.01935], 41 | [201.26117, 371.41043], [313.08905, 371.15118]]) 42 | ffhq_src = np.expand_dims(ffhq_src, axis=0) 43 | 44 | large_ffhq_src = ffhq_src/1.5 + 110 45 | 46 | # arcface_src = np.array( 47 | # [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], 48 | # [41.5493, 92.3655], [70.7299, 92.2041]], 49 | # dtype=np.float32) 50 | 51 | # arcface_src = np.expand_dims(arcface_src, axis=0) 52 | 53 | # In[66]: 54 | 55 | 56 | # lmk is prediction; src is template 57 | def estimate_norm(lmk, image_size=112, mode='ffhq'): 58 | assert lmk.shape == (5, 2) 59 | tform = 
trans.SimilarityTransform() 60 | lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1) 61 | min_M = [] 62 | min_index = [] 63 | min_error = float('inf') 64 | if mode == 'ffhq': 65 | # assert image_size == 112 66 | src = ffhq_src * image_size / 512 67 | elif mode == 'large_ffhq': 68 | src = large_ffhq_src * image_size / 512 69 | else: 70 | src = src_map * image_size / 112 71 | for i in np.arange(src.shape[0]): 72 | tform.estimate(lmk, src[i]) 73 | M = tform.params[0:2, :] 74 | results = np.dot(M, lmk_tran.T) 75 | results = results.T 76 | error = np.sum(np.sqrt(np.sum((results - src[i])**2, axis=1))) 77 | # print(error) 78 | if error < min_error: 79 | min_error = error 80 | min_M = M 81 | min_index = i 82 | return min_M, min_index 83 | 84 | 85 | def norm_crop(img, landmark, image_size=112, mode='ffhq'): 86 | if mode == 'Both': 87 | M_None, _ = estimate_norm(landmark, image_size, mode = 'newarc') 88 | M_ffhq, _ = estimate_norm(landmark, image_size, mode='ffhq') 89 | warped_None = cv2.warpAffine(img, M_None, (image_size, image_size), borderValue=0.0) 90 | warped_ffhq = cv2.warpAffine(img, M_ffhq, (image_size, image_size), borderValue=0.0) 91 | return warped_ffhq, warped_None 92 | else: 93 | M, pose_index = estimate_norm(landmark, image_size, mode) 94 | warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0) 95 | return warped 96 | 97 | def square_crop(im, S): 98 | if im.shape[0] > im.shape[1]: 99 | height = S 100 | width = int(float(im.shape[1]) / im.shape[0] * S) 101 | scale = float(S) / im.shape[0] 102 | else: 103 | width = S 104 | height = int(float(im.shape[0]) / im.shape[1] * S) 105 | scale = float(S) / im.shape[1] 106 | resized_im = cv2.resize(im, (width, height)) 107 | det_im = np.zeros((S, S, 3), dtype=np.uint8) 108 | det_im[:resized_im.shape[0], :resized_im.shape[1], :] = resized_im 109 | return det_im, scale 110 | 111 | 112 | def transform(data, center, output_size, scale, rotation): 113 | scale_ratio = scale 114 | rot = float(rotation) * np.pi / 180.0 115 | #translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio) 116 | t1 = trans.SimilarityTransform(scale=scale_ratio) 117 | cx = center[0] * scale_ratio 118 | cy = center[1] * scale_ratio 119 | t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy)) 120 | t3 = trans.SimilarityTransform(rotation=rot) 121 | t4 = trans.SimilarityTransform(translation=(output_size / 2, 122 | output_size / 2)) 123 | t = t1 + t2 + t3 + t4 124 | M = t.params[0:2] 125 | cropped = cv2.warpAffine(data, 126 | M, (output_size, output_size), 127 | borderValue=0.0) 128 | return cropped, M 129 | 130 | 131 | def trans_points2d(pts, M): 132 | new_pts = np.zeros(shape=pts.shape, dtype=np.float32) 133 | for i in range(pts.shape[0]): 134 | pt = pts[i] 135 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32) 136 | new_pt = np.dot(M, new_pt) 137 | #print('new_pt', new_pt.shape, new_pt) 138 | new_pts[i] = new_pt[0:2] 139 | 140 | return new_pts 141 | 142 | 143 | def trans_points3d(pts, M): 144 | scale = np.sqrt(M[0][0] * M[0][0] + M[0][1] * M[0][1]) 145 | #print(scale) 146 | new_pts = np.zeros(shape=pts.shape, dtype=np.float32) 147 | for i in range(pts.shape[0]): 148 | pt = pts[i] 149 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32) 150 | new_pt = np.dot(M, new_pt) 151 | #print('new_pt', new_pt.shape, new_pt) 152 | new_pts[i][0:2] = new_pt[0:2] 153 | new_pts[i][2] = pts[i][2] * scale 154 | 155 | return new_pts 156 | 157 | 158 | def trans_points(pts, M): 159 | if pts.shape[1] == 2: 160 | return 
trans_points2d(pts, M) 161 | else: 162 | return trans_points3d(pts, M) 163 | 164 | -------------------------------------------------------------------------------- /inference/models/__pycache__/attention.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/attention.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/controlnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/controlnet.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/controlnet_attention.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/controlnet_attention.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/controlnet_unet_blocks.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/controlnet_unet_blocks.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/pipeline_animatediff_videocontrolnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/pipeline_animatediff_videocontrolnet.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/pipeline_animatediff_videocontrolnet_threestage.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/pipeline_animatediff_videocontrolnet_threestage.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/pipeline_animatediff_videocontrolnet_twostage.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/pipeline_animatediff_videocontrolnet_twostage.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/pipeline_animatediff_videocontrolnet_twounet_add.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/pipeline_animatediff_videocontrolnet_twounet_add.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/pipeline_controlvideo.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/pipeline_controlvideo.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/pipeline_controlvideo_AR.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/pipeline_controlvideo_AR.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/pipeline_imageunet_animatediff_twostage.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/pipeline_imageunet_animatediff_twostage.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/resnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/resnet.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/stable_diffusion_controlnet_img2img.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/stable_diffusion_controlnet_img2img.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/unet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/unet.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/unet_blocks.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/unet_blocks.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/models/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /inference/models/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import imageio 3 | import numpy as np 4 | from typing import Union 5 | import decord 6 | decord.bridge.set_bridge('torch') 7 | import torch 8 | import torchvision 9 | import PIL 10 | from typing import List 11 | from tqdm import tqdm 12 | from einops import rearrange 13 | 14 | from controlnet_aux import CannyDetector 15 | 16 | from PIL import Image 17 | 18 | def 
save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=4, fps=8): 19 | videos = rearrange(videos, "b c t h w -> t b c h w") 20 | outputs = [] 21 | for x in videos: 22 | x = torchvision.utils.make_grid(x, nrow=n_rows) 23 | x = x.transpose(0, 1).transpose(1, 2).squeeze(-1) 24 | if rescale: 25 | x = (x + 1.0) / 2.0 # -1,1 -> 0,1 26 | x = (x * 255).numpy().astype(np.uint8) 27 | outputs.append(x) 28 | 29 | os.makedirs(os.path.dirname(path), exist_ok=True) 30 | imageio.mimsave(path, outputs, fps=fps) 31 | return outputs 32 | 33 | def save_image(image, path, rescale=False, n_rows=4,): 34 | image =rearrange(image, "b c t h w -> t b c h w") 35 | x = image[0] 36 | x = torchvision.utils.make_grid(x, nrow=n_rows) 37 | x = x.transpose(0, 1).transpose(1, 2).squeeze(-1) 38 | if rescale: 39 | x = (x + 1.0) / 2.0 # -1,1 -> 0,1 40 | x = (x * 255).numpy().astype(np.uint8) 41 | x = Image.fromarray(x) 42 | x.save(path) 43 | 44 | def save_videos_grid_pil(videos: List[PIL.Image.Image], path: str, rescale=False, n_rows=4, fps=8): 45 | videos = rearrange(videos, "b c t h w -> t b c h w") 46 | outputs = [] 47 | for x in videos: 48 | x = torchvision.utils.make_grid(x, nrow=n_rows) 49 | x = x.transpose(0, 1).transpose(1, 2).squeeze(-1) 50 | if rescale: 51 | x = (x + 1.0) / 2.0 # -1,1 -> 0,1 52 | x = (x * 255).numpy().astype(np.uint8) 53 | outputs.append(x) 54 | 55 | os.makedirs(os.path.dirname(path), exist_ok=True) 56 | imageio.mimsave(path, outputs, fps=fps) 57 | 58 | def read_video(video_path, video_length, width=512, height=512, frame_rate=2): 59 | vr = decord.VideoReader(video_path, width=width, height=height) 60 | sample_index = list(range(0, len(vr), frame_rate))[:video_length] 61 | video = vr.get_batch(sample_index) 62 | video = rearrange(video, "f h w c -> f c h w") 63 | video = (video / 127.5 - 1.0) 64 | return video 65 | 66 | 67 | def get_annotation(video, annotator): 68 | t2i_transform = torchvision.transforms.ToPILImage() 69 | annotation = [] 70 | for frame in video: 71 | pil_frame = t2i_transform(frame) 72 | if isinstance(annotator, CannyDetector): 73 | annotation.append(annotator(pil_frame, low_threshold=100, high_threshold=200)) 74 | else: 75 | annotation.append(annotator(pil_frame)) 76 | return annotation 77 | 78 | # DDIM Inversion 79 | @torch.no_grad() 80 | def init_prompt(prompt, pipeline): 81 | uncond_input = pipeline.tokenizer( 82 | [""], padding="max_length", max_length=pipeline.tokenizer.model_max_length, 83 | return_tensors="pt" 84 | ) 85 | uncond_embeddings = pipeline.text_encoder(uncond_input.input_ids.to(pipeline.device))[0] 86 | text_input = pipeline.tokenizer( 87 | [prompt], 88 | padding="max_length", 89 | max_length=pipeline.tokenizer.model_max_length, 90 | truncation=True, 91 | return_tensors="pt", 92 | ) 93 | text_embeddings = pipeline.text_encoder(text_input.input_ids.to(pipeline.device))[0] 94 | context = torch.cat([uncond_embeddings, text_embeddings]) 95 | 96 | return context 97 | 98 | 99 | def next_step(model_output: Union[torch.FloatTensor, np.ndarray], timestep: int, 100 | sample: Union[torch.FloatTensor, np.ndarray], ddim_scheduler): 101 | timestep, next_timestep = min( 102 | timestep - ddim_scheduler.config.num_train_timesteps // ddim_scheduler.num_inference_steps, 999), timestep 103 | alpha_prod_t = ddim_scheduler.alphas_cumprod[timestep] if timestep >= 0 else ddim_scheduler.final_alpha_cumprod 104 | alpha_prod_t_next = ddim_scheduler.alphas_cumprod[next_timestep] 105 | beta_prod_t = 1 - alpha_prod_t 106 | next_original_sample = (sample - beta_prod_t ** 0.5 * 
model_output) / alpha_prod_t ** 0.5 107 | next_sample_direction = (1 - alpha_prod_t_next) ** 0.5 * model_output 108 | next_sample = alpha_prod_t_next ** 0.5 * next_original_sample + next_sample_direction 109 | return next_sample 110 | 111 | 112 | def get_noise_pred_single(latents, t, context, unet): 113 | noise_pred = unet(latents, t, encoder_hidden_states=context)["sample"] 114 | return noise_pred 115 | 116 | 117 | @torch.no_grad() 118 | def ddim_loop(pipeline, ddim_scheduler, latent, num_inv_steps, prompt): 119 | context = init_prompt(prompt, pipeline) 120 | uncond_embeddings, cond_embeddings = context.chunk(2) 121 | all_latent = [latent] 122 | latent = latent.clone().detach() 123 | for i in tqdm(range(num_inv_steps)): 124 | t = ddim_scheduler.timesteps[len(ddim_scheduler.timesteps) - i - 1] 125 | noise_pred = get_noise_pred_single(latent, t, cond_embeddings, pipeline.unet) 126 | latent = next_step(noise_pred, t, latent, ddim_scheduler) 127 | all_latent.append(latent) 128 | return all_latent 129 | 130 | 131 | @torch.no_grad() 132 | def ddim_inversion(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt=""): 133 | ddim_latents = ddim_loop(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt) 134 | return ddim_latents 135 | -------------------------------------------------------------------------------- /inference/parsing_model/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/parsing_model/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /inference/parsing_model/__pycache__/resnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/parsing_model/__pycache__/resnet.cpython-38.pyc -------------------------------------------------------------------------------- /inference/parsing_model/resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.utils.model_zoo as modelzoo 8 | 9 | # from modules.bn import InPlaceABNSync as BatchNorm2d 10 | 11 | resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth' 12 | 13 | 14 | def conv3x3(in_planes, out_planes, stride=1): 15 | """3x3 convolution with padding""" 16 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 17 | padding=1, bias=False) 18 | 19 | 20 | class BasicBlock(nn.Module): 21 | def __init__(self, in_chan, out_chan, stride=1): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(in_chan, out_chan, stride) 24 | self.bn1 = nn.BatchNorm2d(out_chan) 25 | self.conv2 = conv3x3(out_chan, out_chan) 26 | self.bn2 = nn.BatchNorm2d(out_chan) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.downsample = None 29 | if in_chan != out_chan or stride != 1: 30 | self.downsample = nn.Sequential( 31 | nn.Conv2d(in_chan, out_chan, 32 | kernel_size=1, stride=stride, bias=False), 33 | nn.BatchNorm2d(out_chan), 34 | ) 35 | 36 | def forward(self, x): 37 | residual = self.conv1(x) 38 | residual = F.relu(self.bn1(residual)) 39 | residual = self.conv2(residual) 40 | residual = self.bn2(residual) 41 | 42 | shortcut = x 43 | if 
self.downsample is not None: 44 | shortcut = self.downsample(x) 45 | 46 | out = shortcut + residual 47 | out = self.relu(out) 48 | return out 49 | 50 | 51 | def create_layer_basic(in_chan, out_chan, bnum, stride=1): 52 | layers = [BasicBlock(in_chan, out_chan, stride=stride)] 53 | for i in range(bnum-1): 54 | layers.append(BasicBlock(out_chan, out_chan, stride=1)) 55 | return nn.Sequential(*layers) 56 | 57 | 58 | class Resnet18(nn.Module): 59 | def __init__(self): 60 | super(Resnet18, self).__init__() 61 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 62 | bias=False) 63 | self.bn1 = nn.BatchNorm2d(64) 64 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 65 | self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1) 66 | self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2) 67 | self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2) 68 | self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2) 69 | self.init_weight() 70 | 71 | def forward(self, x): 72 | x = self.conv1(x) 73 | x = F.relu(self.bn1(x)) 74 | x = self.maxpool(x) 75 | 76 | x = self.layer1(x) 77 | feat8 = self.layer2(x) # 1/8 78 | feat16 = self.layer3(feat8) # 1/16 79 | feat32 = self.layer4(feat16) # 1/32 80 | return feat8, feat16, feat32 81 | 82 | def init_weight(self): 83 | state_dict = modelzoo.load_url(resnet18_url) 84 | self_state_dict = self.state_dict() 85 | for k, v in state_dict.items(): 86 | if 'fc' in k: continue 87 | self_state_dict.update({k: v}) 88 | self.load_state_dict(self_state_dict) 89 | 90 | def get_params(self): 91 | wd_params, nowd_params = [], [] 92 | for name, module in self.named_modules(): 93 | if isinstance(module, (nn.Linear, nn.Conv2d)): 94 | wd_params.append(module.weight) 95 | if not module.bias is None: 96 | nowd_params.append(module.bias) 97 | elif isinstance(module, nn.BatchNorm2d): 98 | nowd_params += list(module.parameters()) 99 | return wd_params, nowd_params 100 | 101 | 102 | if __name__ == "__main__": 103 | net = Resnet18() 104 | x = torch.randn(16, 3, 224, 224) 105 | out = net(x) 106 | print(out[0].size()) 107 | print(out[1].size()) 108 | print(out[2].size()) 109 | net.get_params() 110 | -------------------------------------------------------------------------------- /inference/pipelines/__pycache__/pipeline_stable_diffusion_controlnet_inpaint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/pipelines/__pycache__/pipeline_stable_diffusion_controlnet_inpaint.cpython-38.pyc -------------------------------------------------------------------------------- /inference/res/cp/79999_iter.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/res/cp/79999_iter.pth -------------------------------------------------------------------------------- /inference/resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.utils.model_zoo as modelzoo 8 | 9 | # from modules.bn import InPlaceABNSync as BatchNorm2d 10 | 11 | resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth' 12 | 13 | 14 | def conv3x3(in_planes, out_planes, stride=1): 15 | """3x3 convolution with 
padding""" 16 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 17 | padding=1, bias=False) 18 | 19 | 20 | class BasicBlock(nn.Module): 21 | def __init__(self, in_chan, out_chan, stride=1): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(in_chan, out_chan, stride) 24 | self.bn1 = nn.BatchNorm2d(out_chan) 25 | self.conv2 = conv3x3(out_chan, out_chan) 26 | self.bn2 = nn.BatchNorm2d(out_chan) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.downsample = None 29 | if in_chan != out_chan or stride != 1: 30 | self.downsample = nn.Sequential( 31 | nn.Conv2d(in_chan, out_chan, 32 | kernel_size=1, stride=stride, bias=False), 33 | nn.BatchNorm2d(out_chan), 34 | ) 35 | 36 | def forward(self, x): 37 | residual = self.conv1(x) 38 | residual = F.relu(self.bn1(residual)) 39 | residual = self.conv2(residual) 40 | residual = self.bn2(residual) 41 | 42 | shortcut = x 43 | if self.downsample is not None: 44 | shortcut = self.downsample(x) 45 | 46 | out = shortcut + residual 47 | out = self.relu(out) 48 | return out 49 | 50 | 51 | def create_layer_basic(in_chan, out_chan, bnum, stride=1): 52 | layers = [BasicBlock(in_chan, out_chan, stride=stride)] 53 | for i in range(bnum-1): 54 | layers.append(BasicBlock(out_chan, out_chan, stride=1)) 55 | return nn.Sequential(*layers) 56 | 57 | 58 | class Resnet18(nn.Module): 59 | def __init__(self): 60 | super(Resnet18, self).__init__() 61 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 62 | bias=False) 63 | self.bn1 = nn.BatchNorm2d(64) 64 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 65 | self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1) 66 | self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2) 67 | self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2) 68 | self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2) 69 | self.init_weight() 70 | 71 | def forward(self, x): 72 | x = self.conv1(x) 73 | x = F.relu(self.bn1(x)) 74 | x = self.maxpool(x) 75 | 76 | x = self.layer1(x) 77 | feat8 = self.layer2(x) # 1/8 78 | feat16 = self.layer3(feat8) # 1/16 79 | feat32 = self.layer4(feat16) # 1/32 80 | return feat8, feat16, feat32 81 | 82 | def init_weight(self): 83 | state_dict = modelzoo.load_url(resnet18_url) 84 | self_state_dict = self.state_dict() 85 | for k, v in state_dict.items(): 86 | if 'fc' in k: continue 87 | self_state_dict.update({k: v}) 88 | self.load_state_dict(self_state_dict) 89 | 90 | def get_params(self): 91 | wd_params, nowd_params = [], [] 92 | for name, module in self.named_modules(): 93 | if isinstance(module, (nn.Linear, nn.Conv2d)): 94 | wd_params.append(module.weight) 95 | if not module.bias is None: 96 | nowd_params.append(module.bias) 97 | elif isinstance(module, nn.BatchNorm2d): 98 | nowd_params += list(module.parameters()) 99 | return wd_params, nowd_params 100 | 101 | 102 | if __name__ == "__main__": 103 | net = Resnet18() 104 | x = torch.randn(16, 3, 224, 224) 105 | out = net(x) 106 | print(out[0].size()) 107 | print(out[1].size()) 108 | print(out[2].size()) 109 | net.get_params() 110 | -------------------------------------------------------------------------------- /inference/samples/appearance/body.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/appearance/body.png -------------------------------------------------------------------------------- /inference/samples/appearance/head.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/appearance/head.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000001.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000002.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000003.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000004.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000005.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000006.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000007.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000008.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000009.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000010.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000010.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000011.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000012.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000013.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000013.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000014.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000014.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000015.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000015.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000016.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000016.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000017.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000017.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000018.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000019.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000020.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000020.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000021.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000022.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000023.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000023.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000024.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000025.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000026.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000026.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000027.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000027.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000028.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000028.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000029.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000029.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000030.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000030.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000031.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000031.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000032.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000032.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000033.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000033.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000034.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000034.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000035.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000035.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000036.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000036.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000037.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000037.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000038.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000038.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000039.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000039.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000040.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000040.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000041.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000041.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000042.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000042.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000043.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000043.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000044.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000044.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000045.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000045.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000046.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000046.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000047.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000047.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000048.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000048.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000049.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000049.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000050.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000050.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000051.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000051.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000052.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000052.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000053.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000053.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000054.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000054.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000055.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000055.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000056.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000056.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000057.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000057.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000058.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000058.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000059.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000059.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000060.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000060.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000061.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000061.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000062.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000062.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000063.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000063.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000064.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000064.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000065.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000065.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000066.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000066.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000067.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000067.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000068.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000068.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000069.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000069.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000070.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000070.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000071.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000071.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000072.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000072.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000073.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000073.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000074.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000074.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000075.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000075.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000076.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000076.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000077.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000077.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000078.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000078.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000079.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000079.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000080.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000080.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000081.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000081.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000082.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000082.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000083.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000083.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000084.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000084.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000085.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000085.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000086.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000086.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000087.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000087.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000088.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000088.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000089.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000089.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000090.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000090.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000091.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000091.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000092.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000092.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000093.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000093.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000094.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000094.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000095.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000095.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000096.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000096.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000097.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000097.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000098.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000098.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000099.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000099.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000100.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000101.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000101.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000102.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000102.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000103.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000103.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000104.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000104.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000105.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000105.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000106.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000106.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000107.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000107.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000108.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000108.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000109.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000109.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000110.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000110.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000111.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000111.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000112.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000112.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000113.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000113.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000114.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000114.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000115.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000115.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000116.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000116.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000117.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000117.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000118.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000118.png -------------------------------------------------------------------------------- /inference/samples/poses/seth1/000119.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/samples/poses/seth1/000119.png -------------------------------------------------------------------------------- /inference/util/.ipynb_checkpoints/norm-checkpoint.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import numpy as np 3 | import torch 4 | class 
SpecificNorm(nn.Module): 5 | def __init__(self, epsilon=1e-8): 6 | """ 7 | @notice: avoid in-place ops. 8 | https://discuss.pytorch.org/t/encounter-the-runtimeerror-one-of-the-variables-needed-for-gradient-computation-has-been-modified-by-an-inplace-operation/836/3 9 | """ 10 | super(SpecificNorm, self).__init__() 11 | self.mean = np.array([0.485, 0.456, 0.406]) 12 | self.mean = torch.from_numpy(self.mean).float().cuda() 13 | self.mean = self.mean.view([1, 3, 1, 1]) 14 | 15 | self.std = np.array([0.229, 0.224, 0.225]) 16 | self.std = torch.from_numpy(self.std).float().cuda() 17 | self.std = self.std.view([1, 3, 1, 1]) 18 | 19 | def forward(self, x): 20 | mean = self.mean.expand([1, 3, x.shape[2], x.shape[3]]) 21 | std = self.std.expand([1, 3, x.shape[2], x.shape[3]]) 22 | 23 | x = (x - mean) / std 24 | 25 | return x -------------------------------------------------------------------------------- /inference/util/.ipynb_checkpoints/videoswap-checkpoint.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 19:19:52 7 | Description: 8 | ''' 9 | import os 10 | import cv2 11 | import glob 12 | import torch 13 | import shutil 14 | import numpy as np 15 | from tqdm import tqdm 16 | from util.reverse2original import reverse2wholeimage 17 | import moviepy.editor as mp 18 | from moviepy.editor import AudioFileClip, VideoFileClip 19 | from moviepy.video.io.ImageSequenceClip import ImageSequenceClip 20 | import time 21 | from util.add_watermark import watermark_image 22 | from util.norm import SpecificNorm 23 | from parsing_model.model import BiSeNet 24 | 25 | def _totensor(array): 26 | tensor = torch.from_numpy(array) 27 | img = tensor.transpose(0, 1).transpose(0, 2).contiguous() 28 | return img.float().div(255) 29 | 30 | def video_swap(video_path, id_vetor, swap_model, detect_model, save_path, temp_results_dir='./temp_results', crop_size=224, no_simswaplogo = False,use_mask =False): 31 | video_forcheck = VideoFileClip(video_path) 32 | if video_forcheck.audio is None: 33 | no_audio = True 34 | else: 35 | no_audio = False 36 | 37 | del video_forcheck 38 | 39 | if not no_audio: 40 | video_audio_clip = AudioFileClip(video_path) 41 | 42 | video = cv2.VideoCapture(video_path) 43 | logoclass = watermark_image('./simswaplogo/simswaplogo.png') 44 | ret = True 45 | frame_index = 0 46 | 47 | frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) 48 | 49 | # video_WIDTH = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) 50 | 51 | # video_HEIGHT = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) 52 | 53 | fps = video.get(cv2.CAP_PROP_FPS) 54 | if os.path.exists(temp_results_dir): 55 | shutil.rmtree(temp_results_dir) 56 | 57 | spNorm =SpecificNorm() 58 | if use_mask: 59 | n_classes = 19 60 | net = BiSeNet(n_classes=n_classes) 61 | net.cuda() 62 | save_pth = os.path.join('./parsing_model/checkpoint', '79999_iter.pth') 63 | net.load_state_dict(torch.load(save_pth)) 64 | net.eval() 65 | else: 66 | net =None 67 | 68 | # while ret: 69 | for frame_index in tqdm(range(frame_count)): 70 | ret, frame = video.read() 71 | if ret: 72 | detect_results = detect_model.get(frame,crop_size) 73 | 74 | if detect_results is not None: 75 | # print(frame_index) 76 | if not os.path.exists(temp_results_dir): 77 | os.mkdir(temp_results_dir) 78 | frame_align_crop_list = detect_results[0] 79 | frame_mat_list = detect_results[1] 80 | swap_result_list = [] 81 | 
frame_align_crop_tenor_list = [] 82 | for frame_align_crop in frame_align_crop_list: 83 | 84 | # BGR TO RGB 85 | # frame_align_crop_RGB = frame_align_crop[...,::-1] 86 | 87 | frame_align_crop_tenor = _totensor(cv2.cvtColor(frame_align_crop,cv2.COLOR_BGR2RGB))[None,...].cuda() 88 | 89 | swap_result = swap_model(None, frame_align_crop_tenor, id_vetor, None, True)[0] 90 | cv2.imwrite(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)), frame) 91 | swap_result_list.append(swap_result) 92 | frame_align_crop_tenor_list.append(frame_align_crop_tenor) 93 | 94 | 95 | 96 | reverse2wholeimage(frame_align_crop_tenor_list,swap_result_list, frame_mat_list, crop_size, frame, logoclass,\ 97 | os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)),no_simswaplogo,pasring_model =net,use_mask=use_mask, norm = spNorm) 98 | 99 | else: 100 | if not os.path.exists(temp_results_dir): 101 | os.mkdir(temp_results_dir) 102 | frame = frame.astype(np.uint8) 103 | if not no_simswaplogo: 104 | frame = logoclass.apply_frames(frame) 105 | cv2.imwrite(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)), frame) 106 | else: 107 | break 108 | 109 | video.release() 110 | 111 | # image_filename_list = [] 112 | path = os.path.join(temp_results_dir,'*.jpg') 113 | image_filenames = sorted(glob.glob(path)) 114 | 115 | clips = ImageSequenceClip(image_filenames,fps = fps) 116 | 117 | if not no_audio: 118 | clips = clips.set_audio(video_audio_clip) 119 | 120 | 121 | clips.write_videofile(save_path,audio_codec='aac') 122 | 123 | -------------------------------------------------------------------------------- /inference/util/__pycache__/add_watermark.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/util/__pycache__/add_watermark.cpython-38.pyc -------------------------------------------------------------------------------- /inference/util/__pycache__/logo_class.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/util/__pycache__/logo_class.cpython-38.pyc -------------------------------------------------------------------------------- /inference/util/__pycache__/norm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/util/__pycache__/norm.cpython-38.pyc -------------------------------------------------------------------------------- /inference/util/__pycache__/plot.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/util/__pycache__/plot.cpython-38.pyc -------------------------------------------------------------------------------- /inference/util/__pycache__/reverse2original.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/util/__pycache__/reverse2original.cpython-38.pyc -------------------------------------------------------------------------------- /inference/util/__pycache__/reverse2original_hzy.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/util/__pycache__/reverse2original_hzy.cpython-38.pyc -------------------------------------------------------------------------------- /inference/util/__pycache__/reverse2original_swapmask.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/util/__pycache__/reverse2original_swapmask.cpython-38.pyc -------------------------------------------------------------------------------- /inference/util/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/util/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /inference/util/__pycache__/videoswap.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/inference/util/__pycache__/videoswap.cpython-38.pyc -------------------------------------------------------------------------------- /inference/util/add_watermark.py: -------------------------------------------------------------------------------- 1 | 2 | import cv2 3 | import numpy as np 4 | from PIL import Image 5 | import math 6 | import numpy as np 7 | # import torch 8 | # from torchvision import transforms 9 | 10 | def rotate_image(image, angle, center = None, scale = 1.0): 11 | (h, w) = image.shape[:2] 12 | 13 | if center is None: 14 | center = (w / 2, h / 2) 15 | 16 | # Perform the rotation 17 | M = cv2.getRotationMatrix2D(center, angle, scale) 18 | rotated = cv2.warpAffine(image, M, (w, h)) 19 | 20 | return rotated 21 | 22 | class watermark_image: 23 | def __init__(self, logo_path, size=0.3, oritation="DR", margin=(5,20,20,20), angle=15, rgb_weight=(0,1,1.5), input_frame_shape=None) -> None: 24 | 25 | logo_image = cv2.imread(logo_path, cv2.IMREAD_UNCHANGED) 26 | h,w,c = logo_image.shape 27 | if angle%360 != 0: 28 | new_h = w*math.sin(angle/180*math.pi) + h*math.cos(angle/180*math.pi) 29 | pad_h = int((new_h-h)//2) 30 | 31 | padding = np.zeros((pad_h, w, c), dtype=np.uint8) 32 | logo_image = cv2.vconcat([logo_image, padding]) 33 | logo_image = cv2.vconcat([padding, logo_image]) 34 | 35 | logo_image = rotate_image(logo_image, angle) 36 | print(logo_image.shape) 37 | self.logo_image = logo_image 38 | 39 | if self.logo_image.shape[2] < 4: 40 | print("No alpha channel found!") 41 | self.logo_image = self.__addAlpha__(self.logo_image) #add alpha channel 42 | self.size = size 43 | self.oritation = oritation 44 | self.margin = margin 45 | self.ori_shape = self.logo_image.shape 46 | self.resized = False 47 | self.rgb_weight = rgb_weight 48 | 49 | self.logo_image[:, :, 2] = self.logo_image[:, :, 2]*self.rgb_weight[0] 50 | self.logo_image[:, :, 1] = self.logo_image[:, :, 1]*self.rgb_weight[1] 51 | self.logo_image[:, :, 0] = self.logo_image[:, :, 0]*self.rgb_weight[2] 52 | 53 | if input_frame_shape is not None: 54 | 55 | logo_w = input_frame_shape[1] * self.size 56 | ratio = logo_w / self.ori_shape[1] 57 | logo_h = int(ratio * self.ori_shape[0]) 58 | logo_w = int(logo_w) 59 | 60 | size = (logo_w, logo_h) 
61 | self.logo_image = cv2.resize(self.logo_image, size, interpolation = cv2.INTER_CUBIC) 62 | self.resized = True 63 | if oritation == "UL": 64 | self.coor_h = self.margin[1] 65 | self.coor_w = self.margin[0] 66 | elif oritation == "UR": 67 | self.coor_h = self.margin[1] 68 | self.coor_w = input_frame_shape[1] - (logo_w + self.margin[2]) 69 | elif oritation == "DL": 70 | self.coor_h = input_frame_shape[0] - (logo_h + self.margin[1]) 71 | self.coor_w = self.margin[0] 72 | else: 73 | self.coor_h = input_frame_shape[0] - (logo_h + self.margin[3]) 74 | self.coor_w = input_frame_shape[1] - (logo_w + self.margin[2]) 75 | self.logo_w = logo_w 76 | self.logo_h = logo_h 77 | self.mask = self.logo_image[:,:,3] 78 | self.mask = cv2.bitwise_not(self.mask//255) 79 | 80 | def apply_frames(self, frame): 81 | 82 | if not self.resized: 83 | shape = frame.shape 84 | logo_w = shape[1] * self.size 85 | ratio = logo_w / self.ori_shape[1] 86 | logo_h = int(ratio * self.ori_shape[0]) 87 | logo_w = int(logo_w) 88 | 89 | size = (logo_w, logo_h) 90 | self.logo_image = cv2.resize(self.logo_image, size, interpolation = cv2.INTER_CUBIC) 91 | self.resized = True 92 | if self.oritation == "UL": 93 | self.coor_h = self.margin[1] 94 | self.coor_w = self.margin[0] 95 | elif self.oritation == "UR": 96 | self.coor_h = self.margin[1] 97 | self.coor_w = shape[1] - (logo_w + self.margin[2]) 98 | elif self.oritation == "DL": 99 | self.coor_h = shape[0] - (logo_h + self.margin[1]) 100 | self.coor_w = self.margin[0] 101 | else: 102 | self.coor_h = shape[0] - (logo_h + self.margin[3]) 103 | self.coor_w = shape[1] - (logo_w + self.margin[2]) 104 | self.logo_w = logo_w 105 | self.logo_h = logo_h 106 | self.mask = self.logo_image[:,:,3] 107 | self.mask = cv2.bitwise_not(self.mask//255) 108 | 109 | original_frame = frame[self.coor_h:(self.coor_h+self.logo_h), self.coor_w:(self.coor_w+self.logo_w),:] 110 | blending_logo = cv2.add(self.logo_image[:,:,0:3],original_frame,mask = self.mask) 111 | frame[self.coor_h:(self.coor_h+self.logo_h), self.coor_w:(self.coor_w+self.logo_w),:] = blending_logo 112 | return frame 113 | 114 | def __addAlpha__(self, image): 115 | shape = image.shape 116 | alpha_channel = np.ones((shape[0],shape[1],1),np.uint8)*255 117 | return np.concatenate((image,alpha_channel),2) 118 | 119 | -------------------------------------------------------------------------------- /inference/util/html.py: -------------------------------------------------------------------------------- 1 | import dominate 2 | from dominate.tags import * 3 | import os 4 | 5 | 6 | class HTML: 7 | def __init__(self, web_dir, title, refresh=0): 8 | self.title = title 9 | self.web_dir = web_dir 10 | self.img_dir = os.path.join(self.web_dir, 'images') 11 | if not os.path.exists(self.web_dir): 12 | os.makedirs(self.web_dir) 13 | if not os.path.exists(self.img_dir): 14 | os.makedirs(self.img_dir) 15 | 16 | self.doc = dominate.document(title=title) 17 | if refresh > 0: 18 | with self.doc.head: 19 | meta(http_equiv="refresh", content=str(refresh)) 20 | 21 | def get_image_dir(self): 22 | return self.img_dir 23 | 24 | def add_header(self, str): 25 | with self.doc: 26 | h3(str) 27 | 28 | def add_table(self, border=1): 29 | self.t = table(border=border, style="table-layout: fixed;") 30 | self.doc.add(self.t) 31 | 32 | def add_images(self, ims, txts, links, width=512): 33 | self.add_table() 34 | with self.t: 35 | with tr(): 36 | for im, txt, link in zip(ims, txts, links): 37 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 38 | with p(): 
39 | with a(href=os.path.join('images', link)): 40 | img(style="width:%dpx" % (width), src=os.path.join('images', im)) 41 | br() 42 | p(txt) 43 | 44 | def save(self): 45 | html_file = '%s/index.html' % self.web_dir 46 | f = open(html_file, 'wt') 47 | f.write(self.doc.render()) 48 | f.close() 49 | 50 | 51 | if __name__ == '__main__': 52 | html = HTML('web/', 'test_html') 53 | html.add_header('hello world') 54 | 55 | ims = [] 56 | txts = [] 57 | links = [] 58 | for n in range(4): 59 | ims.append('image_%d.jpg' % n) 60 | txts.append('text_%d' % n) 61 | links.append('image_%d.jpg' % n) 62 | html.add_images(ims, txts, links) 63 | html.save() 64 | -------------------------------------------------------------------------------- /inference/util/image_pool.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | from torch.autograd import Variable 4 | class ImagePool(): 5 | def __init__(self, pool_size): 6 | self.pool_size = pool_size 7 | if self.pool_size > 0: 8 | self.num_imgs = 0 9 | self.images = [] 10 | 11 | def query(self, images): 12 | if self.pool_size == 0: 13 | return images 14 | return_images = [] 15 | for image in images.data: 16 | image = torch.unsqueeze(image, 0) 17 | if self.num_imgs < self.pool_size: 18 | self.num_imgs = self.num_imgs + 1 19 | self.images.append(image) 20 | return_images.append(image) 21 | else: 22 | p = random.uniform(0, 1) 23 | if p > 0.5: 24 | random_id = random.randint(0, self.pool_size-1) 25 | tmp = self.images[random_id].clone() 26 | self.images[random_id] = image 27 | return_images.append(tmp) 28 | else: 29 | return_images.append(image) 30 | return_images = Variable(torch.cat(return_images, 0)) 31 | return return_images 32 | -------------------------------------------------------------------------------- /inference/util/json_config.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def readConfig(path): 5 | with open(path,'r') as cf: 6 | nodelocaltionstr = cf.read() 7 | nodelocaltioninf = json.loads(nodelocaltionstr) 8 | if isinstance(nodelocaltioninf,str): 9 | nodelocaltioninf = json.loads(nodelocaltioninf) 10 | return nodelocaltioninf 11 | 12 | def writeConfig(path, info): 13 | with open(path, 'w') as cf: 14 | configjson = json.dumps(info, indent=4) 15 | cf.writelines(configjson) -------------------------------------------------------------------------------- /inference/util/logo_class.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: logo_class.py 5 | # Created Date: Tuesday June 29th 2021 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Monday, 11th October 2021 12:39:55 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2021 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | class logo_class: 14 | 15 | @staticmethod 16 | def print_group_logo(): 17 | logo_str = """ 18 | 19 | ███╗ ██╗██████╗ ███████╗██╗ ██████╗ ███████╗ ██╗████████╗██╗ ██╗ 20 | ████╗ ██║██╔══██╗██╔════╝██║██╔════╝ ██╔════╝ ██║╚══██╔══╝██║ ██║ 21 | ██╔██╗ ██║██████╔╝███████╗██║██║ ███╗ ███████╗ ██║ ██║ ██║ ██║ 22 | ██║╚██╗██║██╔══██╗╚════██║██║██║ ██║ ╚════██║██ ██║ ██║ ██║ ██║ 23 | ██║ ╚████║██║ ██║███████║██║╚██████╔╝ ███████║╚█████╔╝ ██║ ╚██████╔╝ 24 | ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═════╝ ╚══════╝ 
╚════╝ ╚═╝ ╚═════╝ 25 | Neural Rendering Special Interesting Group of SJTU 26 | 27 | """ 28 | print(logo_str) 29 | 30 | @staticmethod 31 | def print_start_training(): 32 | logo_str = """ 33 | _____ __ __ ______ _ _ 34 | / ___/ / /_ ____ _ _____ / /_ /_ __/_____ ____ _ (_)____ (_)____ ____ _ 35 | \__ \ / __// __ `// ___// __/ / / / ___// __ `// // __ \ / // __ \ / __ `/ 36 | ___/ // /_ / /_/ // / / /_ / / / / / /_/ // // / / // // / / // /_/ / 37 | /____/ \__/ \__,_//_/ \__/ /_/ /_/ \__,_//_//_/ /_//_//_/ /_/ \__, / 38 | /____/ 39 | """ 40 | print(logo_str) 41 | 42 | if __name__=="__main__": 43 | # logo_class.print_group_logo() 44 | logo_class.print_start_training() -------------------------------------------------------------------------------- /inference/util/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import numpy as np 3 | import torch 4 | class SpecificNorm(nn.Module): 5 | def __init__(self, epsilon=1e-8): 6 | """ 7 | @notice: avoid in-place ops. 8 | https://discuss.pytorch.org/t/encounter-the-runtimeerror-one-of-the-variables-needed-for-gradient-computation-has-been-modified-by-an-inplace-operation/836/3 9 | """ 10 | super(SpecificNorm, self).__init__() 11 | self.mean = np.array([0.485, 0.456, 0.406]) 12 | self.mean = torch.from_numpy(self.mean).float().cuda() 13 | self.mean = self.mean.view([1, 3, 1, 1]) 14 | 15 | self.std = np.array([0.229, 0.224, 0.225]) 16 | self.std = torch.from_numpy(self.std).float().cuda() 17 | self.std = self.std.view([1, 3, 1, 1]) 18 | 19 | def forward(self, x): 20 | mean = self.mean.expand([1, 3, x.shape[2], x.shape[3]]) 21 | std = self.std.expand([1, 3, x.shape[2], x.shape[3]]) 22 | 23 | x = (x - mean) / std 24 | 25 | return x -------------------------------------------------------------------------------- /inference/util/plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import PIL 4 | 5 | def postprocess(x): 6 | """[0,1] to uint8.""" 7 | 8 | x = np.clip(255 * x, 0, 255) 9 | x = np.cast[np.uint8](x) 10 | return x 11 | 12 | def tile(X, rows, cols): 13 | """Tile images for display.""" 14 | tiling = np.zeros((rows * X.shape[1], cols * X.shape[2], X.shape[3]), dtype = X.dtype) 15 | for i in range(rows): 16 | for j in range(cols): 17 | idx = i * cols + j 18 | if idx < X.shape[0]: 19 | img = X[idx,...] 
20 | tiling[ 21 | i*X.shape[1]:(i+1)*X.shape[1], 22 | j*X.shape[2]:(j+1)*X.shape[2], 23 | :] = img 24 | return tiling 25 | 26 | 27 | def plot_batch(X, out_path): 28 | """Save batch of images tiled.""" 29 | n_channels = X.shape[3] 30 | if n_channels > 3: 31 | X = X[:,:,:,np.random.choice(n_channels, size = 3)] 32 | X = postprocess(X) 33 | rc = math.sqrt(X.shape[0]) 34 | rows = cols = math.ceil(rc) 35 | canvas = tile(X, rows, cols) 36 | canvas = np.squeeze(canvas) 37 | PIL.Image.fromarray(canvas).save(out_path) -------------------------------------------------------------------------------- /inference/util/save_heatmap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: save_heatmap.py 5 | # Created Date: Friday January 15th 2021 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Wednesday, 19th January 2022 1:22:47 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2021 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import os 14 | import shutil 15 | import seaborn as sns 16 | import matplotlib.pyplot as plt 17 | import cv2 18 | import numpy as np 19 | 20 | def SaveHeatmap(heatmaps, path, row=-1, dpi=72): 21 | """ 22 | The input tensor must be B X 1 X H X W 23 | """ 24 | batch_size = heatmaps.shape[0] 25 | temp_path = ".temp/" 26 | if not os.path.exists(temp_path): 27 | os.makedirs(temp_path) 28 | final_img = None 29 | if row < 1: 30 | col = batch_size 31 | row = 1 32 | else: 33 | col = batch_size // row 34 | if row * col = col: 51 | col_i = 0 52 | row_i += 1 53 | cv2.imwrite(path,final_img) 54 | 55 | if __name__ == "__main__": 56 | random_map = np.random.randn(16,1,10,10) 57 | SaveHeatmap(random_map,"./wocao.png",1) 58 | -------------------------------------------------------------------------------- /inference/util/util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | import numpy as np 6 | import os 7 | 8 | # Converts a Tensor into a Numpy array 9 | # |imtype|: the desired type of the converted numpy array 10 | def tensor2im(image_tensor, imtype=np.uint8, normalize=True): 11 | if isinstance(image_tensor, list): 12 | image_numpy = [] 13 | for i in range(len(image_tensor)): 14 | image_numpy.append(tensor2im(image_tensor[i], imtype, normalize)) 15 | return image_numpy 16 | image_numpy = image_tensor.cpu().float().numpy() 17 | if normalize: 18 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 19 | else: 20 | image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0 21 | image_numpy = np.clip(image_numpy, 0, 255) 22 | if image_numpy.shape[2] == 1 or image_numpy.shape[2] > 3: 23 | image_numpy = image_numpy[:,:,0] 24 | return image_numpy.astype(imtype) 25 | 26 | # Converts a one-hot tensor into a colorful label map 27 | def tensor2label(label_tensor, n_label, imtype=np.uint8): 28 | if n_label == 0: 29 | return tensor2im(label_tensor, imtype) 30 | label_tensor = label_tensor.cpu().float() 31 | if label_tensor.size()[0] > 1: 32 | label_tensor = label_tensor.max(0, keepdim=True)[1] 33 | label_tensor = Colorize(n_label)(label_tensor) 34 | label_numpy = np.transpose(label_tensor.numpy(), (1, 2, 0)) 35 | return label_numpy.astype(imtype) 36 | 37 | def 
save_image(image_numpy, image_path): 38 | image_pil = Image.fromarray(image_numpy) 39 | image_pil.save(image_path) 40 | 41 | def mkdirs(paths): 42 | if isinstance(paths, list) and not isinstance(paths, str): 43 | for path in paths: 44 | mkdir(path) 45 | else: 46 | mkdir(paths) 47 | 48 | def mkdir(path): 49 | if not os.path.exists(path): 50 | os.makedirs(path) 51 | 52 | ############################################################################### 53 | # Code from 54 | # https://github.com/ycszen/pytorch-seg/blob/master/transform.py 55 | # Modified so it complies with the Citscape label map colors 56 | ############################################################################### 57 | def uint82bin(n, count=8): 58 | """returns the binary of integer n, count refers to amount of bits""" 59 | return ''.join([str((n >> y) & 1) for y in range(count-1, -1, -1)]) 60 | 61 | def labelcolormap(N): 62 | if N == 35: # cityscape 63 | cmap = np.array([( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), (111, 74, 0), ( 81, 0, 81), 64 | (128, 64,128), (244, 35,232), (250,170,160), (230,150,140), ( 70, 70, 70), (102,102,156), (190,153,153), 65 | (180,165,180), (150,100,100), (150,120, 90), (153,153,153), (153,153,153), (250,170, 30), (220,220, 0), 66 | (107,142, 35), (152,251,152), ( 70,130,180), (220, 20, 60), (255, 0, 0), ( 0, 0,142), ( 0, 0, 70), 67 | ( 0, 60,100), ( 0, 0, 90), ( 0, 0,110), ( 0, 80,100), ( 0, 0,230), (119, 11, 32), ( 0, 0,142)], 68 | dtype=np.uint8) 69 | else: 70 | cmap = np.zeros((N, 3), dtype=np.uint8) 71 | for i in range(N): 72 | r, g, b = 0, 0, 0 73 | id = i 74 | for j in range(7): 75 | str_id = uint82bin(id) 76 | r = r ^ (np.uint8(str_id[-1]) << (7-j)) 77 | g = g ^ (np.uint8(str_id[-2]) << (7-j)) 78 | b = b ^ (np.uint8(str_id[-3]) << (7-j)) 79 | id = id >> 3 80 | cmap[i, 0] = r 81 | cmap[i, 1] = g 82 | cmap[i, 2] = b 83 | return cmap 84 | 85 | class Colorize(object): 86 | def __init__(self, n=35): 87 | self.cmap = labelcolormap(n) 88 | self.cmap = torch.from_numpy(self.cmap[:n]) 89 | 90 | def __call__(self, gray_image): 91 | size = gray_image.size() 92 | color_image = torch.ByteTensor(3, size[1], size[2]).fill_(0) 93 | 94 | for label in range(0, len(self.cmap)): 95 | mask = (label == gray_image[0]).cpu() 96 | color_image[0][mask] = self.cmap[label][0] 97 | color_image[1][mask] = self.cmap[label][1] 98 | color_image[2][mask] = self.cmap[label][2] 99 | 100 | return color_image 101 | -------------------------------------------------------------------------------- /inference/util/videoswap.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 19:19:52 7 | Description: 8 | ''' 9 | import os 10 | import cv2 11 | import glob 12 | import torch 13 | import shutil 14 | import numpy as np 15 | from tqdm import tqdm 16 | from util.reverse2original import reverse2wholeimage 17 | import moviepy.editor as mp 18 | from moviepy.editor import AudioFileClip, VideoFileClip 19 | from moviepy.video.io.ImageSequenceClip import ImageSequenceClip 20 | import time 21 | from util.add_watermark import watermark_image 22 | from util.norm import SpecificNorm 23 | from parsing_model.model import BiSeNet 24 | 25 | def _totensor(array): 26 | tensor = torch.from_numpy(array) 27 | img = tensor.transpose(0, 1).transpose(0, 2).contiguous() 28 | return img.float().div(255) 29 | 30 | def video_swap(video_path, id_vetor, 
swap_model, detect_model, save_path, temp_results_dir='./temp_results', crop_size=224, no_simswaplogo = False,use_mask =False): 31 | video_forcheck = VideoFileClip(video_path) 32 | if video_forcheck.audio is None: 33 | no_audio = True 34 | else: 35 | no_audio = False 36 | 37 | del video_forcheck 38 | 39 | if not no_audio: 40 | video_audio_clip = AudioFileClip(video_path) 41 | 42 | video = cv2.VideoCapture(video_path) 43 | logoclass = watermark_image('./simswaplogo/simswaplogo.png') 44 | ret = True 45 | frame_index = 0 46 | 47 | frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) 48 | 49 | # video_WIDTH = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) 50 | 51 | # video_HEIGHT = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) 52 | 53 | fps = video.get(cv2.CAP_PROP_FPS) 54 | if os.path.exists(temp_results_dir): 55 | shutil.rmtree(temp_results_dir) 56 | 57 | spNorm =SpecificNorm() 58 | if use_mask: 59 | n_classes = 19 60 | net = BiSeNet(n_classes=n_classes) 61 | net.cuda() 62 | save_pth = os.path.join('./parsing_model/checkpoint', '79999_iter.pth') 63 | net.load_state_dict(torch.load(save_pth)) 64 | net.eval() 65 | else: 66 | net =None 67 | 68 | # while ret: 69 | for frame_index in tqdm(range(frame_count)): 70 | ret, frame = video.read() 71 | if ret: 72 | detect_results = detect_model.get(frame,crop_size) 73 | 74 | if detect_results is not None: 75 | # print(frame_index) 76 | if not os.path.exists(temp_results_dir): 77 | os.mkdir(temp_results_dir) 78 | frame_align_crop_list = detect_results[0] 79 | frame_mat_list = detect_results[1] 80 | swap_result_list = [] 81 | frame_align_crop_tenor_list = [] 82 | for frame_align_crop in frame_align_crop_list: 83 | 84 | # BGR TO RGB 85 | # frame_align_crop_RGB = frame_align_crop[...,::-1] 86 | 87 | frame_align_crop_tenor = _totensor(cv2.cvtColor(frame_align_crop,cv2.COLOR_BGR2RGB))[None,...].cuda() 88 | 89 | swap_result = swap_model(None, frame_align_crop_tenor, id_vetor, None, True)[0] 90 | cv2.imwrite(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)), frame) 91 | swap_result_list.append(swap_result) 92 | frame_align_crop_tenor_list.append(frame_align_crop_tenor) 93 | 94 | 95 | 96 | reverse2wholeimage(frame_align_crop_tenor_list,swap_result_list, frame_mat_list, crop_size, frame, logoclass,\ 97 | os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)),no_simswaplogo,pasring_model =net,use_mask=use_mask, norm = spNorm) 98 | 99 | else: 100 | if not os.path.exists(temp_results_dir): 101 | os.mkdir(temp_results_dir) 102 | frame = frame.astype(np.uint8) 103 | if not no_simswaplogo: 104 | frame = logoclass.apply_frames(frame) 105 | cv2.imwrite(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)), frame) 106 | else: 107 | break 108 | 109 | video.release() 110 | 111 | # image_filename_list = [] 112 | path = os.path.join(temp_results_dir,'*.jpg') 113 | image_filenames = sorted(glob.glob(path)) 114 | 115 | clips = ImageSequenceClip(image_filenames,fps = fps) 116 | 117 | if not no_audio: 118 | clips = clips.set_audio(video_audio_clip) 119 | 120 | 121 | clips.write_videofile(save_path,audio_codec='aac') 122 | 123 | -------------------------------------------------------------------------------- /inference/util/videoswap_specific.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import glob 4 | import torch 5 | import shutil 6 | import numpy as np 7 | from tqdm import tqdm 8 | from util.reverse2original import reverse2wholeimage 9 | import moviepy.editor 
as mp 10 | from moviepy.editor import AudioFileClip, VideoFileClip 11 | from moviepy.video.io.ImageSequenceClip import ImageSequenceClip 12 | import time 13 | from util.add_watermark import watermark_image 14 | from util.norm import SpecificNorm 15 | import torch.nn.functional as F 16 | from parsing_model.model import BiSeNet 17 | 18 | def _totensor(array): 19 | tensor = torch.from_numpy(array) 20 | img = tensor.transpose(0, 1).transpose(0, 2).contiguous() 21 | return img.float().div(255) 22 | 23 | def video_swap(video_path, id_vetor,specific_person_id_nonorm,id_thres, swap_model, detect_model, save_path, temp_results_dir='./temp_results', crop_size=224, no_simswaplogo = False,use_mask =False): 24 | video_forcheck = VideoFileClip(video_path) 25 | if video_forcheck.audio is None: 26 | no_audio = True 27 | else: 28 | no_audio = False 29 | 30 | del video_forcheck 31 | 32 | if not no_audio: 33 | video_audio_clip = AudioFileClip(video_path) 34 | 35 | video = cv2.VideoCapture(video_path) 36 | logoclass = watermark_image('./simswaplogo/simswaplogo.png') 37 | ret = True 38 | frame_index = 0 39 | 40 | frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) 41 | 42 | # video_WIDTH = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) 43 | 44 | # video_HEIGHT = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) 45 | 46 | fps = video.get(cv2.CAP_PROP_FPS) 47 | if os.path.exists(temp_results_dir): 48 | shutil.rmtree(temp_results_dir) 49 | 50 | spNorm =SpecificNorm() 51 | mse = torch.nn.MSELoss().cuda() 52 | 53 | if use_mask: 54 | n_classes = 19 55 | net = BiSeNet(n_classes=n_classes) 56 | net.cuda() 57 | save_pth = os.path.join('./parsing_model/checkpoint', '79999_iter.pth') 58 | net.load_state_dict(torch.load(save_pth)) 59 | net.eval() 60 | else: 61 | net =None 62 | 63 | # while ret: 64 | for frame_index in tqdm(range(frame_count)): 65 | ret, frame = video.read() 66 | if ret: 67 | detect_results = detect_model.get(frame,crop_size) 68 | 69 | if detect_results is not None: 70 | # print(frame_index) 71 | if not os.path.exists(temp_results_dir): 72 | os.mkdir(temp_results_dir) 73 | frame_align_crop_list = detect_results[0] 74 | frame_mat_list = detect_results[1] 75 | 76 | id_compare_values = [] 77 | frame_align_crop_tenor_list = [] 78 | for frame_align_crop in frame_align_crop_list: 79 | 80 | # BGR TO RGB 81 | # frame_align_crop_RGB = frame_align_crop[...,::-1] 82 | 83 | frame_align_crop_tenor = _totensor(cv2.cvtColor(frame_align_crop,cv2.COLOR_BGR2RGB))[None,...].cuda() 84 | 85 | frame_align_crop_tenor_arcnorm = spNorm(frame_align_crop_tenor) 86 | frame_align_crop_tenor_arcnorm_downsample = F.interpolate(frame_align_crop_tenor_arcnorm, size=(112,112)) 87 | frame_align_crop_crop_id_nonorm = swap_model.netArc(frame_align_crop_tenor_arcnorm_downsample) 88 | 89 | id_compare_values.append(mse(frame_align_crop_crop_id_nonorm,specific_person_id_nonorm).detach().cpu().numpy()) 90 | frame_align_crop_tenor_list.append(frame_align_crop_tenor) 91 | id_compare_values_array = np.array(id_compare_values) 92 | min_index = np.argmin(id_compare_values_array) 93 | min_value = id_compare_values_array[min_index] 94 | if min_value < id_thres: 95 | swap_result = swap_model(None, frame_align_crop_tenor_list[min_index], id_vetor, None, True)[0] 96 | 97 | reverse2wholeimage([frame_align_crop_tenor_list[min_index]], [swap_result], [frame_mat_list[min_index]], crop_size, frame, logoclass,\ 98 | os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)),no_simswaplogo,pasring_model =net,use_mask= use_mask, norm = spNorm) 99 | else: 100 | if 
not os.path.exists(temp_results_dir): 101 | os.mkdir(temp_results_dir) 102 | frame = frame.astype(np.uint8) 103 | if not no_simswaplogo: 104 | frame = logoclass.apply_frames(frame) 105 | cv2.imwrite(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)), frame) 106 | 107 | else: 108 | if not os.path.exists(temp_results_dir): 109 | os.mkdir(temp_results_dir) 110 | frame = frame.astype(np.uint8) 111 | if not no_simswaplogo: 112 | frame = logoclass.apply_frames(frame) 113 | cv2.imwrite(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)), frame) 114 | else: 115 | break 116 | 117 | video.release() 118 | 119 | # image_filename_list = [] 120 | path = os.path.join(temp_results_dir,'*.jpg') 121 | image_filenames = sorted(glob.glob(path)) 122 | 123 | clips = ImageSequenceClip(image_filenames,fps = fps) 124 | 125 | if not no_audio: 126 | clips = clips.set_audio(video_audio_clip) 127 | 128 | 129 | clips.write_videofile(save_path,audio_codec='aac') 130 | 131 | -------------------------------------------------------------------------------- /inference/util/visualizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import ntpath 4 | import time 5 | from . import util 6 | from . import html 7 | import scipy.misc 8 | try: 9 | from StringIO import StringIO # Python 2.7 10 | except ImportError: 11 | from io import BytesIO # Python 3.x 12 | 13 | class Visualizer(): 14 | def __init__(self, opt): 15 | # self.opt = opt 16 | self.tf_log = opt.tf_log 17 | self.use_html = opt.isTrain and not opt.no_html 18 | self.win_size = opt.display_winsize 19 | self.name = opt.name 20 | if self.tf_log: 21 | import tensorflow as tf 22 | self.tf = tf 23 | self.log_dir = os.path.join(opt.checkpoints_dir, opt.name, 'logs') 24 | self.writer = tf.summary.FileWriter(self.log_dir) 25 | 26 | if self.use_html: 27 | self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web') 28 | self.img_dir = os.path.join(self.web_dir, 'images') 29 | print('create web directory %s...' 
% self.web_dir) 30 | util.mkdirs([self.web_dir, self.img_dir]) 31 | self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt') 32 | with open(self.log_name, "a") as log_file: 33 | now = time.strftime("%c") 34 | log_file.write('================ Training Loss (%s) ================\n' % now) 35 | 36 | # |visuals|: dictionary of images to display or save 37 | def display_current_results(self, visuals, epoch, step): 38 | if self.tf_log: # show images in tensorboard output 39 | img_summaries = [] 40 | for label, image_numpy in visuals.items(): 41 | # Write the image to a string 42 | try: 43 | s = StringIO() 44 | except: 45 | s = BytesIO() 46 | scipy.misc.toimage(image_numpy).save(s, format="jpeg") 47 | # Create an Image object 48 | img_sum = self.tf.Summary.Image(encoded_image_string=s.getvalue(), height=image_numpy.shape[0], width=image_numpy.shape[1]) 49 | # Create a Summary value 50 | img_summaries.append(self.tf.Summary.Value(tag=label, image=img_sum)) 51 | 52 | # Create and write Summary 53 | summary = self.tf.Summary(value=img_summaries) 54 | self.writer.add_summary(summary, step) 55 | 56 | if self.use_html: # save images to a html file 57 | for label, image_numpy in visuals.items(): 58 | if isinstance(image_numpy, list): 59 | for i in range(len(image_numpy)): 60 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s_%d.jpg' % (epoch, label, i)) 61 | util.save_image(image_numpy[i], img_path) 62 | else: 63 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.jpg' % (epoch, label)) 64 | util.save_image(image_numpy, img_path) 65 | 66 | # update website 67 | webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, refresh=30) 68 | for n in range(epoch, 0, -1): 69 | webpage.add_header('epoch [%d]' % n) 70 | ims = [] 71 | txts = [] 72 | links = [] 73 | 74 | for label, image_numpy in visuals.items(): 75 | if isinstance(image_numpy, list): 76 | for i in range(len(image_numpy)): 77 | img_path = 'epoch%.3d_%s_%d.jpg' % (n, label, i) 78 | ims.append(img_path) 79 | txts.append(label+str(i)) 80 | links.append(img_path) 81 | else: 82 | img_path = 'epoch%.3d_%s.jpg' % (n, label) 83 | ims.append(img_path) 84 | txts.append(label) 85 | links.append(img_path) 86 | if len(ims) < 10: 87 | webpage.add_images(ims, txts, links, width=self.win_size) 88 | else: 89 | num = int(round(len(ims)/2.0)) 90 | webpage.add_images(ims[:num], txts[:num], links[:num], width=self.win_size) 91 | webpage.add_images(ims[num:], txts[num:], links[num:], width=self.win_size) 92 | webpage.save() 93 | 94 | # errors: dictionary of error labels and values 95 | def plot_current_errors(self, errors, step): 96 | if self.tf_log: 97 | for tag, value in errors.items(): 98 | summary = self.tf.Summary(value=[self.tf.Summary.Value(tag=tag, simple_value=value)]) 99 | self.writer.add_summary(summary, step) 100 | 101 | # errors: same format as |errors| of plotCurrentErrors 102 | def print_current_errors(self, epoch, i, errors, t): 103 | message = '(epoch: %d, iters: %d, time: %.3f) ' % (epoch, i, t) 104 | for k, v in errors.items(): 105 | if v != 0: 106 | message += '%s: %.3f ' % (k, v) 107 | 108 | print(message) 109 | with open(self.log_name, "a") as log_file: 110 | log_file.write('%s\n' % message) 111 | 112 | # save image to the disk 113 | def save_images(self, webpage, visuals, image_path): 114 | image_dir = webpage.get_image_dir() 115 | short_path = ntpath.basename(image_path[0]) 116 | name = os.path.splitext(short_path)[0] 117 | 118 | webpage.add_header(name) 119 | ims = [] 120 | txts = [] 121 | links = [] 122 | 123 
| for label, image_numpy in visuals.items(): 124 | image_name = '%s_%s.jpg' % (name, label) 125 | save_path = os.path.join(image_dir, image_name) 126 | util.save_image(image_numpy, save_path) 127 | 128 | ims.append(image_name) 129 | txts.append(label) 130 | links.append(image_name) 131 | webpage.add_images(ims, txts, links, width=self.win_size) 132 | -------------------------------------------------------------------------------- /preprocess/1_split.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | 5 | import argparse 6 | 7 | def get_video_duration(video_path): 8 | ext = os.path.splitext(video_path)[-1] 9 | if ext != '.mp4' and ext != '.avi' and ext != '.flv': 10 | print('not a video') 11 | return False 12 | ffprobe_cmd = 'ffprobe -i {} -show_entries format=duration -v quiet -of csv="p=0"' 13 | p = subprocess.Popen( 14 | ffprobe_cmd.format(video_path), 15 | stdout=subprocess.PIPE, 16 | stderr=subprocess.PIPE, 17 | shell=True 18 | ) 19 | out, err = p.communicate() 20 | # something wrong 21 | if len(str(err, 'gbk')) > 0: 22 | print('our:{} err:{}'.format(out, str(err, 'gbk'))) 23 | return False 24 | # video length==0 25 | if len(str(out, 'gbk')) == 0: 26 | print('video length is 0') 27 | return False 28 | second = int(float(out)) 29 | print('video time: {}s'.format(second)) 30 | return second 31 | 32 | 33 | def one_func(root_dir, convert_to_30fps=False): 34 | video_name = 'raw.mp4' 35 | ''' 36 | convert to 30 fps 37 | ''' 38 | if convert_to_30fps: 39 | video_30fps_name = 'raw_30fps.mp4' 40 | cmd = 'ffmpeg -i {} -r 30 {}'.format( os.path.join(root_dir, video_name), os.path.join(root_dir, video_30fps_name) ) 41 | os.system(cmd) 42 | ''' 43 | split by 10s 44 | ''' 45 | if convert_to_30fps: 46 | video_path = os.path.join(root_dir, video_30fps_name) 47 | else: 48 | video_path = os.path.join(root_dir, video_name) 49 | time = get_video_duration(video_path) 50 | interval=10 51 | for idx, start_time in enumerate( range(0, time, interval)): 52 | split_dir = os.path.join(root_dir, 'splits', 'split_{}'.format(idx+1)) 53 | os.makedirs(split_dir, exist_ok=True) 54 | output_file = os.path.join(split_dir, 'raw.mp4' ) 55 | end_time = start_time+interval 56 | cmd = f"ffmpeg -i {video_path} -ss { str(start_time) } -to { str(end_time) } -filter:v 'cropdetect=limit=100:round=2:reset=0' -c:a copy -avoid_negative_ts 1 {output_file} -y" 57 | os.system(cmd) 58 | ''' 59 | get frames 60 | ''' 61 | output_frames_dir = os.path.join(split_dir, 'image') 62 | os.makedirs(output_frames_dir, exist_ok=True) 63 | cmd = 'ffmpeg -i {} {}/%06d.png'.format(output_file, output_frames_dir) 64 | os.system(cmd) 65 | ''' 66 | get audio 67 | ''' 68 | output_wav = os.path.join(split_dir, 'raw.wav') 69 | cmd = 'ffmpeg -i {} {}'.format(output_file, output_wav) 70 | os.system(cmd) 71 | 72 | if __name__ == '__main__': 73 | parser = argparse.ArgumentParser() 74 | parser.add_argument( "--video_dir", type=str, default=None, required=True) 75 | parser.add_argument( "--convert_to_30fps", default=False, action='store_true') 76 | args = parser.parse_args() 77 | 78 | one_func(args.video_dir, args.convert_to_30fps) -------------------------------------------------------------------------------- /preprocess/3_get_body_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from tqdm import tqdm 4 | import random 5 | from glob import glob 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser() 9 | 
parser.add_argument( "--root", type=str, default=None, required=True) 10 | parser.add_argument( "--json_file", type=str, default=None, required=True) 11 | args = parser.parse_args() 12 | 13 | temp_dir_paths = glob(args.root+'/split_*') 14 | 15 | # filter the bad dirs 16 | dir_paths = [] 17 | for dir_path in temp_dir_paths: 18 | src_dir = os.path.join(dir_path, 'ours_exp/mica_org/000001.png') 19 | if os.path.exists(src_dir): 20 | dir_paths.append(dir_path) 21 | 22 | def get_body_json(json_pth, dir_paths): 23 | with open(json_pth, 'w') as f: 24 | for dir_path in tqdm(dir_paths): 25 | tgt_dir = os.path.join(dir_path, 'image') 26 | src_dir = os.path.join(dir_path, 'ours_exp/mica_org') 27 | img_names = os.listdir(tgt_dir) 28 | for img_name in img_names: 29 | ref_img_name = random.choice(img_names) 30 | ref_img_pth = os.path.join(tgt_dir, ref_img_name) 31 | 32 | tgt_pth = os.path.join(tgt_dir, img_name) 33 | src_pth = os.path.join(src_dir, img_name[:-4] + '.png') 34 | if not os.path.exists(tgt_pth): 35 | continue 36 | if not os.path.exists(src_pth): 37 | continue 38 | data = json.dumps({'source':src_pth, 'target':tgt_pth, 'reference':ref_img_pth}) 39 | f.write(data) 40 | f.write('\n') 41 | 42 | 43 | get_body_json(args.json_file, dir_paths) 44 | -------------------------------------------------------------------------------- /preprocess/3_get_head_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from tqdm import tqdm 4 | import random 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument( "--json_inp_file", type=str, default=None, required=True) 10 | parser.add_argument( "--json_pth", type=str, default=None, required=True) 11 | args = parser.parse_args() 12 | 13 | data = [] 14 | with open(args.json_inp_file, 'rt') as f_r: 15 | for line in f_r: 16 | data.append(json.loads(line)) 17 | 18 | 19 | with open(args.json_pth, 'w') as f: 20 | for d in tqdm(data): 21 | src = d['source'] 22 | tgt = d['target'] 23 | ref = d['reference'] 24 | 25 | image_name = os.path.basename(src) 26 | root_dir = os.path.dirname(tgt) 27 | root_dir = os.path.join(root_dir[:-5], 'head_crop') 28 | src = os.path.join(root_dir, 'aligned', image_name[:-4]+'.jpg') 29 | tgt = os.path.join(root_dir, 'raw_aligned', image_name[:-4]+'.png') 30 | mask_pth = os.path.join(root_dir, 'raw_aligned_mask', image_name[:-4]+'.png') 31 | 32 | ref_name = os.path.basename(ref) 33 | ref = os.path.join(root_dir, 'raw_aligned', ref_name[:-4]+'.png') 34 | 35 | if not os.path.exists(mask_pth): 36 | print(mask_pth+' mask') 37 | continue 38 | if not os.path.exists(ref): 39 | print('ref:'+ref) 40 | continue 41 | if not os.path.exists(src): 42 | print(src + ' src') 43 | continue 44 | if not os.path.exists(tgt): 45 | print(tgt + ' tgt') 46 | continue 47 | 48 | one_item = json.dumps({'source':src, 'target':tgt, 'reference':ref, 'mask':mask_pth,}) 49 | f.write(one_item) 50 | f.write('\n') -------------------------------------------------------------------------------- /preprocess/README.md: -------------------------------------------------------------------------------- 1 | Preprocess are annoying and the carefully check is not achieved for the following content. I hope the following guidance works, and if any problem, please contract me. 2 | 3 | Preprocess aims to collect training data for Make-Your-Anchor, and it can be divided into three steps: 4 | 5 | #### Step 1: Collect your personal videos for training. 
6 | 7 | Collect your own anchor videos for training. *Make-Your-Anchor* needs 1-5 minutes of video for training (the longer, the better). The anchor (human) should remain at the center of the frame, and the background should ideally remain static. 8 | 9 | Afterward, crop and resize the video to 512x512, keeping the anchor centered in the frame. Other resolutions may work, depending on your hardware. For example: 10 | 11 | ![](assets/ref_advisor.png) 12 | 13 | (That's my advisor at 512x512 resolution; I captured his videos for the experiments.) 14 | 15 | You should end up with a video at *your_dir/raw.mp4* (the default video name for preprocessing is *raw.mp4*). Because the [SHOW](https://github.com/yhw-yhw/SHOW.git) preprocessing code I use can only process 300 frames at a time, the video is split into multiple ten-second, 30fps clips. You can use the following script: 16 | 17 | ```bash 18 | python 1_split.py --video_dir your_dir --convert_to_30fps 19 | ``` 20 | 21 | FFmpeg is required to run this script. The video will then be split as: 22 | ``` 23 | your_dir/ 24 | ├── raw.mp4 25 | ├── raw_30fps.mp4 26 | ├── split_1/ 27 | │ ├── raw.mp4 28 | │ └── raw.wav 29 | ├── split_2/ 30 | ├── split_3/ 31 | ... 32 | ``` 33 | 34 | #### Step 2: Extract conditions. 35 | 36 | ##### Body 3D Mesh and Video Frames: 37 | 38 | Make-Your-Anchor extracts 3D body meshes via [SHOW](https://github.com/yhw-yhw/SHOW.git). After installing that repo, run this script: 39 | 40 | ```bash 41 | # move to the directory of SHOW 42 | cd SHOW 43 | 44 | # process all splits 45 | for folder in your_dir/split_*; do 46 | python main.py --speaker_name -1 --all_top_dir $folder/raw.mp4 47 | done 48 | ``` 49 | 50 | The results are organized as: 51 | 52 | 53 | ``` 54 | your_dir/ 55 | ... 56 | ├── split_1/ 57 | │ ├── ours_exp/ 58 | │ │ ├── mica_org/ 59 | │ │ │ └── *png 60 | │ │ ... 61 | │ ├── image/ 62 | │ │ └── *png 63 | │ ├── raw.mp4 64 | │ └── raw.wav 65 | ... 66 | ``` 67 | 68 | The renders in *your_dir/split_\*/ours_exp/mica_org/\*png* are the required 3D body mesh condition. The corresponding frames can be found in *your_dir/split_\*/image/\*png*. 69 | 70 | ##### Head Conditions: 71 | 72 | The face alignment and face parsing code are used to obtain the head conditions. Follow the same installation as in the main README for the head-condition code. The head conditions are extracted with the following script: 73 | 74 | ```bash 75 | # process all splits 76 | for folder in your_dir/split_*; do 77 | python main.py --speaker_name -1 --all_top_dir $folder/raw.mp4 78 | # face alignment 79 | python face_alignment.py \ 80 | --imgdir_pth $folder/ours_exp/mica_org/ \ 81 | --raw_imgdir_pth $folder/image/ \ 82 | --results_dir $folder/head/ \ 83 | --crop_size 512 84 | # get face mask 85 | python get_mask.py \ 86 | --input_pth $folder/head/raw_aligned \ 87 | --mask_pth $folder/head/raw_aligned_mask 88 | done 89 | 90 | ``` 91 | 92 | #### Step 3: Organize the json files. 93 | 94 | We use two *json* files to organize the training dataset, one for the body and one for the head. Run the following script: 95 | ```bash 96 | python 3_get_body_json.py --root your_dir/ --json_file your_dir/body_train.json 97 | python 3_get_head_json.py --json_inp_file your_dir/body_train.json --json_pth your_dir/head_train.json 98 | ``` 99 | 100 | Once finished, *your_dir/body_train.json* and *your_dir/head_train.json* are the final config files for training. You can then follow the guidance in ***Fine-Tuning***!
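For reference, each line in these json files is a standalone JSON object written by `3_get_body_json.py` / `3_get_head_json.py`: the body file stores `source` (the mica_org mesh render), `target` (the corresponding frame), and `reference` (a random frame from the same split), and the head file additionally stores `mask`. The snippet below is a minimal, optional sanity check of the generated files; it is only a sketch, the concrete paths are illustrative, and the key names are taken from the scripts above.

```python
# Minimal sanity check for the generated training json files (a sketch, not part of the official pipeline).
# Each line is one sample, e.g.
# {"source": ".../split_1/ours_exp/mica_org/000001.png", "target": ".../split_1/image/000001.png", "reference": ".../split_1/image/000042.png"}
import json

def check(json_path, required_keys):
    # Read the file line by line; every non-empty line should parse as one JSON object.
    with open(json_path, 'rt') as f:
        items = [json.loads(line) for line in f if line.strip()]
    # Report any sample that is missing an expected key.
    for item in items:
        missing = [k for k in required_keys if k not in item]
        if missing:
            print('missing keys {} in: {}'.format(missing, item))
    print('{}: {} samples'.format(json_path, len(items)))

check('your_dir/body_train.json', ['source', 'target', 'reference'])
check('your_dir/head_train.json', ['source', 'target', 'reference', 'mask'])
```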
-------------------------------------------------------------------------------- /preprocess/assets/ref_advisor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/assets/ref_advisor.png -------------------------------------------------------------------------------- /preprocess/face_alignment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import numpy as np 5 | from tqdm import tqdm 6 | from insightface_func.face_detect_crop_single import Face_detect_crop 7 | 8 | import argparse 9 | 10 | def align(img, M, crop_size): 11 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 12 | return align_img 13 | 14 | def _totensor(array): 15 | tensor = torch.from_numpy(array) 16 | img = tensor.transpose(0, 1).transpose(0, 2).contiguous() 17 | return img.float().div(255) 18 | 19 | 20 | def imgdir_align(imgdir_path, raw_imgdir_pth, detect_model, results_dir='./temp_results', crop_size=224): 21 | os.makedirs(results_dir, exist_ok=True) 22 | os.makedirs(os.path.join(results_dir, 'aligned'), exist_ok=True) 23 | os.makedirs(os.path.join(results_dir, 'raw_aligned'), exist_ok=True) 24 | os.makedirs(os.path.join(results_dir, 'matrix'), exist_ok=True) 25 | 26 | for file_name in tqdm(os.listdir(imgdir_path)): 27 | img_pth = os.path.join(imgdir_path, file_name) 28 | img = cv2.imread(img_pth) 29 | 30 | raw_img_pth = os.path.join(raw_imgdir_pth, file_name[:-4]+'.jpg') 31 | raw_file_name = file_name[:-4]+'.jpg' if os.path.exists(raw_img_pth) else file_name[:-4]+'.png' 32 | raw_img_pth = os.path.join(raw_imgdir_pth, raw_file_name) 33 | if not os.path.exists(raw_img_pth): 34 | continue 35 | raw_img = cv2.imread(raw_img_pth) 36 | 37 | if img is not None: 38 | detect_results = detect_model.get(img,crop_size) 39 | 40 | if detect_results is not None: 41 | 42 | img_align_crop_list = detect_results[0] 43 | img_mat_list = detect_results[1] 44 | 45 | for img_align_crop in img_align_crop_list: 46 | cv2.imwrite(os.path.join(results_dir, 'aligned', file_name[:-4]+'.jpg'), img_align_crop) 47 | 48 | raw_img_align_crop = align(raw_img, img_mat_list[0], crop_size) 49 | cv2.imwrite(os.path.join(results_dir, 'raw_aligned', raw_file_name), raw_img_align_crop) 50 | np.save(os.path.join(results_dir, 'matrix', file_name.split('.')[0]), img_mat_list[0]) 51 | break 52 | 53 | else: 54 | print('not detected in {}'.format(img_pth)) 55 | if not os.path.exists(results_dir): 56 | os.mkdir(results_dir) 57 | 58 | else: 59 | pass 60 | 61 | 62 | 63 | 64 | 65 | if __name__ == '__main__': 66 | 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument( "--imgdir_pth", type=str, default=None, required=True ) 69 | parser.add_argument( "--raw_imgdir_pth", type=str, default=None, required=True ) 70 | parser.add_argument( "--results_dir", type=str, default=None, required=True ) 71 | parser.add_argument( "--crop_size", type=int, default=512 ) 72 | 73 | args = parser.parse_args() 74 | 75 | 76 | imgdir_pth = args.imgdir_pth 77 | raw_imgdir_pth = args.raw_imgdir_pth 78 | 79 | crop_size=args.crop_size 80 | if crop_size == 512: 81 | mode = 'ffhq' 82 | else: 83 | mode = 'None' 84 | 85 | app = Face_detect_crop(name='antelope', root='./insightface_func/models') 86 | app.prepare(ctx_id= 0, det_thresh=0.6, det_size=(640,640),mode=mode) 87 | 88 | results_dir = args.results_dir 89 | 90 | imgdir_align(imgdir_pth, 
raw_imgdir_pth, app, crop_size=crop_size, results_dir=results_dir) -------------------------------------------------------------------------------- /preprocess/get_mask.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | # from logger import setup_logger 5 | from bisenet import BiSeNet 6 | 7 | import torch 8 | 9 | import os 10 | import os.path as osp 11 | import numpy as np 12 | from PIL import Image 13 | import torchvision.transforms as transforms 14 | import cv2 15 | 16 | import glob 17 | import json 18 | 19 | from tqdm import tqdm 20 | 21 | import argparse 22 | 23 | # [0, 'background', 1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye', 24 | # 6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r', 10 'nose', 11 'mouth', 12 'u_lip', 25 | # 13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat'] 26 | 27 | def dilate(img, reverse=False): 28 | img = torch.from_numpy(img) 29 | mask = torch.ones_like(img) 30 | 31 | parsing = img 32 | mask = mask - ((parsing == 0).float()) 33 | mask = mask - ((parsing == 14).float()) 34 | mask = mask - ((parsing == 15).float()) 35 | mask = mask - ((parsing == 16).float()) 36 | mask = mask - ((parsing == 17).float()) 37 | mask = mask - ((parsing == 18).float()) 38 | 39 | kernel = np.ones((3,3), dtype=np.uint8) # origin maybe 40 | mask_numpy = mask.numpy() 41 | mask_numpy = cv2.dilate(mask_numpy, kernel, iterations=1) 42 | if reverse: 43 | mask_numpy = 1-mask_numpy 44 | mask_numpy = 255*mask_numpy 45 | 46 | return mask_numpy 47 | 48 | def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg', ifdilate=True, reverse=False): 49 | im = np.array(im) 50 | vis_im = im.copy().astype(np.uint8) 51 | vis_parsing_anno = parsing_anno.copy().astype(np.uint8) 52 | vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST) 53 | 54 | if ifdilate: 55 | vis_parsing_anno = dilate(vis_parsing_anno, reverse=reverse) 56 | 57 | # Save result or not 58 | if save_im: 59 | cv2.imwrite(save_path[:-4] +'.png', vis_parsing_anno) 60 | 61 | 62 | def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth', ifdilate=True, reverse=False): 63 | 64 | if not os.path.exists(respth): 65 | os.makedirs(respth) 66 | 67 | n_classes = 19 68 | net = BiSeNet(n_classes=n_classes) 69 | net.cuda() 70 | save_pth = osp.join('res/cp', cp) 71 | net.load_state_dict(torch.load(save_pth)) 72 | net.eval() 73 | 74 | to_tensor = transforms.Compose([ 75 | transforms.ToTensor(), 76 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 77 | ]) 78 | pathes = glob.glob( os.path.join(dspth,'*.jpg') ) + glob.glob( os.path.join(dspth,'*.png') ) 79 | with torch.no_grad(): 80 | for image_path in tqdm(pathes): 81 | image_path = os.path.basename(image_path) 82 | # print(image_path) 83 | img = Image.open(osp.join(dspth, image_path)) 84 | image = img.resize((512, 512), Image.BILINEAR) 85 | img = to_tensor(image) 86 | img = torch.unsqueeze(img, 0) 87 | img = img.cuda() 88 | out = net(img)[0] 89 | parsing = out.squeeze(0).cpu().numpy().argmax(0) 90 | 91 | 92 | vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path), ifdilate=ifdilate, reverse=reverse) 93 | 94 | if __name__ == "__main__": 95 | parser = argparse.ArgumentParser(description="Simple example of a ControlNet training script.") 96 | parser.add_argument( "--input_pth", type=str, default=None, required=True ) 
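
Note on preprocess/face_alignment.py above: for every frame it stores the aligned crop plus the 2x3 affine matrix used to produce it. As a rough illustration of why the matrix is kept, the sketch below warps a crop back into the full frame with the inverse mapping. The file paths are placeholders and this is not the repository's own blending code, only a minimal example of reusing the saved matrix.

import cv2
import numpy as np

# Placeholder paths; face_alignment.py writes these under --results_dir.
frame = cv2.imread('frame.png')                             # original full frame
crop  = cv2.imread('temp_results/raw_aligned/000001.png')   # aligned crop
M     = np.load('temp_results/matrix/000001.npy')           # 2x3 matrix saved per frame

h, w = frame.shape[:2]
# WARP_INVERSE_MAP applies the inverse of M, mapping the crop back to frame coordinates.
back = cv2.warpAffine(crop, M, (w, h),
                      flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP, borderValue=0.0)
mask = cv2.warpAffine(np.full(crop.shape[:2], 255, np.uint8), M, (w, h),
                      flags=cv2.INTER_NEAREST | cv2.WARP_INVERSE_MAP, borderValue=0)
pasted = np.where(mask[..., None] > 0, back, frame)
cv2.imwrite('pasted_back.png', pasted)
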
97 | parser.add_argument( "--mask_pth", type=str, default=None, required=True ) 98 | 99 | args = parser.parse_args() 100 | 101 | ifdilate=True 102 | reverse=True 103 | 104 | respth=args.mask_pth 105 | dspth=args.input_pth 106 | 107 | evaluate(respth=respth, dspth=dspth, cp='79999_iter.pth', ifdilate=ifdilate, reverse=reverse) 108 | 109 | -------------------------------------------------------------------------------- /preprocess/insightface_func/.ipynb_checkpoints/face_detect_crop_single-checkpoint.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 16:46:04 7 | Description: 8 | ''' 9 | from __future__ import division 10 | import collections 11 | import numpy as np 12 | import glob 13 | import os 14 | import os.path as osp 15 | import cv2 16 | from insightface.model_zoo import model_zoo 17 | from insightface_func.utils import face_align_ffhqandnewarc as face_align 18 | 19 | __all__ = ['Face_detect_crop', 'Face'] 20 | 21 | Face = collections.namedtuple('Face', [ 22 | 'bbox', 'kps', 'det_score', 'embedding', 'gender', 'age', 23 | 'embedding_norm', 'normed_embedding', 24 | 'landmark' 25 | ]) 26 | 27 | Face.__new__.__defaults__ = (None, ) * len(Face._fields) 28 | 29 | 30 | class Face_detect_crop: 31 | def __init__(self, name, root='~/.insightface_func/models'): 32 | self.models = {} 33 | root = os.path.expanduser(root) 34 | onnx_files = glob.glob(osp.join(root, name, '*.onnx')) 35 | onnx_files = sorted(onnx_files) 36 | for onnx_file in onnx_files: 37 | if onnx_file.find('_selfgen_')>0: 38 | #print('ignore:', onnx_file) 39 | continue 40 | model = model_zoo.get_model(onnx_file) 41 | if model.taskname not in self.models: 42 | print('find model:', onnx_file, model.taskname) 43 | self.models[model.taskname] = model 44 | else: 45 | print('duplicated model task type, ignore:', onnx_file, model.taskname) 46 | del model 47 | assert 'detection' in self.models 48 | self.det_model = self.models['detection'] 49 | 50 | 51 | def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640), mode ='None'): 52 | self.det_thresh = det_thresh 53 | self.mode = mode 54 | assert det_size is not None 55 | print('set det-size:', det_size) 56 | self.det_size = det_size 57 | for taskname, model in self.models.items(): 58 | if taskname=='detection': 59 | model.prepare(ctx_id, input_size=det_size) 60 | else: 61 | model.prepare(ctx_id) 62 | 63 | def get(self, img, crop_size, max_num=0): 64 | bboxes, kpss = self.det_model.detect(img, 65 | threshold=self.det_thresh, 66 | max_num=max_num, 67 | metric='default') 68 | if bboxes.shape[0] == 0: 69 | print('No face detected') 70 | return None 71 | # ret = [] 72 | # for i in range(bboxes.shape[0]): 73 | # bbox = bboxes[i, 0:4] 74 | # det_score = bboxes[i, 4] 75 | # kps = None 76 | # if kpss is not None: 77 | # kps = kpss[i] 78 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 79 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 80 | # for i in range(bboxes.shape[0]): 81 | # kps = None 82 | # if kpss is not None: 83 | # kps = kpss[i] 84 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 85 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 86 | 87 | det_score = bboxes[..., 4] 88 | 89 | # select the face with the hightest detection score 90 | best_index = np.argmax(det_score) 91 | 92 | kps = None 93 | if kpss 
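
Note on preprocess/get_mask.py above: it keeps the head region by zeroing out the background, neck, neck_l, cloth, hair and hat labels of the BiSeNet parsing and then dilating slightly. The same logic can be written more compactly with np.isin; this is an equivalent sketch of that mask step, not a replacement for the script.

import cv2
import numpy as np

# CelebAMask-HQ label ids dropped by get_mask.py:
# 0 background, 14 neck, 15 neck_l, 16 cloth, 17 hair, 18 hat
EXCLUDED_LABELS = [0, 14, 15, 16, 17, 18]

def head_mask(parsing: np.ndarray, reverse: bool = False) -> np.ndarray:
    """parsing: HxW int array of BiSeNet class ids; returns a 0/255 uint8 mask."""
    mask = (~np.isin(parsing, EXCLUDED_LABELS)).astype(np.float32)
    mask = cv2.dilate(mask, np.ones((3, 3), np.uint8), iterations=1)
    if reverse:
        mask = 1.0 - mask
    return (mask * 255).astype(np.uint8)
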
is not None: 94 | kps = kpss[best_index] 95 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 96 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 97 | 98 | return [align_img], [M] 99 | -------------------------------------------------------------------------------- /preprocess/insightface_func/.ipynb_checkpoints/face_detect_crop_single_smooth-checkpoint.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 16:46:04 7 | Description: 8 | ''' 9 | from __future__ import division 10 | import collections 11 | import numpy as np 12 | import glob 13 | import os 14 | import os.path as osp 15 | import cv2 16 | from insightface.model_zoo import model_zoo 17 | from insightface_func.utils import face_align_ffhqandnewarc as face_align 18 | 19 | __all__ = ['Face_detect_crop', 'Face'] 20 | 21 | Face = collections.namedtuple('Face', [ 22 | 'bbox', 'kps', 'det_score', 'embedding', 'gender', 'age', 23 | 'embedding_norm', 'normed_embedding', 24 | 'landmark' 25 | ]) 26 | 27 | Face.__new__.__defaults__ = (None, ) * len(Face._fields) 28 | 29 | 30 | class Face_detect_crop: 31 | def __init__(self, name, root='~/.insightface_func/models'): 32 | self.models = {} 33 | root = os.path.expanduser(root) 34 | onnx_files = glob.glob(osp.join(root, name, '*.onnx')) 35 | onnx_files = sorted(onnx_files) 36 | for onnx_file in onnx_files: 37 | if onnx_file.find('_selfgen_')>0: 38 | #print('ignore:', onnx_file) 39 | continue 40 | model = model_zoo.get_model(onnx_file) 41 | if model.taskname not in self.models: 42 | print('find model:', onnx_file, model.taskname) 43 | self.models[model.taskname] = model 44 | else: 45 | print('duplicated model task type, ignore:', onnx_file, model.taskname) 46 | del model 47 | assert 'detection' in self.models 48 | self.det_model = self.models['detection'] 49 | 50 | 51 | def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640), mode ='None'): 52 | self.det_thresh = det_thresh 53 | self.mode = mode 54 | assert det_size is not None 55 | print('set det-size:', det_size) 56 | self.det_size = det_size 57 | for taskname, model in self.models.items(): 58 | if taskname=='detection': 59 | model.prepare(ctx_id, input_size=det_size) 60 | else: 61 | model.prepare(ctx_id) 62 | 63 | def get(self, img, crop_size, max_num=0): 64 | bboxes, kpss = self.det_model.detect(img, 65 | threshold=self.det_thresh, 66 | max_num=max_num, 67 | metric='default') 68 | if bboxes.shape[0] == 0: 69 | print('No face detected') 70 | return None 71 | 72 | det_score = bboxes[..., 4] 73 | 74 | # select the face with the hightest detection score 75 | best_index = np.argmax(det_score) 76 | 77 | kps = None 78 | if kpss is not None: 79 | kps = kpss[best_index] 80 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 81 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 82 | 83 | return [align_img], [M] 84 | 85 | def get_kps(self, img, crop_size, max_num=0): 86 | bboxes, kpss = self.det_model.detect(img, 87 | threshold=self.det_thresh, 88 | max_num=max_num, 89 | metric='default') 90 | if bboxes.shape[0] == 0: 91 | print('No face detected') 92 | return None 93 | 94 | det_score = bboxes[..., 4] 95 | 96 | # select the face with the hightest detection score 97 | best_index = np.argmax(det_score) 98 | # print(best_index) 99 | 100 | kps = None 101 | if kpss is not None: 
102 | kps = kpss[best_index] 103 | return kps 104 | 105 | def smooth_kps(self, kpss): 106 | kps = np.mean(kpss, axis=0) 107 | return kps 108 | 109 | def get_smooth_m(self, img, crop_size, kpss): 110 | ''' 111 | kpss means kps with nearest frame 112 | ''' 113 | kps = self.smooth_kps(kpss) 114 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 115 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 116 | 117 | return [align_img], [M] -------------------------------------------------------------------------------- /preprocess/insightface_func/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/insightface_func/__init__.py -------------------------------------------------------------------------------- /preprocess/insightface_func/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/insightface_func/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/insightface_func/__pycache__/face_detect_crop_single.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/insightface_func/__pycache__/face_detect_crop_single.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/insightface_func/__pycache__/face_detect_crop_single_smooth.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/insightface_func/__pycache__/face_detect_crop_single_smooth.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/insightface_func/face_detect_crop_multi.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 16:45:41 7 | Description: 8 | ''' 9 | from __future__ import division 10 | import collections 11 | import numpy as np 12 | import glob 13 | import os 14 | import os.path as osp 15 | import cv2 16 | from insightface.model_zoo import model_zoo 17 | from insightface_func.utils import face_align_ffhqandnewarc as face_align 18 | 19 | __all__ = ['Face_detect_crop', 'Face'] 20 | 21 | Face = collections.namedtuple('Face', [ 22 | 'bbox', 'kps', 'det_score', 'embedding', 'gender', 'age', 23 | 'embedding_norm', 'normed_embedding', 24 | 'landmark' 25 | ]) 26 | 27 | Face.__new__.__defaults__ = (None, ) * len(Face._fields) 28 | 29 | 30 | class Face_detect_crop: 31 | def __init__(self, name, root='~/.insightface_func/models'): 32 | self.models = {} 33 | root = os.path.expanduser(root) 34 | onnx_files = glob.glob(osp.join(root, name, '*.onnx')) 35 | onnx_files = sorted(onnx_files) 36 | for onnx_file in onnx_files: 37 | if onnx_file.find('_selfgen_')>0: 38 | #print('ignore:', onnx_file) 39 | continue 40 | model = model_zoo.get_model(onnx_file) 41 | if model.taskname not in self.models: 42 | print('find 
model:', onnx_file, model.taskname) 43 | self.models[model.taskname] = model 44 | else: 45 | print('duplicated model task type, ignore:', onnx_file, model.taskname) 46 | del model 47 | assert 'detection' in self.models 48 | self.det_model = self.models['detection'] 49 | 50 | 51 | def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640), mode ='None'): 52 | self.det_thresh = det_thresh 53 | self.mode = mode 54 | assert det_size is not None 55 | print('set det-size:', det_size) 56 | self.det_size = det_size 57 | for taskname, model in self.models.items(): 58 | if taskname=='detection': 59 | model.prepare(ctx_id, input_size=det_size) 60 | else: 61 | model.prepare(ctx_id) 62 | 63 | def get(self, img, crop_size, max_num=0): 64 | bboxes, kpss = self.det_model.detect(img, 65 | threshold=self.det_thresh, 66 | max_num=max_num, 67 | metric='default') 68 | if bboxes.shape[0] == 0: 69 | return None 70 | ret = [] 71 | # for i in range(bboxes.shape[0]): 72 | # bbox = bboxes[i, 0:4] 73 | # det_score = bboxes[i, 4] 74 | # kps = None 75 | # if kpss is not None: 76 | # kps = kpss[i] 77 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 78 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 79 | align_img_list = [] 80 | M_list = [] 81 | for i in range(bboxes.shape[0]): 82 | kps = None 83 | if kpss is not None: 84 | kps = kpss[i] 85 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 86 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 87 | align_img_list.append(align_img) 88 | M_list.append(M) 89 | 90 | # det_score = bboxes[..., 4] 91 | 92 | # best_index = np.argmax(det_score) 93 | 94 | # kps = None 95 | # if kpss is not None: 96 | # kps = kpss[best_index] 97 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 98 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 99 | 100 | return align_img_list, M_list 101 | -------------------------------------------------------------------------------- /preprocess/insightface_func/face_detect_crop_single.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 16:46:04 7 | Description: 8 | ''' 9 | from __future__ import division 10 | import collections 11 | import numpy as np 12 | import glob 13 | import os 14 | import os.path as osp 15 | import cv2 16 | from insightface.model_zoo import model_zoo 17 | from insightface_func.utils import face_align_ffhqandnewarc as face_align 18 | 19 | __all__ = ['Face_detect_crop', 'Face'] 20 | 21 | Face = collections.namedtuple('Face', [ 22 | 'bbox', 'kps', 'det_score', 'embedding', 'gender', 'age', 23 | 'embedding_norm', 'normed_embedding', 24 | 'landmark' 25 | ]) 26 | 27 | Face.__new__.__defaults__ = (None, ) * len(Face._fields) 28 | 29 | 30 | class Face_detect_crop: 31 | def __init__(self, name, root='~/.insightface_func/models'): 32 | self.models = {} 33 | root = os.path.expanduser(root) 34 | onnx_files = glob.glob(osp.join(root, name, '*.onnx')) 35 | onnx_files = sorted(onnx_files) 36 | for onnx_file in onnx_files: 37 | if onnx_file.find('_selfgen_')>0: 38 | #print('ignore:', onnx_file) 39 | continue 40 | model = model_zoo.get_model(onnx_file) 41 | if model.taskname not in self.models: 42 | print('find model:', onnx_file, model.taskname) 43 | self.models[model.taskname] = model 44 | else: 45 | print('duplicated model 
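
Note on face_detect_crop_multi.py above: unlike the single-face variants, its get() returns one aligned crop and one matrix per detected face. A hedged usage sketch follows; the model name, paths and crop size mirror the defaults used elsewhere in preprocess/ but are not mandated by the class.

import cv2
from insightface_func.face_detect_crop_multi import Face_detect_crop

app = Face_detect_crop(name='antelope', root='./insightface_func/models')
app.prepare(ctx_id=0, det_thresh=0.6, det_size=(640, 640), mode='ffhq')

img = cv2.imread('frame.png')          # placeholder input frame
result = app.get(img, crop_size=512)   # None when no face passes det_thresh
if result is not None:
    crops, matrices = result
    for i, crop in enumerate(crops):
        cv2.imwrite(f'face_{i:02d}.png', crop)
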
task type, ignore:', onnx_file, model.taskname) 46 | del model 47 | assert 'detection' in self.models 48 | self.det_model = self.models['detection'] 49 | 50 | 51 | def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640), mode ='None'): 52 | self.det_thresh = det_thresh 53 | self.mode = mode 54 | assert det_size is not None 55 | print('set det-size:', det_size) 56 | self.det_size = det_size 57 | for taskname, model in self.models.items(): 58 | if taskname=='detection': 59 | model.prepare(ctx_id, input_size=det_size) 60 | else: 61 | model.prepare(ctx_id) 62 | 63 | def get(self, img, crop_size, max_num=0): 64 | bboxes, kpss = self.det_model.detect(img, 65 | threshold=self.det_thresh, 66 | max_num=max_num, 67 | metric='default') 68 | if bboxes.shape[0] == 0: 69 | print('No face detected') 70 | return None 71 | # ret = [] 72 | # for i in range(bboxes.shape[0]): 73 | # bbox = bboxes[i, 0:4] 74 | # det_score = bboxes[i, 4] 75 | # kps = None 76 | # if kpss is not None: 77 | # kps = kpss[i] 78 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 79 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 80 | # for i in range(bboxes.shape[0]): 81 | # kps = None 82 | # if kpss is not None: 83 | # kps = kpss[i] 84 | # M, _ = face_align.estimate_norm(kps, crop_size, mode ='None') 85 | # align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 86 | 87 | det_score = bboxes[..., 4] 88 | 89 | # select the face with the hightest detection score 90 | best_index = np.argmax(det_score) 91 | 92 | kps = None 93 | if kpss is not None: 94 | kps = kpss[best_index] 95 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 96 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 97 | 98 | return [align_img], [M] 99 | -------------------------------------------------------------------------------- /preprocess/insightface_func/face_detect_crop_single_smooth.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-23 17:03:58 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-24 16:46:04 7 | Description: 8 | ''' 9 | from __future__ import division 10 | import collections 11 | import numpy as np 12 | import glob 13 | import os 14 | import os.path as osp 15 | import cv2 16 | from insightface.model_zoo import model_zoo 17 | from insightface_func.utils import face_align_ffhqandnewarc as face_align 18 | 19 | __all__ = ['Face_detect_crop', 'Face'] 20 | 21 | Face = collections.namedtuple('Face', [ 22 | 'bbox', 'kps', 'det_score', 'embedding', 'gender', 'age', 23 | 'embedding_norm', 'normed_embedding', 24 | 'landmark' 25 | ]) 26 | 27 | Face.__new__.__defaults__ = (None, ) * len(Face._fields) 28 | 29 | 30 | class Face_detect_crop: 31 | def __init__(self, name, root='~/.insightface_func/models'): 32 | self.models = {} 33 | root = os.path.expanduser(root) 34 | onnx_files = glob.glob(osp.join(root, name, '*.onnx')) 35 | onnx_files = sorted(onnx_files) 36 | for onnx_file in onnx_files: 37 | if onnx_file.find('_selfgen_')>0: 38 | #print('ignore:', onnx_file) 39 | continue 40 | model = model_zoo.get_model(onnx_file) 41 | if model.taskname not in self.models: 42 | print('find model:', onnx_file, model.taskname) 43 | self.models[model.taskname] = model 44 | else: 45 | print('duplicated model task type, ignore:', onnx_file, model.taskname) 46 | del model 47 | assert 'detection' in self.models 48 | self.det_model = 
self.models['detection'] 49 | 50 | 51 | def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640), mode ='None'): 52 | self.det_thresh = det_thresh 53 | self.mode = mode 54 | assert det_size is not None 55 | print('set det-size:', det_size) 56 | self.det_size = det_size 57 | for taskname, model in self.models.items(): 58 | if taskname=='detection': 59 | model.prepare(ctx_id, input_size=det_size) 60 | else: 61 | model.prepare(ctx_id) 62 | 63 | def get(self, img, crop_size, max_num=0): 64 | bboxes, kpss = self.det_model.detect(img, 65 | threshold=self.det_thresh, 66 | max_num=max_num, 67 | metric='default') 68 | if bboxes.shape[0] == 0: 69 | print('No face detected') 70 | return None 71 | 72 | det_score = bboxes[..., 4] 73 | 74 | # select the face with the hightest detection score 75 | best_index = np.argmax(det_score) 76 | 77 | kps = None 78 | if kpss is not None: 79 | kps = kpss[best_index] 80 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 81 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 82 | 83 | return [align_img], [M] 84 | 85 | def get_kps(self, img, crop_size, max_num=0): 86 | bboxes, kpss = self.det_model.detect(img, 87 | threshold=self.det_thresh, 88 | max_num=max_num, 89 | metric='default') 90 | if bboxes.shape[0] == 0: 91 | print('No face detected') 92 | return None 93 | 94 | det_score = bboxes[..., 4] 95 | 96 | # select the face with the hightest detection score 97 | best_index = np.argmax(det_score) 98 | # print(best_index) 99 | 100 | kps = None 101 | if kpss is not None: 102 | kps = kpss[best_index] 103 | return kps 104 | 105 | def smooth_kps(self, kpss): 106 | kps = np.mean(kpss, axis=0) 107 | return kps 108 | 109 | def get_smooth_m(self, img, crop_size, kpss): 110 | ''' 111 | kpss means kps with nearest frame 112 | ''' 113 | kps = self.smooth_kps(kpss) 114 | M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode) 115 | align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) 116 | 117 | return [align_img], [M] -------------------------------------------------------------------------------- /preprocess/insightface_func/utils/.ipynb_checkpoints/face_align_ffhqandnewarc-checkpoint.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-15 19:42:42 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-15 20:01:47 7 | Description: 8 | ''' 9 | 10 | import cv2 11 | import numpy as np 12 | from skimage import transform as trans 13 | 14 | src1 = np.array([[51.642, 50.115], [57.617, 49.990], [35.740, 69.007], 15 | [51.157, 89.050], [57.025, 89.702]], 16 | dtype=np.float32) 17 | #<--left 18 | src2 = np.array([[45.031, 50.118], [65.568, 50.872], [39.677, 68.111], 19 | [45.177, 86.190], [64.246, 86.758]], 20 | dtype=np.float32) 21 | 22 | #---frontal 23 | src3 = np.array([[39.730, 51.138], [72.270, 51.138], [56.000, 68.493], 24 | [42.463, 87.010], [69.537, 87.010]], 25 | dtype=np.float32) 26 | 27 | #-->right 28 | src4 = np.array([[46.845, 50.872], [67.382, 50.118], [72.737, 68.111], 29 | [48.167, 86.758], [67.236, 86.190]], 30 | dtype=np.float32) 31 | 32 | #-->right profile 33 | src5 = np.array([[54.796, 49.990], [60.771, 50.115], [76.673, 69.007], 34 | [55.388, 89.702], [61.257, 89.050]], 35 | dtype=np.float32) 36 | 37 | src = np.array([src1, src2, src3, src4, src5]) 38 | src_map = src 39 | 40 | ffhq_src = np.array([[192.98138, 239.94708], [318.90277, 240.1936], [256.63416, 314.01935], 41 
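
Note on face_detect_crop_single_smooth.py above: it adds get_kps() and get_smooth_m() so that the alignment matrix can be estimated from keypoints averaged over neighbouring frames, which reduces per-frame jitter in video. A minimal sketch of that intended usage; the window size and paths are arbitrary choices, not values fixed by the code.

import cv2
import numpy as np
from insightface_func.face_detect_crop_single_smooth import Face_detect_crop

app = Face_detect_crop(name='antelope', root='./insightface_func/models')
app.prepare(ctx_id=0, det_thresh=0.6, det_size=(640, 640), mode='ffhq')

# Placeholder 3-frame window centred on the frame to be aligned.
frame_paths = ['000001.png', '000002.png', '000003.png']
frames = [cv2.imread(p) for p in frame_paths]

window_kps = [app.get_kps(f, crop_size=512) for f in frames]
window_kps = np.stack([k for k in window_kps if k is not None])   # (N, 5, 2)

# get_smooth_m() averages the window's keypoints before estimating the matrix.
crops, matrices = app.get_smooth_m(frames[len(frames) // 2], 512, window_kps)
cv2.imwrite('smoothed_crop.png', crops[0])
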
| [201.26117, 371.41043], [313.08905, 371.15118]]) 42 | ffhq_src = np.expand_dims(ffhq_src, axis=0) 43 | 44 | large_ffhq_src = ffhq_src/1.5 + 110 45 | 46 | # arcface_src = np.array( 47 | # [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], 48 | # [41.5493, 92.3655], [70.7299, 92.2041]], 49 | # dtype=np.float32) 50 | 51 | # arcface_src = np.expand_dims(arcface_src, axis=0) 52 | 53 | # In[66]: 54 | 55 | 56 | # lmk is prediction; src is template 57 | def estimate_norm(lmk, image_size=112, mode='ffhq'): 58 | assert lmk.shape == (5, 2) 59 | tform = trans.SimilarityTransform() 60 | lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1) 61 | min_M = [] 62 | min_index = [] 63 | min_error = float('inf') 64 | if mode == 'ffhq': 65 | # assert image_size == 112 66 | src = ffhq_src * image_size / 512 67 | elif mode == 'large_ffhq': 68 | src = large_ffhq_src * image_size / 512 69 | else: 70 | src = src_map * image_size / 112 71 | for i in np.arange(src.shape[0]): 72 | tform.estimate(lmk, src[i]) 73 | M = tform.params[0:2, :] 74 | results = np.dot(M, lmk_tran.T) 75 | results = results.T 76 | error = np.sum(np.sqrt(np.sum((results - src[i])**2, axis=1))) 77 | # print(error) 78 | if error < min_error: 79 | min_error = error 80 | min_M = M 81 | min_index = i 82 | return min_M, min_index 83 | 84 | 85 | def norm_crop(img, landmark, image_size=112, mode='ffhq'): 86 | if mode == 'Both': 87 | M_None, _ = estimate_norm(landmark, image_size, mode = 'newarc') 88 | M_ffhq, _ = estimate_norm(landmark, image_size, mode='ffhq') 89 | warped_None = cv2.warpAffine(img, M_None, (image_size, image_size), borderValue=0.0) 90 | warped_ffhq = cv2.warpAffine(img, M_ffhq, (image_size, image_size), borderValue=0.0) 91 | return warped_ffhq, warped_None 92 | else: 93 | M, pose_index = estimate_norm(landmark, image_size, mode) 94 | warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0) 95 | return warped 96 | 97 | def square_crop(im, S): 98 | if im.shape[0] > im.shape[1]: 99 | height = S 100 | width = int(float(im.shape[1]) / im.shape[0] * S) 101 | scale = float(S) / im.shape[0] 102 | else: 103 | width = S 104 | height = int(float(im.shape[0]) / im.shape[1] * S) 105 | scale = float(S) / im.shape[1] 106 | resized_im = cv2.resize(im, (width, height)) 107 | det_im = np.zeros((S, S, 3), dtype=np.uint8) 108 | det_im[:resized_im.shape[0], :resized_im.shape[1], :] = resized_im 109 | return det_im, scale 110 | 111 | 112 | def transform(data, center, output_size, scale, rotation): 113 | scale_ratio = scale 114 | rot = float(rotation) * np.pi / 180.0 115 | #translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio) 116 | t1 = trans.SimilarityTransform(scale=scale_ratio) 117 | cx = center[0] * scale_ratio 118 | cy = center[1] * scale_ratio 119 | t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy)) 120 | t3 = trans.SimilarityTransform(rotation=rot) 121 | t4 = trans.SimilarityTransform(translation=(output_size / 2, 122 | output_size / 2)) 123 | t = t1 + t2 + t3 + t4 124 | M = t.params[0:2] 125 | cropped = cv2.warpAffine(data, 126 | M, (output_size, output_size), 127 | borderValue=0.0) 128 | return cropped, M 129 | 130 | 131 | def trans_points2d(pts, M): 132 | new_pts = np.zeros(shape=pts.shape, dtype=np.float32) 133 | for i in range(pts.shape[0]): 134 | pt = pts[i] 135 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32) 136 | new_pt = np.dot(M, new_pt) 137 | #print('new_pt', new_pt.shape, new_pt) 138 | new_pts[i] = new_pt[0:2] 139 | 140 | return new_pts 141 | 142 | 143 
| def trans_points3d(pts, M): 144 | scale = np.sqrt(M[0][0] * M[0][0] + M[0][1] * M[0][1]) 145 | #print(scale) 146 | new_pts = np.zeros(shape=pts.shape, dtype=np.float32) 147 | for i in range(pts.shape[0]): 148 | pt = pts[i] 149 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32) 150 | new_pt = np.dot(M, new_pt) 151 | #print('new_pt', new_pt.shape, new_pt) 152 | new_pts[i][0:2] = new_pt[0:2] 153 | new_pts[i][2] = pts[i][2] * scale 154 | 155 | return new_pts 156 | 157 | 158 | def trans_points(pts, M): 159 | if pts.shape[1] == 2: 160 | return trans_points2d(pts, M) 161 | else: 162 | return trans_points3d(pts, M) 163 | 164 | -------------------------------------------------------------------------------- /preprocess/insightface_func/utils/__pycache__/face_align_ffhqandnewarc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/insightface_func/utils/__pycache__/face_align_ffhqandnewarc.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/insightface_func/utils/face_align_ffhqandnewarc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Naiyuan liu 3 | Github: https://github.com/NNNNAI 4 | Date: 2021-11-15 19:42:42 5 | LastEditors: Naiyuan liu 6 | LastEditTime: 2021-11-15 20:01:47 7 | Description: 8 | ''' 9 | 10 | import cv2 11 | import numpy as np 12 | from skimage import transform as trans 13 | 14 | src1 = np.array([[51.642, 50.115], [57.617, 49.990], [35.740, 69.007], 15 | [51.157, 89.050], [57.025, 89.702]], 16 | dtype=np.float32) 17 | #<--left 18 | src2 = np.array([[45.031, 50.118], [65.568, 50.872], [39.677, 68.111], 19 | [45.177, 86.190], [64.246, 86.758]], 20 | dtype=np.float32) 21 | 22 | #---frontal 23 | src3 = np.array([[39.730, 51.138], [72.270, 51.138], [56.000, 68.493], 24 | [42.463, 87.010], [69.537, 87.010]], 25 | dtype=np.float32) 26 | 27 | #-->right 28 | src4 = np.array([[46.845, 50.872], [67.382, 50.118], [72.737, 68.111], 29 | [48.167, 86.758], [67.236, 86.190]], 30 | dtype=np.float32) 31 | 32 | #-->right profile 33 | src5 = np.array([[54.796, 49.990], [60.771, 50.115], [76.673, 69.007], 34 | [55.388, 89.702], [61.257, 89.050]], 35 | dtype=np.float32) 36 | 37 | src = np.array([src1, src2, src3, src4, src5]) 38 | src_map = src 39 | 40 | ffhq_src = np.array([[192.98138, 239.94708], [318.90277, 240.1936], [256.63416, 314.01935], 41 | [201.26117, 371.41043], [313.08905, 371.15118]]) 42 | ffhq_src = np.expand_dims(ffhq_src, axis=0) 43 | 44 | large_ffhq_src = ffhq_src/1.5 + 110 45 | 46 | # arcface_src = np.array( 47 | # [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], 48 | # [41.5493, 92.3655], [70.7299, 92.2041]], 49 | # dtype=np.float32) 50 | 51 | # arcface_src = np.expand_dims(arcface_src, axis=0) 52 | 53 | # In[66]: 54 | 55 | 56 | # lmk is prediction; src is template 57 | def estimate_norm(lmk, image_size=112, mode='ffhq'): 58 | assert lmk.shape == (5, 2) 59 | tform = trans.SimilarityTransform() 60 | lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1) 61 | min_M = [] 62 | min_index = [] 63 | min_error = float('inf') 64 | if mode == 'ffhq': 65 | # assert image_size == 112 66 | src = ffhq_src * image_size / 512 67 | elif mode == 'large_ffhq': 68 | src = large_ffhq_src * image_size / 512 69 | else: 70 | src = src_map * image_size / 112 71 | for i in np.arange(src.shape[0]): 72 | tform.estimate(lmk, 
src[i]) 73 | M = tform.params[0:2, :] 74 | results = np.dot(M, lmk_tran.T) 75 | results = results.T 76 | error = np.sum(np.sqrt(np.sum((results - src[i])**2, axis=1))) 77 | # print(error) 78 | if error < min_error: 79 | min_error = error 80 | min_M = M 81 | min_index = i 82 | return min_M, min_index 83 | 84 | 85 | def norm_crop(img, landmark, image_size=112, mode='ffhq'): 86 | if mode == 'Both': 87 | M_None, _ = estimate_norm(landmark, image_size, mode = 'newarc') 88 | M_ffhq, _ = estimate_norm(landmark, image_size, mode='ffhq') 89 | warped_None = cv2.warpAffine(img, M_None, (image_size, image_size), borderValue=0.0) 90 | warped_ffhq = cv2.warpAffine(img, M_ffhq, (image_size, image_size), borderValue=0.0) 91 | return warped_ffhq, warped_None 92 | else: 93 | M, pose_index = estimate_norm(landmark, image_size, mode) 94 | warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0) 95 | return warped 96 | 97 | def square_crop(im, S): 98 | if im.shape[0] > im.shape[1]: 99 | height = S 100 | width = int(float(im.shape[1]) / im.shape[0] * S) 101 | scale = float(S) / im.shape[0] 102 | else: 103 | width = S 104 | height = int(float(im.shape[0]) / im.shape[1] * S) 105 | scale = float(S) / im.shape[1] 106 | resized_im = cv2.resize(im, (width, height)) 107 | det_im = np.zeros((S, S, 3), dtype=np.uint8) 108 | det_im[:resized_im.shape[0], :resized_im.shape[1], :] = resized_im 109 | return det_im, scale 110 | 111 | 112 | def transform(data, center, output_size, scale, rotation): 113 | scale_ratio = scale 114 | rot = float(rotation) * np.pi / 180.0 115 | #translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio) 116 | t1 = trans.SimilarityTransform(scale=scale_ratio) 117 | cx = center[0] * scale_ratio 118 | cy = center[1] * scale_ratio 119 | t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy)) 120 | t3 = trans.SimilarityTransform(rotation=rot) 121 | t4 = trans.SimilarityTransform(translation=(output_size / 2, 122 | output_size / 2)) 123 | t = t1 + t2 + t3 + t4 124 | M = t.params[0:2] 125 | cropped = cv2.warpAffine(data, 126 | M, (output_size, output_size), 127 | borderValue=0.0) 128 | return cropped, M 129 | 130 | 131 | def trans_points2d(pts, M): 132 | new_pts = np.zeros(shape=pts.shape, dtype=np.float32) 133 | for i in range(pts.shape[0]): 134 | pt = pts[i] 135 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32) 136 | new_pt = np.dot(M, new_pt) 137 | #print('new_pt', new_pt.shape, new_pt) 138 | new_pts[i] = new_pt[0:2] 139 | 140 | return new_pts 141 | 142 | 143 | def trans_points3d(pts, M): 144 | scale = np.sqrt(M[0][0] * M[0][0] + M[0][1] * M[0][1]) 145 | #print(scale) 146 | new_pts = np.zeros(shape=pts.shape, dtype=np.float32) 147 | for i in range(pts.shape[0]): 148 | pt = pts[i] 149 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32) 150 | new_pt = np.dot(M, new_pt) 151 | #print('new_pt', new_pt.shape, new_pt) 152 | new_pts[i][0:2] = new_pt[0:2] 153 | new_pts[i][2] = pts[i][2] * scale 154 | 155 | return new_pts 156 | 157 | 158 | def trans_points(pts, M): 159 | if pts.shape[1] == 2: 160 | return trans_points2d(pts, M) 161 | else: 162 | return trans_points3d(pts, M) 163 | 164 | -------------------------------------------------------------------------------- /preprocess/models/__pycache__/attention.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/attention.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/controlnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/controlnet.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/controlnet_attention.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/controlnet_attention.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/controlnet_unet_blocks.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/controlnet_unet_blocks.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/pipeline_animatediff_videocontrolnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/pipeline_animatediff_videocontrolnet.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/pipeline_animatediff_videocontrolnet_threestage.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/pipeline_animatediff_videocontrolnet_threestage.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/pipeline_animatediff_videocontrolnet_twostage.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/pipeline_animatediff_videocontrolnet_twostage.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/pipeline_animatediff_videocontrolnet_twounet_add.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/pipeline_animatediff_videocontrolnet_twounet_add.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/pipeline_controlvideo.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/pipeline_controlvideo.cpython-38.pyc 
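
Note on face_align_ffhqandnewarc.py above: estimate_norm() fits a similarity transform from the five detected landmarks to a template (the FFHQ template when mode='ffhq', the five-pose templates otherwise), and norm_crop() wraps the fit plus the warp in one call. A small sketch of the call pattern with made-up landmarks; the frame path and coordinates are illustrative only.

import cv2
import numpy as np
from insightface_func.utils.face_align_ffhqandnewarc import estimate_norm, norm_crop

img = cv2.imread('frame.png')   # placeholder frame
# Five keypoints: left eye, right eye, nose tip, left and right mouth corner (made up).
lmk = np.array([[210., 240.], [310., 242.], [262., 312.],
                [220., 370.], [305., 368.]], dtype=np.float32)

M, _ = estimate_norm(lmk, image_size=512, mode='ffhq')     # 2x3 similarity transform
aligned = cv2.warpAffine(img, M, (512, 512), borderValue=0.0)

# norm_crop performs the same two steps in one call.
same = norm_crop(img, lmk, image_size=512, mode='ffhq')
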
-------------------------------------------------------------------------------- /preprocess/models/__pycache__/pipeline_controlvideo_AR.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/pipeline_controlvideo_AR.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/pipeline_imageunet_animatediff_twostage.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/pipeline_imageunet_animatediff_twostage.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/resnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/resnet.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/stable_diffusion_controlnet_img2img.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/stable_diffusion_controlnet_img2img.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/unet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/unet.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/unet_blocks.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/unet_blocks.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/models/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /preprocess/models/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import imageio 3 | import numpy as np 4 | from typing import Union 5 | import decord 6 | decord.bridge.set_bridge('torch') 7 | import torch 8 | import torchvision 9 | import PIL 10 | from typing import List 11 | from tqdm import tqdm 12 | from einops import rearrange 13 | 14 | from controlnet_aux import CannyDetector 15 | 16 | from PIL import Image 17 | 18 | def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=4, fps=8): 19 | videos = rearrange(videos, "b c t h w -> t b c h w") 20 | outputs = [] 21 | for x in videos: 22 | x = torchvision.utils.make_grid(x, nrow=n_rows) 23 | x = 
x.transpose(0, 1).transpose(1, 2).squeeze(-1) 24 | if rescale: 25 | x = (x + 1.0) / 2.0 # -1,1 -> 0,1 26 | x = (x * 255).numpy().astype(np.uint8) 27 | outputs.append(x) 28 | 29 | os.makedirs(os.path.dirname(path), exist_ok=True) 30 | imageio.mimsave(path, outputs, fps=fps) 31 | return outputs 32 | 33 | def save_image(image, path, rescale=False, n_rows=4,): 34 | image =rearrange(image, "b c t h w -> t b c h w") 35 | x = image[0] 36 | x = torchvision.utils.make_grid(x, nrow=n_rows) 37 | x = x.transpose(0, 1).transpose(1, 2).squeeze(-1) 38 | if rescale: 39 | x = (x + 1.0) / 2.0 # -1,1 -> 0,1 40 | x = (x * 255).numpy().astype(np.uint8) 41 | x = Image.fromarray(x) 42 | x.save(path) 43 | 44 | def save_videos_grid_pil(videos: List[PIL.Image.Image], path: str, rescale=False, n_rows=4, fps=8): 45 | videos = rearrange(videos, "b c t h w -> t b c h w") 46 | outputs = [] 47 | for x in videos: 48 | x = torchvision.utils.make_grid(x, nrow=n_rows) 49 | x = x.transpose(0, 1).transpose(1, 2).squeeze(-1) 50 | if rescale: 51 | x = (x + 1.0) / 2.0 # -1,1 -> 0,1 52 | x = (x * 255).numpy().astype(np.uint8) 53 | outputs.append(x) 54 | 55 | os.makedirs(os.path.dirname(path), exist_ok=True) 56 | imageio.mimsave(path, outputs, fps=fps) 57 | 58 | def read_video(video_path, video_length, width=512, height=512, frame_rate=2): 59 | vr = decord.VideoReader(video_path, width=width, height=height) 60 | sample_index = list(range(0, len(vr), frame_rate))[:video_length] 61 | video = vr.get_batch(sample_index) 62 | video = rearrange(video, "f h w c -> f c h w") 63 | video = (video / 127.5 - 1.0) 64 | return video 65 | 66 | 67 | def get_annotation(video, annotator): 68 | t2i_transform = torchvision.transforms.ToPILImage() 69 | annotation = [] 70 | for frame in video: 71 | pil_frame = t2i_transform(frame) 72 | if isinstance(annotator, CannyDetector): 73 | annotation.append(annotator(pil_frame, low_threshold=100, high_threshold=200)) 74 | else: 75 | annotation.append(annotator(pil_frame)) 76 | return annotation 77 | 78 | # DDIM Inversion 79 | @torch.no_grad() 80 | def init_prompt(prompt, pipeline): 81 | uncond_input = pipeline.tokenizer( 82 | [""], padding="max_length", max_length=pipeline.tokenizer.model_max_length, 83 | return_tensors="pt" 84 | ) 85 | uncond_embeddings = pipeline.text_encoder(uncond_input.input_ids.to(pipeline.device))[0] 86 | text_input = pipeline.tokenizer( 87 | [prompt], 88 | padding="max_length", 89 | max_length=pipeline.tokenizer.model_max_length, 90 | truncation=True, 91 | return_tensors="pt", 92 | ) 93 | text_embeddings = pipeline.text_encoder(text_input.input_ids.to(pipeline.device))[0] 94 | context = torch.cat([uncond_embeddings, text_embeddings]) 95 | 96 | return context 97 | 98 | 99 | def next_step(model_output: Union[torch.FloatTensor, np.ndarray], timestep: int, 100 | sample: Union[torch.FloatTensor, np.ndarray], ddim_scheduler): 101 | timestep, next_timestep = min( 102 | timestep - ddim_scheduler.config.num_train_timesteps // ddim_scheduler.num_inference_steps, 999), timestep 103 | alpha_prod_t = ddim_scheduler.alphas_cumprod[timestep] if timestep >= 0 else ddim_scheduler.final_alpha_cumprod 104 | alpha_prod_t_next = ddim_scheduler.alphas_cumprod[next_timestep] 105 | beta_prod_t = 1 - alpha_prod_t 106 | next_original_sample = (sample - beta_prod_t ** 0.5 * model_output) / alpha_prod_t ** 0.5 107 | next_sample_direction = (1 - alpha_prod_t_next) ** 0.5 * model_output 108 | next_sample = alpha_prod_t_next ** 0.5 * next_original_sample + next_sample_direction 109 | return next_sample 110 | 111 | 112 
| def get_noise_pred_single(latents, t, context, unet): 113 | noise_pred = unet(latents, t, encoder_hidden_states=context)["sample"] 114 | return noise_pred 115 | 116 | 117 | @torch.no_grad() 118 | def ddim_loop(pipeline, ddim_scheduler, latent, num_inv_steps, prompt): 119 | context = init_prompt(prompt, pipeline) 120 | uncond_embeddings, cond_embeddings = context.chunk(2) 121 | all_latent = [latent] 122 | latent = latent.clone().detach() 123 | for i in tqdm(range(num_inv_steps)): 124 | t = ddim_scheduler.timesteps[len(ddim_scheduler.timesteps) - i - 1] 125 | noise_pred = get_noise_pred_single(latent, t, cond_embeddings, pipeline.unet) 126 | latent = next_step(noise_pred, t, latent, ddim_scheduler) 127 | all_latent.append(latent) 128 | return all_latent 129 | 130 | 131 | @torch.no_grad() 132 | def ddim_inversion(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt=""): 133 | ddim_latents = ddim_loop(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt) 134 | return ddim_latents 135 | -------------------------------------------------------------------------------- /preprocess/res/cp/79999_iter.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ICTMCG/Make-Your-Anchor/da94dda7d32d150628e90e3398d5ba912bd300e1/preprocess/res/cp/79999_iter.pth -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.17.1 2 | bitsandbytes==0.38.0.post1 3 | controlnet_aux==0.0.3 4 | datasets==2.15.0 5 | decord==0.6.0 6 | diffusers==0.25.1 7 | dominate==2.9.1 8 | einops==0.3.0 9 | huggingface_hub==0.20.2 10 | imageio==2.9.0 11 | insightface==0.2.1 12 | matplotlib==3.7.1 13 | moviepy==1.0.3 14 | numpy==1.23.1 15 | opencv_contrib_python==4.3.0.36 16 | opencv_python==4.7.0.72 17 | opencv_python_headless==4.7.0.72 18 | packaging==24.0 19 | Pillow==9.4.0 20 | Pillow==10.3.0 21 | positional_encodings==6.0.1 22 | pytorch_lightning==1.5.0 23 | safetensors==0.4.3 24 | scipy==1.9.1 25 | seaborn==0.13.2 26 | skimage==0.0 27 | tensorflow==2.16.1 28 | torch==2.1.2+cu121 29 | torchvision==0.16.2+cu121 30 | tqdm==4.65.0 31 | transformers==4.27.1 32 | wandb==0.14.0 33 | xformers==0.0.23.post1 34 | -------------------------------------------------------------------------------- /train_body.sh: -------------------------------------------------------------------------------- 1 | cd train 2 | 3 | export CUDA_VISIBLE_DEVICES=0 4 | export MODEL_DIR="runwayml/stable-diffusion-v1-5" 5 | 6 | # load weights from stage-1 7 | unet_model_name_or_path="./checkpoints/pre-trained_weight/body/unet" 8 | controlnet_model_name_or_path="./checkpoints/pre-trained_weight/body/controlnet" 9 | 10 | export OUTPUT_DIR="path/to/save_folder" 11 | json_file=./train_data/body_train.json 12 | 13 | # for validation 14 | val_img1=path/to/body_pose_1 15 | val_img2=path/to/body_pose_2 16 | val_img3=path/to/body_pose_3 17 | val_img4=path/to/body_pose_4 18 | 19 | reference_img=path/to/body_ref 20 | 21 | accelerate launch --main_process_port 65537 train_body.py \ 22 | --resume_from_checkpoint latest \ 23 | --pretrained_model_name_or_path=$MODEL_DIR \ 24 | --unet_model_name_or_path $unet_model_name_or_path \ 25 | --controlnet_model_name_or_path $controlnet_model_name_or_path \ 26 | --output_dir=$OUTPUT_DIR \ 27 | --dataset_name=json \ 28 | --dataset_config_name $json_file \ 29 | --image_column target \ 30 | --conditioning_image_column source \ 
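
Note on the DDIM-inversion helpers at the end of preprocess/models/util.py above (init_prompt, next_step, ddim_loop, ddim_inversion): they walk a latent backwards along the deterministic DDIM trajectory. Below is a hedged sketch of how they might be driven for a single 512x512 image latent with a plain Stable Diffusion 1.5 pipeline; the model id, image path and step count are placeholders, and the repository's own pipelines may wire these functions differently.

import numpy as np
import torch
from PIL import Image
from diffusers import StableDiffusionPipeline, DDIMScheduler

device = 'cuda'
pipe = StableDiffusionPipeline.from_pretrained('runwayml/stable-diffusion-v1-5',
                                               torch_dtype=torch.float16).to(device)
scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
num_inv_steps = 50
scheduler.set_timesteps(num_inv_steps)   # next_step() reads scheduler.num_inference_steps

img = Image.open('frame.png').convert('RGB').resize((512, 512))   # placeholder frame
x = torch.from_numpy(np.array(img)).float().div(127.5).sub(1.0)   # HWC in [-1, 1]
x = x.permute(2, 0, 1)[None].to(device, dtype=torch.float16)      # 1x3x512x512
with torch.no_grad():
    latents = pipe.vae.encode(x).latent_dist.sample() * pipe.vae.config.scaling_factor

# Returns the whole inversion trajectory; the last element is the fully noised latent.
inv_latents = ddim_inversion(pipe, scheduler, latents, num_inv_steps, prompt='')
start_latent = inv_latents[-1]
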
31 | --resolution=512 \ 32 | --learning_rate=1e-5 \ 33 | --validation_image $val_img1 $val_img2 $val_img3 $val_img4 \ 34 | --reference_image $reference_img $reference_img $reference_img $reference_img \ 35 | --train_batch_size=4 \ 36 | --enable_xformers_memory_efficient_attention \ 37 | --tracker_project_name train_body \ 38 | --checkpointing_steps 10000 \ 39 | --validation_steps 1000 \ 40 | --num_train_epochs 60 41 | -------------------------------------------------------------------------------- /train_head.sh: -------------------------------------------------------------------------------- 1 | cd train 2 | 3 | export CUDA_VISIBLE_DEVICES=0 4 | export MODEL_DIR="runwayml/stable-diffusion-inpainting" 5 | 6 | # load weights from stage-1 7 | unet_model_name_or_path="./checkpoints/pre-trained_weight/head/unet" 8 | controlnet_model_name_or_path="./checkpoints/pre-trained_weight/head/controlnet" 9 | 10 | export OUTPUT_DIR="path/to/save_folder" 11 | json_file=./train_data/head_train.json 12 | 13 | # for validation 14 | val_img1=path/to/head_pose_1 15 | val_img2=path/to/head_pose_2 16 | val_img3=path/to/head_pose_3 17 | val_img4=path/to/head_pose_4 18 | 19 | val_mask1=path/to/head_mask_1 20 | val_mask2=path/to/head_mask_2 21 | val_mask3=path/to/head_mask_3 22 | val_mask4=path/to/head_mask_4 23 | 24 | reference_img=path/to/head_ref 25 | 26 | accelerate launch --main_process_port 65535 train_head.py \ 27 | --resume_from_checkpoint latest \ 28 | --pretrained_model_name_or_path=$MODEL_DIR \ 29 | --controlnet_model_name_or_path=$controlnet_model_name_or_path \ 30 | --output_dir=$OUTPUT_DIR \ 31 | --dataset_name=json \ 32 | --dataset_config_name $json_file \ 33 | --image_column target \ 34 | --conditioning_image_column source \ 35 | --resolution=256 \ 36 | --learning_rate=1e-5 \ 37 | --validation_image $val_img1 $val_img2 $val_img3 $val_img4 \ 38 | --validation_mask $val_mask1 $val_mask2 $val_mask3 $val_mask4 \ 39 | --reference_image $reference_img $reference_img $reference_img $reference_img \ 40 | --train_batch_size=4 \ 41 | --enable_xformers_memory_efficient_attention \ 42 | --tracker_project_name train_head \ 43 | --checkpointing_steps 10000 \ 44 | --validation_steps 1000 \ 45 | --num_train_epochs 60 46 | --------------------------------------------------------------------------------
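
Both training scripts above point --dataset_name=json at a JSON file whose rows plausibly pair a conditioning pose image (the "source" column, per --conditioning_image_column) with the ground-truth frame (the "target" column, per --image_column). The helper below is a hypothetical way to produce such a file for train_body.sh; the directory layout, the JSON-lines format and the exact set of fields expected by train_body.py are assumptions, not something fixed by the scripts.

import json
import os

pose_dir = 'train_data/poses'     # placeholder: rendered body-pose images
frame_dir = 'train_data/frames'   # placeholder: matching ground-truth frames

with open('train_data/body_train.json', 'w') as f:
    for name in sorted(os.listdir(pose_dir)):
        row = {
            'source': os.path.join(pose_dir, name),   # --conditioning_image_column
            'target': os.path.join(frame_dir, name),  # --image_column
        }
        f.write(json.dumps(row) + '\n')
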