├── acceleration ├── CMakeLists.txt ├── cuda_kernels.cu ├── cuda_kernels.hpp └── cuda_postprocess.cpp ├── face_modules ├── .gitignore ├── ReadMe.txt ├── __init__.py ├── infer_demo.py ├── model.py ├── mtcnn.py ├── mtcnn_pytorch │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── caffe_models │ │ ├── det1.caffemodel │ │ ├── det1.prototxt │ │ ├── det2.caffemodel │ │ ├── det2.prototxt │ │ ├── det3.caffemodel │ │ ├── det3.prototxt │ │ ├── det4.caffemodel │ │ └── det4.prototxt │ ├── extract_weights_from_caffe_models.py │ ├── get_aligned_face_from_mtcnn.ipynb │ ├── images │ │ ├── example.png │ │ ├── jf.jpg │ │ ├── office1.jpg │ │ ├── office2.jpg │ │ ├── office3.jpg │ │ ├── office4.jpg │ │ └── office5.jpg │ ├── refine_faces.ipynb │ ├── src │ │ ├── __init__.py │ │ ├── align_trans.py │ │ ├── box_utils.py │ │ ├── detector.py │ │ ├── first_stage.py │ │ ├── get_nets.py │ │ ├── matlab_cp2tform.py │ │ ├── visualization_utils.py │ │ └── weights │ │ │ ├── onet.npy │ │ │ ├── pnet.npy │ │ │ └── rnet.npy │ ├── test_on_images.ipynb │ └── try_mtcnn_step_by_step.ipynb └── preprocess_images.py ├── inference_demo.py ├── mtcnn_pytorch ├── network ├── AADLayer.py ├── AEI_Net.py ├── HEAR_Net.py ├── MultiscaleDiscriminator.py └── __init__.py ├── online_preview.py ├── tmp_script ├── __init__.py ├── check_arcface_feature_map.py └── test_structure_AEI.py ├── train_AEI.py ├── train_HEAR.py └── utils ├── Dataset.py ├── download_vggface_dataset.py └── split_hearnet_data.py /acceleration/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(FaceShifter_Accelerations) 3 | 4 | set(CMAKE_CXX_COMPILER /usr/bin/g++) 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 6 | 7 | find_package(pybind11 REQUIRED) 8 | find_package(CUDA) 9 | find_package(OpenCV) 10 | 11 | include_directories( 12 | /usr/local/cuda-10.1/include/ 13 | ${OpenCV_INCLUDE_DIRS} 14 | ) 15 | link_directories( 16 | /usr/local/cuda-10.1/lib64/ 17 | ) 18 | 19 | cuda_add_library(culib SHARED cuda_kernels.cu) 20 | 21 | pybind11_add_module(cuda_postprocess cuda_postprocess.cpp) 22 | target_link_libraries(cuda_postprocess 23 | culib 24 | ${OpenCV_LIBS} 25 | ) 26 | 27 | -------------------------------------------------------------------------------- /acceleration/cuda_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_kernels.hpp" 2 | 3 | __global__ void restore_kernel(uchar* ret, const float* Yst, const float* mask, 4 | const uchar* Xt, const float* trans, 5 | const int H, const int W, const int h, const int w) { 6 | int tid = threadIdx.x + blockIdx.x*blockDim.x; 7 | int offset = blockDim.x * gridDim.x; 8 | while (tid < h * w) { 9 | int x = tid % w; 10 | int y = tid / w; 11 | float sx = x*trans[0] + y*trans[1] + trans[2]; 12 | float sy = x*trans[3] + y*trans[4] + trans[5]; 13 | if (sx < 0 || sy < 0 || sx >= W-1 || sy >= H-1){ 14 | ret[tid*3+0] = Xt[tid*3+0]; 15 | ret[tid*3+1] = Xt[tid*3+1]; 16 | ret[tid*3+2] = Xt[tid*3+2]; 17 | tid += offset; 18 | continue; 19 | } 20 | 21 | float xp = sx - (int)sx; 22 | float yp = sy - (int)sy; 23 | float color[3] = {0}; 24 | for(int i=0;i<3;i++){ 25 | float v = 0; 26 | float a = Yst[i*H*W + int(sy)*W + (int)(sx)]; 27 | float b = Yst[i*H*W + int(sy)*W + (int)(sx+1)]; 28 | float c = Yst[i*H*W + int(sy+1)*W + (int)(sx)]; 29 | float d = Yst[i*H*W + int(sy+1)*W + (int)(sx+1)]; 30 | float x1 = a + (b-a)*xp; float x2 = c + (d-c)*xp; 31 | v = x1 + (x2-x1)*yp; 32 | color[i] = v * 0.5 + 
0.5; 33 | } 34 | float alpha = 0; 35 | { 36 | float a = mask[(int)(sy)*W + (int)(sx)]; 37 | float b = mask[(int)(sy)*W + (int)(sx+1)]; 38 | float c = mask[(int)(sy+1)*W + (int)(sx)]; 39 | float d = mask[(int)(sy+1)*W + (int)(sx+1)]; 40 | float x1 = a + (b-a)*xp; float x2 = c + (d-c)*xp; 41 | alpha = x1 + (x2-x1)*yp; 42 | } 43 | 44 | for(int i=0;i<3;i++){ 45 | float c = color[i]*255*alpha + Xt[tid*3+(i)]*(1-alpha); 46 | c = c < 0 ? 0 : c; 47 | c = c > 255 ? 255 : c; 48 | ret[tid*3+i] = c; 49 | } 50 | 51 | tid += offset; 52 | } 53 | } 54 | 55 | void restore_image(uchar* ret, const float* Yst, const float* mask, const uchar* Xt, 56 | const float* trans, const int H, const int W, const int h, const int w) { 57 | restore_kernel<<<1000, 64>>>(ret, Yst, mask, Xt, trans, H, W, h, w); 58 | cudaDeviceSynchronize(); 59 | } 60 |
-------------------------------------------------------------------------------- /acceleration/cuda_kernels.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef CUDA_KERNELS_H 3 | #define CUDA_KERNELS_H 4 | 5 | #include <cuda.h> 6 | #include <cuda_runtime.h> 7 | 8 | typedef unsigned char uchar; 9 | 10 | void restore_image(uchar*, const float*, const float*, const uchar*, 11 | const float*, const int, const int, const int, const int); 12 | 13 | 14 | 15 | #endif /* End of CUDA_KERNELS.H */ 16 | 17 | 18 | 19 | 20 |
-------------------------------------------------------------------------------- /acceleration/cuda_postprocess.cpp: -------------------------------------------------------------------------------- 1 | #include <pybind11/pybind11.h> 2 | #include <pybind11/numpy.h> 3 | #include <iostream> 4 | #include <cuda_runtime.h> 5 | #include "cuda_kernels.hpp" 6 | 7 | using std::cout; 8 | using std::endl; 9 | 10 | namespace py = pybind11; 11 | using namespace std; 12 | 13 | class CudaPostprocess{ 14 | public: 15 | CudaPostprocess(int H, int W){ 16 | this->H = H; 17 | this->W = W; 18 | cudaMalloc((void**)&Yst_dev, H*W*3*sizeof(float)); 19 | cudaMalloc((void**)&mask_dev, H*W*sizeof(float)); 20 | cudaMalloc((void**)&trans_dev, 6*sizeof(float)); 21 | ret_dev = NULL; 22 | Xt_raw_dev = NULL; 23 | ret_size = 0; 24 | } 25 | py::array_t<uint8_t> restore(py::array_t<float> Yst, py::array_t<float> mask, 26 | py::array_t<float> trans, py::array_t<uint8_t> Xt_raw, int h, int w) { 27 | py::buffer_info Yst_buf = Yst.request(); 28 | float* Yst_ptr = (float*)Yst_buf.ptr; 29 | 30 | py::buffer_info mask_buf = mask.request(); 31 | float* mask_ptr = (float*)mask_buf.ptr; 32 | 33 | py::buffer_info Xt_buf = Xt_raw.request(); 34 | uchar* Xt_ptr = (uchar*)Xt_buf.ptr; 35 | 36 | float* trans_ptr = (float*)trans.request().ptr; 37 | 38 | if (h * w * 3 * sizeof(uchar) > ret_size){ 39 | if (ret_dev != NULL){ 40 | cudaFree(ret_dev); 41 | cudaFree(Xt_raw_dev); 42 | ret_dev = NULL; 43 | Xt_raw_dev = NULL; 44 | } 45 | cudaMalloc((void**)&ret_dev, h*w*3*sizeof(uchar)); 46 | cudaMalloc((void**)&Xt_raw_dev, h*w*3*sizeof(uchar)); 47 | ret_size = h * w * 3 * sizeof(uchar); 48 | } 49 | cudaMemcpy(Yst_dev, Yst_ptr, H*W*3*sizeof(float), cudaMemcpyHostToDevice); 50 | cudaMemcpy(mask_dev, mask_ptr, H*W*sizeof(float), cudaMemcpyHostToDevice); 51 | cudaMemcpy(trans_dev, trans_ptr, 6*sizeof(float), cudaMemcpyHostToDevice); 52 | cudaMemcpy(Xt_raw_dev, Xt_ptr, h*w*3*sizeof(uchar), cudaMemcpyHostToDevice); 53 | 54 | restore_image(ret_dev, Yst_dev, mask_dev, Xt_raw_dev, trans_dev, H, W, h, w); 55 | 56 | auto ret = py::array_t<uint8_t>({h, w, 3}); 57 | py::buffer_info info = ret.request(); 58 | cudaMemcpy(info.ptr, ret_dev, h*w*3*sizeof(uchar), cudaMemcpyDeviceToHost); 59 | return ret; 60 | } 61 | ~CudaPostprocess(){ 62 |
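/* release the device buffers allocated in the constructor and in restore() */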
cudaFree(Yst_dev); 63 | cudaFree(mask_dev); 64 | cudaFree(trans_dev); 65 | if (ret_dev != NULL) cudaFree(ret_dev); 66 | if (Xt_raw_dev != NULL) cudaFree(Xt_raw_dev); 67 | } 68 | 69 | private: 70 | float *Yst_dev, *mask_dev; 71 | uchar *ret_dev; 72 | uchar *Xt_raw_dev; 73 | int ret_size; 74 | float *trans_dev; 75 | int H, W; 76 | }; 77 | 78 | PYBIND11_MODULE(cuda_postprocess, m) 79 | { 80 | m.doc() = "FaceShifter postprocess accelerated by cuda"; 81 | py::class_<CudaPostprocess>(m, "CudaPostprocess") 82 | .def(py::init<int, int>()) 83 | .def("restore", &CudaPostprocess::restore); 84 | } 85 | 86 | 87 | 88 | 89 | /* EOF */ 90 | 91 |
-------------------------------------------------------------------------------- /face_modules/.gitignore: -------------------------------------------------------------------------------- 1 | model_ir_se50.pth 2 | libnvjpeg.cpython-36m-x86_64-linux-gnu.so 3 |
-------------------------------------------------------------------------------- /face_modules/ReadMe.txt: -------------------------------------------------------------------------------- 1 | https://github.com/TreB1eN/InsightFace_Pytorch 2 |
-------------------------------------------------------------------------------- /face_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/__init__.py
-------------------------------------------------------------------------------- /face_modules/infer_demo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mtcnn import MTCNN 3 | import cv2 4 | import numpy as np 5 | 6 | import PIL.Image as Image 7 | from model import Backbone, Arcface, MobileFaceNet, Am_softmax, l2_norm 8 | from torchvision import transforms as trans 9 | 10 | device = torch.device('cuda:0') 11 | mtcnn = MTCNN() 12 | 13 | model = Backbone(50, 0.6, 'ir_se').to(device) 14 | model.eval() 15 | model.load_state_dict(torch.load('./model_ir_se50.pth')) 16 | 17 | # threshold = 1.54 18 | test_transform = trans.Compose([ 19 | trans.ToTensor(), 20 | trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 21 | ]) 22 | 23 | 24 | img = cv2.imread('/home/taotao/Downloads/celeba-512/000014.jpg.jpg')[:, :, ::-1] 25 | 26 | faces = mtcnn.align_multi(Image.fromarray(img), limit=10, min_face_size=30) 27 | input = test_transform(faces[0]).unsqueeze(0) 28 | embbed, feats = model(input.cuda()) 29 | print(embbed.shape) 30 | # print(bboxes) 31 | face = np.array(faces[0])[:,:,::-1] 32 | cv2.imshow('', face) 33 | cv2.waitKey(0) 34 |
-------------------------------------------------------------------------------- /face_modules/model.py: -------------------------------------------------------------------------------- 1 | from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, Dropout2d, Dropout, AvgPool2d, MaxPool2d, AdaptiveAvgPool2d, Sequential, Module, Parameter 2 | import torch.nn.functional as F 3 | import torch 4 | from collections import namedtuple 5 | import math 6 | import pdb 7 | 8 | ################################## Original Arcface Model ############################################################# 9 | 10 | class Flatten(Module): 11 | def forward(self, input): 12 | return input.view(input.size(0), -1) 13 | 14 | def l2_norm(input,axis=1): 15 | norm = torch.norm(input,2,axis,True) 16 | output = torch.div(input, norm) 17 | return output 18 | 19 | class SEModule(Module): 20 | def __init__(self, channels, reduction): 21 | super(SEModule, self).__init__() 22 |
self.avg_pool = AdaptiveAvgPool2d(1) 23 | self.fc1 = Conv2d( 24 | channels, channels // reduction, kernel_size=1, padding=0 ,bias=False) 25 | self.relu = ReLU(inplace=True) 26 | self.fc2 = Conv2d( 27 | channels // reduction, channels, kernel_size=1, padding=0 ,bias=False) 28 | self.sigmoid = Sigmoid() 29 | 30 | def forward(self, x): 31 | module_input = x 32 | x = self.avg_pool(x) 33 | x = self.fc1(x) 34 | x = self.relu(x) 35 | x = self.fc2(x) 36 | x = self.sigmoid(x) 37 | return module_input * x 38 | 39 | class bottleneck_IR(Module): 40 | def __init__(self, in_channel, depth, stride): 41 | super(bottleneck_IR, self).__init__() 42 | if in_channel == depth: 43 | self.shortcut_layer = MaxPool2d(1, stride) 44 | else: 45 | self.shortcut_layer = Sequential( 46 | Conv2d(in_channel, depth, (1, 1), stride ,bias=False), BatchNorm2d(depth)) 47 | self.res_layer = Sequential( 48 | BatchNorm2d(in_channel), 49 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1 ,bias=False), PReLU(depth), 50 | Conv2d(depth, depth, (3, 3), stride, 1 ,bias=False), BatchNorm2d(depth)) 51 | 52 | def forward(self, x): 53 | shortcut = self.shortcut_layer(x) 54 | res = self.res_layer(x) 55 | return res + shortcut 56 | 57 | class bottleneck_IR_SE(Module): 58 | def __init__(self, in_channel, depth, stride): 59 | super(bottleneck_IR_SE, self).__init__() 60 | if in_channel == depth: 61 | self.shortcut_layer = MaxPool2d(1, stride) 62 | else: 63 | self.shortcut_layer = Sequential( 64 | Conv2d(in_channel, depth, (1, 1), stride ,bias=False), 65 | BatchNorm2d(depth)) 66 | self.res_layer = Sequential( 67 | BatchNorm2d(in_channel), 68 | Conv2d(in_channel, depth, (3,3), (1,1),1 ,bias=False), 69 | PReLU(depth), 70 | Conv2d(depth, depth, (3,3), stride, 1 ,bias=False), 71 | BatchNorm2d(depth), 72 | SEModule(depth,16) 73 | ) 74 | def forward(self,x): 75 | shortcut = self.shortcut_layer(x) 76 | res = self.res_layer(x) 77 | return res + shortcut 78 | 79 | class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): 80 | '''A named tuple describing a ResNet block.''' 81 | 82 | def get_block(in_channel, depth, num_units, stride = 2): 83 | return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units-1)] 84 | 85 | def get_blocks(num_layers): 86 | if num_layers == 50: 87 | blocks = [ 88 | get_block(in_channel=64, depth=64, num_units = 3), 89 | get_block(in_channel=64, depth=128, num_units=4), 90 | get_block(in_channel=128, depth=256, num_units=14), 91 | get_block(in_channel=256, depth=512, num_units=3) 92 | ] 93 | elif num_layers == 100: 94 | blocks = [ 95 | get_block(in_channel=64, depth=64, num_units=3), 96 | get_block(in_channel=64, depth=128, num_units=13), 97 | get_block(in_channel=128, depth=256, num_units=30), 98 | get_block(in_channel=256, depth=512, num_units=3) 99 | ] 100 | elif num_layers == 152: 101 | blocks = [ 102 | get_block(in_channel=64, depth=64, num_units=3), 103 | get_block(in_channel=64, depth=128, num_units=8), 104 | get_block(in_channel=128, depth=256, num_units=36), 105 | get_block(in_channel=256, depth=512, num_units=3) 106 | ] 107 | return blocks 108 | 109 | class Backbone(Module): 110 | def __init__(self, num_layers, drop_ratio, mode='ir'): 111 | super(Backbone, self).__init__() 112 | assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152' 113 | assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' 114 | blocks = get_blocks(num_layers) 115 | if mode == 'ir': 116 | unit_module = bottleneck_IR 117 | elif mode == 'ir_se': 118 | unit_module = 
bottleneck_IR_SE 119 | self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1 ,bias=False), 120 | BatchNorm2d(64), 121 | PReLU(64)) 122 | self.output_layer = Sequential(BatchNorm2d(512), 123 | Dropout(drop_ratio), 124 | Flatten(), 125 | Linear(512 * 7 * 7, 512), 126 | BatchNorm1d(512)) 127 | # ) 128 | modules = [] 129 | for block in blocks: 130 | for bottleneck in block: 131 | modules.append( 132 | unit_module(bottleneck.in_channel, 133 | bottleneck.depth, 134 | bottleneck.stride)) 135 | self.body = Sequential(*modules) 136 | 137 | def forward(self,x): 138 | feats = [] 139 | x = self.input_layer(x) 140 | for m in self.body.children(): 141 | x = m(x) 142 | feats.append(x) 143 | # x = self.body(x) 144 | x = self.output_layer(x) 145 | return l2_norm(x), feats 146 | # return x 147 | 148 | ################################## MobileFaceNet ############################################################# 149 | 150 | class Conv_block(Module): 151 | def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1): 152 | super(Conv_block, self).__init__() 153 | self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False) 154 | self.bn = BatchNorm2d(out_c) 155 | self.prelu = PReLU(out_c) 156 | def forward(self, x): 157 | x = self.conv(x) 158 | x = self.bn(x) 159 | x = self.prelu(x) 160 | return x 161 | 162 | class Linear_block(Module): 163 | def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1): 164 | super(Linear_block, self).__init__() 165 | self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False) 166 | self.bn = BatchNorm2d(out_c) 167 | def forward(self, x): 168 | x = self.conv(x) 169 | x = self.bn(x) 170 | return x 171 | 172 | class Depth_Wise(Module): 173 | def __init__(self, in_c, out_c, residual = False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1): 174 | super(Depth_Wise, self).__init__() 175 | self.conv = Conv_block(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1)) 176 | self.conv_dw = Conv_block(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride) 177 | self.project = Linear_block(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1)) 178 | self.residual = residual 179 | def forward(self, x): 180 | if self.residual: 181 | short_cut = x 182 | x = self.conv(x) 183 | x = self.conv_dw(x) 184 | x = self.project(x) 185 | if self.residual: 186 | output = short_cut + x 187 | else: 188 | output = x 189 | return output 190 | 191 | class Residual(Module): 192 | def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)): 193 | super(Residual, self).__init__() 194 | modules = [] 195 | for _ in range(num_block): 196 | modules.append(Depth_Wise(c, c, residual=True, kernel=kernel, padding=padding, stride=stride, groups=groups)) 197 | self.model = Sequential(*modules) 198 | def forward(self, x): 199 | return self.model(x) 200 | 201 | class MobileFaceNet(Module): 202 | def __init__(self, embedding_size): 203 | super(MobileFaceNet, self).__init__() 204 | self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1)) 205 | self.conv2_dw = Conv_block(64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64) 206 | self.conv_23 = Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128) 207 | self.conv_3 = Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)) 208 | self.conv_34 
= Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256) 209 | self.conv_4 = Residual(128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)) 210 | self.conv_45 = Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512) 211 | self.conv_5 = Residual(128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)) 212 | self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0)) 213 | self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(7,7), stride=(1, 1), padding=(0, 0)) 214 | self.conv_6_flatten = Flatten() 215 | self.linear = Linear(512, embedding_size, bias=False) 216 | self.bn = BatchNorm1d(embedding_size) 217 | 218 | def forward(self, x): 219 | out = self.conv1(x) 220 | 221 | out = self.conv2_dw(out) 222 | 223 | out = self.conv_23(out) 224 | 225 | out = self.conv_3(out) 226 | 227 | out = self.conv_34(out) 228 | 229 | out = self.conv_4(out) 230 | 231 | out = self.conv_45(out) 232 | 233 | out = self.conv_5(out) 234 | 235 | out = self.conv_6_sep(out) 236 | 237 | out = self.conv_6_dw(out) 238 | 239 | out = self.conv_6_flatten(out) 240 | 241 | out = self.linear(out) 242 | 243 | out = self.bn(out) 244 | return l2_norm(out) 245 | 246 | ################################## Arcface head ############################################################# 247 | 248 | class Arcface(Module): 249 | # implementation of additive margin softmax loss in https://arxiv.org/abs/1801.05599 250 | def __init__(self, embedding_size=512, classnum=51332, s=64., m=0.5): 251 | super(Arcface, self).__init__() 252 | self.classnum = classnum 253 | self.kernel = Parameter(torch.Tensor(embedding_size,classnum)) 254 | # initial kernel 255 | self.kernel.data.uniform_(-1, 1).renorm_(2,1,1e-5).mul_(1e5) 256 | self.m = m # the margin value, default is 0.5 257 | self.s = s # scalar value default is 64, see normface https://arxiv.org/abs/1704.06369 258 | self.cos_m = math.cos(m) 259 | self.sin_m = math.sin(m) 260 | self.mm = self.sin_m * m # issue 1 261 | self.threshold = math.cos(math.pi - m) 262 | def forward(self, embbedings, label): 263 | # weights norm 264 | nB = len(embbedings) 265 | kernel_norm = l2_norm(self.kernel,axis=0) 266 | # cos(theta+m) 267 | cos_theta = torch.mm(embbedings,kernel_norm) 268 | # output = torch.mm(embbedings,kernel_norm) 269 | cos_theta = cos_theta.clamp(-1,1) # for numerical stability 270 | cos_theta_2 = torch.pow(cos_theta, 2) 271 | sin_theta_2 = 1 - cos_theta_2 272 | sin_theta = torch.sqrt(sin_theta_2) 273 | cos_theta_m = (cos_theta * self.cos_m - sin_theta * self.sin_m) 274 | # this condition controls the theta+m should in range [0, pi] 275 | # 0<=theta+m<=pi 276 | # -m<=theta<=pi-m 277 | cond_v = cos_theta - self.threshold 278 | cond_mask = cond_v <= 0 279 | keep_val = (cos_theta - self.mm) # when theta not in [0,pi], use cosface instead 280 | cos_theta_m[cond_mask] = keep_val[cond_mask] 281 | output = cos_theta * 1.0 # a little bit hacky way to prevent in_place operation on cos_theta 282 | idx_ = torch.arange(0, nB, dtype=torch.long) 283 | output[idx_, label] = cos_theta_m[idx_, label] 284 | output *= self.s # scale up in order to make softmax work, first introduced in normface 285 | return output 286 | 287 | ################################## Cosface head ############################################################# 288 | 289 | class Am_softmax(Module): 290 | # implementation of additive margin softmax loss in https://arxiv.org/abs/1801.05599 291 | def 
__init__(self,embedding_size=512,classnum=51332): 292 | super(Am_softmax, self).__init__() 293 | self.classnum = classnum 294 | self.kernel = Parameter(torch.Tensor(embedding_size,classnum)) 295 | # initial kernel 296 | self.kernel.data.uniform_(-1, 1).renorm_(2,1,1e-5).mul_(1e5) 297 | self.m = 0.35 # additive margin recommended by the paper 298 | self.s = 30. # see normface https://arxiv.org/abs/1704.06369 299 | def forward(self,embbedings,label): 300 | kernel_norm = l2_norm(self.kernel,axis=0) 301 | cos_theta = torch.mm(embbedings,kernel_norm) 302 | cos_theta = cos_theta.clamp(-1,1) # for numerical stability 303 | phi = cos_theta - self.m 304 | label = label.view(-1,1) #size=(B,1) 305 | index = cos_theta.data * 0.0 #size=(B,Classnum) 306 | index.scatter_(1,label.data.view(-1,1),1) 307 | index = index.byte() 308 | output = cos_theta * 1.0 309 | output[index] = phi[index] #only change the correct predicted output 310 | output *= self.s # scale up in order to make softmax work, first introduced in normface 311 | return output 312 | 313 | -------------------------------------------------------------------------------- /face_modules/mtcnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from PIL import Image 4 | from torch.autograd import Variable 5 | from mtcnn_pytorch.src.get_nets import PNet, RNet, ONet 6 | from mtcnn_pytorch.src.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square 7 | from mtcnn_pytorch.src.first_stage import run_first_stage 8 | from mtcnn_pytorch.src.align_trans import get_reference_facial_points, warp_and_crop_face 9 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 10 | # device = 'cpu' 11 | 12 | class MTCNN(): 13 | def __init__(self): 14 | self.pnet = PNet().to(device) 15 | self.rnet = RNet().to(device) 16 | self.onet = ONet().to(device) 17 | self.pnet.eval() 18 | self.rnet.eval() 19 | self.onet.eval() 20 | self.refrence = get_reference_facial_points(default_square= True) 21 | 22 | def align(self, img, crop_size=(112, 112), return_trans_inv=False): 23 | _, landmarks = self.detect_faces(img) 24 | if len(landmarks) == 0: 25 | return None if not return_trans_inv else (None, None) 26 | facial5points = [[landmarks[0][j],landmarks[0][j+5]] for j in range(5)] 27 | warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=crop_size, 28 | return_trans_inv=return_trans_inv) 29 | if return_trans_inv: 30 | return Image.fromarray(warped_face[0]), warped_face[1] 31 | else: 32 | return Image.fromarray(warped_face) 33 | 34 | def align_fully(self, img, crop_size=(112, 112), return_trans_inv=False, ori=[0, 1, 3], fast_mode=True): 35 | ori_size = img.copy() 36 | h = img.size[1] 37 | w = img.size[0] 38 | sw = 320. 
if fast_mode else w 39 | scale = sw / w 40 | img = img.resize((int(w*scale), int(h*scale))) 41 | candi = [] 42 | for i in ori: 43 | if len(candi) > 0: 44 | break 45 | if i > 0: 46 | rimg = img.transpose(i+1) 47 | else: 48 | rimg = img 49 | box, landmarks = self.detect_faces(rimg, min_face_size=sw/10, thresholds=[0.6, 0.7, 0.7]) 50 | landmarks /= scale 51 | if len(landmarks) == 0: 52 | continue 53 | if i == 0: 54 | f5p = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)] 55 | elif i == 1: 56 | f5p = [[w-1-landmarks[0][j+5], landmarks[0][j]] for j in range(5)] 57 | elif i == 2: 58 | f5p = [[w-1-landmarks[0][j], h-1-landmarks[0][j+5]] for j in range(5)] 59 | elif i == 3: 60 | f5p = [[landmarks[0][j + 5], h-1-landmarks[0][j]] for j in range(5)] 61 | candi.append((box[0][4], f5p)) 62 | if len(candi) == 0: 63 | return None if not return_trans_inv else (None, None) 64 | while len(candi) > 1: 65 | if candi[0][0] > candi[1][0]: 66 | del candi[1] 67 | else: 68 | del candi[0] 69 | facial5points = candi[0][1] 70 | warped_face = warp_and_crop_face(np.array(ori_size), facial5points, self.refrence, crop_size=crop_size, 71 | return_trans_inv=return_trans_inv) 72 | if return_trans_inv: 73 | return Image.fromarray(warped_face[0]), warped_face[1] 74 | else: 75 | return Image.fromarray(warped_face) 76 | 77 | def align_multi(self, img, limit=None, min_face_size=64.0, crop_size=(112, 112)): 78 | boxes, landmarks = self.detect_faces(img, min_face_size) 79 | if len(landmarks) == 0: 80 | return None 81 | if limit: 82 | boxes = boxes[:limit] 83 | landmarks = landmarks[:limit] 84 | faces = [] 85 | for landmark in landmarks: 86 | facial5points = [[landmark[j],landmark[j+5]] for j in range(5)] 87 | warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=crop_size) 88 | faces.append(Image.fromarray(warped_face)) 89 | # return boxes, faces 90 | return faces 91 | 92 | def get_landmarks(self, img, min_face_size=32, crop_size=(256, 256), fast_mode=False, ori=[0,1,3]): 93 | ori_size = img.copy() 94 | h = img.size[1] 95 | w = img.size[0] 96 | sw = 640. 
if fast_mode else w 97 | scale = sw / w 98 | img = img.resize((int(w*scale), int(h*scale))) 99 | min_face_size = min_face_size if not fast_mode else sw/20 100 | candi = [] 101 | boxes = np.zeros([0, 5]) 102 | for i in ori: 103 | if i > 0: 104 | rimg = img.transpose(i+1) 105 | else: 106 | rimg = img 107 | box, landmarks = self.detect_faces(rimg, min_face_size=min_face_size, thresholds=[0.6, 0.7, 0.7]) 108 | landmarks /= scale 109 | if len(landmarks) == 0: 110 | continue 111 | if i == 0: 112 | f5p = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)] 113 | elif i == 1: 114 | f5p = [[w-1-landmarks[0][j+5], landmarks[0][j]] for j in range(5)] 115 | x1 = w-1-box[:, 1] 116 | y1 = box[:, 0] 117 | x2 = w-1-box[:, 3] 118 | y2 = box[:, 2] 119 | box[:, :4] = np.stack((x2, y1, x1, y2), axis=1) 120 | elif i == 2: 121 | f5p = [[w-1-landmarks[0][j], h-1-landmarks[0][j+5]] for j in range(5)] 122 | x1 = w-1-box[:, 0] 123 | y1 = h-1-box[:, 1] 124 | x2 = w-1-box[:, 2] 125 | y2 = h-1-box[:, 3] 126 | box[:, :4] = np.stack((x2, y2, x1, y1), axis=1) 127 | elif i == 3: 128 | f5p = [[landmarks[0][j + 5], h-1-landmarks[0][j]] for j in range(5)] 129 | x1 = box[:, 1] 130 | y1 = h-1-box[:, 0] 131 | x2 = box[:, 3] 132 | y2 = h-1-box[:, 2] 133 | box[:, :4] = np.stack((x1, y2, x2, y1), axis=1) 134 | candi.append(f5p) 135 | boxes = np.concatenate((boxes, box), axis=0) 136 | # pick = nms(boxes) 137 | faces = [] 138 | for idx, facial5points in enumerate(candi): 139 | # if idx not in pick: 140 | # continue 141 | warped_face = warp_and_crop_face(np.array(ori_size), facial5points, self.refrence, crop_size=crop_size, 142 | return_trans_inv=False) 143 | faces.append((warped_face, facial5points)) 144 | return faces 145 | 146 | def detect_faces(self, image, min_face_size=64.0, 147 | thresholds=[0.6, 0.7, 0.8], 148 | nms_thresholds=[0.7, 0.7, 0.7]): 149 | """ 150 | Arguments: 151 | image: an instance of PIL.Image. 152 | min_face_size: a float number. 153 | thresholds: a list of length 3. 154 | nms_thresholds: a list of length 3. 155 | 156 | Returns: 157 | two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10], 158 | bounding boxes and facial landmarks. 
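(in practice each box row also carries its detection score as a fifth entry).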
159 | """ 160 | 161 | # BUILD AN IMAGE PYRAMID 162 | width, height = image.size 163 | min_length = min(height, width) 164 | 165 | min_detection_size = 12 166 | factor = 0.707 # sqrt(0.5) 167 | 168 | # scales for scaling the image 169 | scales = [] 170 | 171 | # scales the image so that 172 | # minimum size that we can detect equals to 173 | # minimum face size that we want to detect 174 | m = min_detection_size/min_face_size 175 | min_length *= m 176 | 177 | factor_count = 0 178 | while min_length > min_detection_size: 179 | scales.append(m*factor**factor_count) 180 | min_length *= factor 181 | factor_count += 1 182 | 183 | # STAGE 1 184 | 185 | # it will be returned 186 | bounding_boxes = [] 187 | 188 | with torch.no_grad(): 189 | # run P-Net on different scales 190 | for s in scales: 191 | boxes = run_first_stage(image, self.pnet, scale=s, threshold=thresholds[0]) 192 | bounding_boxes.append(boxes) 193 | 194 | # collect boxes (and offsets, and scores) from different scales 195 | bounding_boxes = [i for i in bounding_boxes if i is not None] 196 | if len(bounding_boxes) == 0: 197 | return np.zeros([0]), np.zeros([0]) 198 | bounding_boxes = np.vstack(bounding_boxes) 199 | 200 | keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0]) 201 | bounding_boxes = bounding_boxes[keep] 202 | 203 | # use offsets predicted by pnet to transform bounding boxes 204 | bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:]) 205 | # shape [n_boxes, 5] 206 | 207 | bounding_boxes = convert_to_square(bounding_boxes) 208 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 209 | 210 | # STAGE 2 211 | 212 | img_boxes = get_image_boxes(bounding_boxes, image, size=24) 213 | img_boxes = torch.FloatTensor(img_boxes).to(device) 214 | 215 | output = self.rnet(img_boxes) 216 | offsets = output[0].cpu().data.numpy() # shape [n_boxes, 4] 217 | probs = output[1].cpu().data.numpy() # shape [n_boxes, 2] 218 | 219 | keep = np.where(probs[:, 1] > thresholds[1])[0] 220 | bounding_boxes = bounding_boxes[keep] 221 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,)) 222 | offsets = offsets[keep] 223 | 224 | keep = nms(bounding_boxes, nms_thresholds[1]) 225 | bounding_boxes = bounding_boxes[keep] 226 | bounding_boxes = calibrate_box(bounding_boxes, offsets[keep]) 227 | bounding_boxes = convert_to_square(bounding_boxes) 228 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 229 | 230 | # STAGE 3 231 | 232 | img_boxes = get_image_boxes(bounding_boxes, image, size=48) 233 | if len(img_boxes) == 0: 234 | return np.zeros([0]), np.zeros([0]) 235 | img_boxes = torch.FloatTensor(img_boxes).to(device) 236 | output = self.onet(img_boxes) 237 | landmarks = output[0].cpu().data.numpy() # shape [n_boxes, 10] 238 | offsets = output[1].cpu().data.numpy() # shape [n_boxes, 4] 239 | probs = output[2].cpu().data.numpy() # shape [n_boxes, 2] 240 | 241 | keep = np.where(probs[:, 1] > thresholds[2])[0] 242 | bounding_boxes = bounding_boxes[keep] 243 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,)) 244 | offsets = offsets[keep] 245 | landmarks = landmarks[keep] 246 | 247 | # compute landmark points 248 | width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0 249 | height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0 250 | xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1] 251 | landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5] 252 | landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10] 253 | 254 | bounding_boxes = 
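The returned arrays can also be drawn directly with Pillow. A minimal sketch, assuming the layout produced by the detector in this repo (each box row is `[x1, y1, x2, y2, score]`, each landmark row is `[x1..x5, y1..y5]`):
```python
from PIL import Image, ImageDraw
from src import detect_faces

image = Image.open('image.jpg')
bounding_boxes, landmarks = detect_faces(image)

draw = ImageDraw.Draw(image)
for box in bounding_boxes:
    x1, y1, x2, y2 = box[:4]                  # the fifth value is the detection score
    draw.rectangle([x1, y1, x2, y2], outline='red')
for points in landmarks:
    for x, y in zip(points[:5], points[5:]):  # five (x, y) landmark points
        draw.ellipse([x - 2, y - 2, x + 2, y + 2], fill='blue')
image.save('image_with_detections.jpg')
```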
calibrate_box(bounding_boxes, offsets) 255 | keep = nms(bounding_boxes, nms_thresholds[2], mode='min') 256 | bounding_boxes = bounding_boxes[keep] 257 | landmarks = landmarks[keep] 258 | 259 | return bounding_boxes, landmarks 260 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | 4 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Dan Antoshchenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/README.md: -------------------------------------------------------------------------------- 1 | # MTCNN 2 | 3 | `pytorch` implementation of **inference stage** of face detection algorithm described in 4 | [Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878). 5 | 6 | ## Example 7 | ![example of a face detection](images/example.png) 8 | 9 | ## How to use it 10 | Just download the repository and then do this 11 | ```python 12 | from src import detect_faces 13 | from PIL import Image 14 | 15 | image = Image.open('image.jpg') 16 | bounding_boxes, landmarks = detect_faces(image) 17 | ``` 18 | For examples see `test_on_images.ipynb`. 
19 | 20 | ## Requirements 21 | * pytorch 0.2 22 | * Pillow, numpy 23 | 24 | ## Credit 25 | This implementation is heavily inspired by: 26 | * [pangyupo/mxnet_mtcnn_face_detection](https://github.com/pangyupo/mxnet_mtcnn_face_detection) 27 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/caffe_models/det1.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/caffe_models/det1.caffemodel -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/caffe_models/det1.prototxt: -------------------------------------------------------------------------------- 1 | name: "PNet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 12 6 | input_dim: 12 7 | 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | param { 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | lr_mult: 2 19 | decay_mult: 0 20 | } 21 | convolution_param { 22 | num_output: 10 23 | kernel_size: 3 24 | stride: 1 25 | weight_filler { 26 | type: "xavier" 27 | } 28 | bias_filler { 29 | type: "constant" 30 | value: 0 31 | } 32 | } 33 | } 34 | layer { 35 | name: "PReLU1" 36 | type: "PReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 2 48 | stride: 2 49 | } 50 | } 51 | 52 | layer { 53 | name: "conv2" 54 | type: "Convolution" 55 | bottom: "pool1" 56 | top: "conv2" 57 | param { 58 | lr_mult: 1 59 | decay_mult: 1 60 | } 61 | param { 62 | lr_mult: 2 63 | decay_mult: 0 64 | } 65 | convolution_param { 66 | num_output: 16 67 | kernel_size: 3 68 | stride: 1 69 | weight_filler { 70 | type: "xavier" 71 | } 72 | bias_filler { 73 | type: "constant" 74 | value: 0 75 | } 76 | } 77 | } 78 | layer { 79 | name: "PReLU2" 80 | type: "PReLU" 81 | bottom: "conv2" 82 | top: "conv2" 83 | } 84 | 85 | layer { 86 | name: "conv3" 87 | type: "Convolution" 88 | bottom: "conv2" 89 | top: "conv3" 90 | param { 91 | lr_mult: 1 92 | decay_mult: 1 93 | } 94 | param { 95 | lr_mult: 2 96 | decay_mult: 0 97 | } 98 | convolution_param { 99 | num_output: 32 100 | kernel_size: 3 101 | stride: 1 102 | weight_filler { 103 | type: "xavier" 104 | } 105 | bias_filler { 106 | type: "constant" 107 | value: 0 108 | } 109 | } 110 | } 111 | layer { 112 | name: "PReLU3" 113 | type: "PReLU" 114 | bottom: "conv3" 115 | top: "conv3" 116 | } 117 | 118 | 119 | layer { 120 | name: "conv4-1" 121 | type: "Convolution" 122 | bottom: "conv3" 123 | top: "conv4-1" 124 | param { 125 | lr_mult: 1 126 | decay_mult: 1 127 | } 128 | param { 129 | lr_mult: 2 130 | decay_mult: 0 131 | } 132 | convolution_param { 133 | num_output: 2 134 | kernel_size: 1 135 | stride: 1 136 | weight_filler { 137 | type: "xavier" 138 | } 139 | bias_filler { 140 | type: "constant" 141 | value: 0 142 | } 143 | } 144 | } 145 | 146 | layer { 147 | name: "conv4-2" 148 | type: "Convolution" 149 | bottom: "conv3" 150 | top: "conv4-2" 151 | param { 152 | lr_mult: 1 153 | decay_mult: 1 154 | } 155 | param { 156 | lr_mult: 2 157 | decay_mult: 0 158 | } 159 | convolution_param { 160 | num_output: 4 161 | kernel_size: 1 162 | stride: 1 163 | weight_filler { 164 | type: "xavier" 165 | } 166 | bias_filler { 167 | type: "constant" 168 | value: 0 169 | } 170 | } 
171 | } 172 | layer { 173 | name: "prob1" 174 | type: "Softmax" 175 | bottom: "conv4-1" 176 | top: "prob1" 177 | } 178 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/caffe_models/det2.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/caffe_models/det2.caffemodel -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/caffe_models/det2.prototxt: -------------------------------------------------------------------------------- 1 | name: "RNet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 24 6 | input_dim: 24 7 | 8 | 9 | ########################## 10 | ###################### 11 | layer { 12 | name: "conv1" 13 | type: "Convolution" 14 | bottom: "data" 15 | top: "conv1" 16 | param { 17 | lr_mult: 0 18 | decay_mult: 0 19 | } 20 | param { 21 | lr_mult: 0 22 | decay_mult: 0 23 | } 24 | convolution_param { 25 | num_output: 28 26 | kernel_size: 3 27 | stride: 1 28 | weight_filler { 29 | type: "xavier" 30 | } 31 | bias_filler { 32 | type: "constant" 33 | value: 0 34 | } 35 | } 36 | } 37 | layer { 38 | name: "prelu1" 39 | type: "PReLU" 40 | bottom: "conv1" 41 | top: "conv1" 42 | propagate_down: true 43 | } 44 | layer { 45 | name: "pool1" 46 | type: "Pooling" 47 | bottom: "conv1" 48 | top: "pool1" 49 | pooling_param { 50 | pool: MAX 51 | kernel_size: 3 52 | stride: 2 53 | } 54 | } 55 | 56 | layer { 57 | name: "conv2" 58 | type: "Convolution" 59 | bottom: "pool1" 60 | top: "conv2" 61 | param { 62 | lr_mult: 0 63 | decay_mult: 0 64 | } 65 | param { 66 | lr_mult: 0 67 | decay_mult: 0 68 | } 69 | convolution_param { 70 | num_output: 48 71 | kernel_size: 3 72 | stride: 1 73 | weight_filler { 74 | type: "xavier" 75 | } 76 | bias_filler { 77 | type: "constant" 78 | value: 0 79 | } 80 | } 81 | } 82 | layer { 83 | name: "prelu2" 84 | type: "PReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | propagate_down: true 88 | } 89 | layer { 90 | name: "pool2" 91 | type: "Pooling" 92 | bottom: "conv2" 93 | top: "pool2" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | stride: 2 98 | } 99 | } 100 | #################################### 101 | 102 | ################################## 103 | layer { 104 | name: "conv3" 105 | type: "Convolution" 106 | bottom: "pool2" 107 | top: "conv3" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 64 118 | kernel_size: 2 119 | stride: 1 120 | weight_filler { 121 | type: "xavier" 122 | } 123 | bias_filler { 124 | type: "constant" 125 | value: 0 126 | } 127 | } 128 | } 129 | layer { 130 | name: "prelu3" 131 | type: "PReLU" 132 | bottom: "conv3" 133 | top: "conv3" 134 | propagate_down: true 135 | } 136 | ############################### 137 | 138 | ############################### 139 | 140 | layer { 141 | name: "conv4" 142 | type: "InnerProduct" 143 | bottom: "conv3" 144 | top: "conv4" 145 | param { 146 | lr_mult: 0 147 | decay_mult: 0 148 | } 149 | param { 150 | lr_mult: 0 151 | decay_mult: 0 152 | } 153 | inner_product_param { 154 | num_output: 128 155 | weight_filler { 156 | type: "xavier" 157 | } 158 | bias_filler { 159 | type: "constant" 160 | value: 0 161 | } 162 | } 163 | } 164 | layer { 165 | name: "prelu4" 166 | type: "PReLU" 167 | bottom: "conv4" 168 | top: "conv4" 
169 | } 170 | 171 | layer { 172 | name: "conv5-1" 173 | type: "InnerProduct" 174 | bottom: "conv4" 175 | top: "conv5-1" 176 | param { 177 | lr_mult: 0 178 | decay_mult: 0 179 | } 180 | param { 181 | lr_mult: 0 182 | decay_mult: 0 183 | } 184 | inner_product_param { 185 | num_output: 2 186 | #kernel_size: 1 187 | #stride: 1 188 | weight_filler { 189 | type: "xavier" 190 | } 191 | bias_filler { 192 | type: "constant" 193 | value: 0 194 | } 195 | } 196 | } 197 | layer { 198 | name: "conv5-2" 199 | type: "InnerProduct" 200 | bottom: "conv4" 201 | top: "conv5-2" 202 | param { 203 | lr_mult: 1 204 | decay_mult: 1 205 | } 206 | param { 207 | lr_mult: 2 208 | decay_mult: 1 209 | } 210 | inner_product_param { 211 | num_output: 4 212 | #kernel_size: 1 213 | #stride: 1 214 | weight_filler { 215 | type: "xavier" 216 | } 217 | bias_filler { 218 | type: "constant" 219 | value: 0 220 | } 221 | } 222 | } 223 | layer { 224 | name: "prob1" 225 | type: "Softmax" 226 | bottom: "conv5-1" 227 | top: "prob1" 228 | } -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/caffe_models/det3.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/caffe_models/det3.caffemodel -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/caffe_models/det3.prototxt: -------------------------------------------------------------------------------- 1 | name: "ONet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 48 6 | input_dim: 48 7 | ################################## 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | param { 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | lr_mult: 2 19 | decay_mult: 1 20 | } 21 | convolution_param { 22 | num_output: 32 23 | kernel_size: 3 24 | stride: 1 25 | weight_filler { 26 | type: "xavier" 27 | } 28 | bias_filler { 29 | type: "constant" 30 | value: 0 31 | } 32 | } 33 | } 34 | layer { 35 | name: "prelu1" 36 | type: "PReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 3 48 | stride: 2 49 | } 50 | } 51 | layer { 52 | name: "conv2" 53 | type: "Convolution" 54 | bottom: "pool1" 55 | top: "conv2" 56 | param { 57 | lr_mult: 1 58 | decay_mult: 1 59 | } 60 | param { 61 | lr_mult: 2 62 | decay_mult: 1 63 | } 64 | convolution_param { 65 | num_output: 64 66 | kernel_size: 3 67 | stride: 1 68 | weight_filler { 69 | type: "xavier" 70 | } 71 | bias_filler { 72 | type: "constant" 73 | value: 0 74 | } 75 | } 76 | } 77 | 78 | layer { 79 | name: "prelu2" 80 | type: "PReLU" 81 | bottom: "conv2" 82 | top: "conv2" 83 | } 84 | layer { 85 | name: "pool2" 86 | type: "Pooling" 87 | bottom: "conv2" 88 | top: "pool2" 89 | pooling_param { 90 | pool: MAX 91 | kernel_size: 3 92 | stride: 2 93 | } 94 | } 95 | 96 | layer { 97 | name: "conv3" 98 | type: "Convolution" 99 | bottom: "pool2" 100 | top: "conv3" 101 | param { 102 | lr_mult: 1 103 | decay_mult: 1 104 | } 105 | param { 106 | lr_mult: 2 107 | decay_mult: 1 108 | } 109 | convolution_param { 110 | num_output: 64 111 | kernel_size: 3 112 | weight_filler { 113 | type: "xavier" 114 | } 115 | bias_filler { 116 | type: "constant" 117 | value: 0 118 | } 119 | } 120 | } 121 | layer { 
122 | name: "prelu3" 123 | type: "PReLU" 124 | bottom: "conv3" 125 | top: "conv3" 126 | } 127 | layer { 128 | name: "pool3" 129 | type: "Pooling" 130 | bottom: "conv3" 131 | top: "pool3" 132 | pooling_param { 133 | pool: MAX 134 | kernel_size: 2 135 | stride: 2 136 | } 137 | } 138 | layer { 139 | name: "conv4" 140 | type: "Convolution" 141 | bottom: "pool3" 142 | top: "conv4" 143 | param { 144 | lr_mult: 1 145 | decay_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | decay_mult: 1 150 | } 151 | convolution_param { 152 | num_output: 128 153 | kernel_size: 2 154 | weight_filler { 155 | type: "xavier" 156 | } 157 | bias_filler { 158 | type: "constant" 159 | value: 0 160 | } 161 | } 162 | } 163 | layer { 164 | name: "prelu4" 165 | type: "PReLU" 166 | bottom: "conv4" 167 | top: "conv4" 168 | } 169 | 170 | 171 | layer { 172 | name: "conv5" 173 | type: "InnerProduct" 174 | bottom: "conv4" 175 | top: "conv5" 176 | param { 177 | lr_mult: 1 178 | decay_mult: 1 179 | } 180 | param { 181 | lr_mult: 2 182 | decay_mult: 1 183 | } 184 | inner_product_param { 185 | #kernel_size: 3 186 | num_output: 256 187 | weight_filler { 188 | type: "xavier" 189 | } 190 | bias_filler { 191 | type: "constant" 192 | value: 0 193 | } 194 | } 195 | } 196 | 197 | layer { 198 | name: "drop5" 199 | type: "Dropout" 200 | bottom: "conv5" 201 | top: "conv5" 202 | dropout_param { 203 | dropout_ratio: 0.25 204 | } 205 | } 206 | layer { 207 | name: "prelu5" 208 | type: "PReLU" 209 | bottom: "conv5" 210 | top: "conv5" 211 | } 212 | 213 | 214 | layer { 215 | name: "conv6-1" 216 | type: "InnerProduct" 217 | bottom: "conv5" 218 | top: "conv6-1" 219 | param { 220 | lr_mult: 1 221 | decay_mult: 1 222 | } 223 | param { 224 | lr_mult: 2 225 | decay_mult: 1 226 | } 227 | inner_product_param { 228 | #kernel_size: 1 229 | num_output: 2 230 | weight_filler { 231 | type: "xavier" 232 | } 233 | bias_filler { 234 | type: "constant" 235 | value: 0 236 | } 237 | } 238 | } 239 | layer { 240 | name: "conv6-2" 241 | type: "InnerProduct" 242 | bottom: "conv5" 243 | top: "conv6-2" 244 | param { 245 | lr_mult: 1 246 | decay_mult: 1 247 | } 248 | param { 249 | lr_mult: 2 250 | decay_mult: 1 251 | } 252 | inner_product_param { 253 | #kernel_size: 1 254 | num_output: 4 255 | weight_filler { 256 | type: "xavier" 257 | } 258 | bias_filler { 259 | type: "constant" 260 | value: 0 261 | } 262 | } 263 | } 264 | layer { 265 | name: "conv6-3" 266 | type: "InnerProduct" 267 | bottom: "conv5" 268 | top: "conv6-3" 269 | param { 270 | lr_mult: 1 271 | decay_mult: 1 272 | } 273 | param { 274 | lr_mult: 2 275 | decay_mult: 1 276 | } 277 | inner_product_param { 278 | #kernel_size: 1 279 | num_output: 10 280 | weight_filler { 281 | type: "xavier" 282 | } 283 | bias_filler { 284 | type: "constant" 285 | value: 0 286 | } 287 | } 288 | } 289 | layer { 290 | name: "prob1" 291 | type: "Softmax" 292 | bottom: "conv6-1" 293 | top: "prob1" 294 | } 295 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/caffe_models/det4.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/caffe_models/det4.caffemodel -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/caffe_models/det4.prototxt: -------------------------------------------------------------------------------- 1 | name: "LNet" 2 | input: "data" 3 | 
input_dim: 1 4 | input_dim: 15 5 | input_dim: 24 6 | input_dim: 24 7 | 8 | layer { 9 | name: "slicer_data" 10 | type: "Slice" 11 | bottom: "data" 12 | top: "data241" 13 | top: "data242" 14 | top: "data243" 15 | top: "data244" 16 | top: "data245" 17 | slice_param { 18 | axis: 1 19 | slice_point: 3 20 | slice_point: 6 21 | slice_point: 9 22 | slice_point: 12 23 | } 24 | } 25 | layer { 26 | name: "conv1_1" 27 | type: "Convolution" 28 | bottom: "data241" 29 | top: "conv1_1" 30 | param { 31 | lr_mult: 1 32 | decay_mult: 1 33 | } 34 | param { 35 | lr_mult: 2 36 | decay_mult: 1 37 | } 38 | convolution_param { 39 | num_output: 28 40 | kernel_size: 3 41 | stride: 1 42 | weight_filler { 43 | type: "xavier" 44 | } 45 | bias_filler { 46 | type: "constant" 47 | value: 0 48 | } 49 | } 50 | 51 | } 52 | layer { 53 | name: "prelu1_1" 54 | type: "PReLU" 55 | bottom: "conv1_1" 56 | top: "conv1_1" 57 | 58 | } 59 | layer { 60 | name: "pool1_1" 61 | type: "Pooling" 62 | bottom: "conv1_1" 63 | top: "pool1_1" 64 | pooling_param { 65 | pool: MAX 66 | kernel_size: 3 67 | stride: 2 68 | } 69 | } 70 | 71 | layer { 72 | name: "conv2_1" 73 | type: "Convolution" 74 | bottom: "pool1_1" 75 | top: "conv2_1" 76 | param { 77 | lr_mult: 1 78 | decay_mult: 1 79 | } 80 | param { 81 | lr_mult: 2 82 | decay_mult: 1 83 | } 84 | convolution_param { 85 | num_output: 48 86 | kernel_size: 3 87 | stride: 1 88 | weight_filler { 89 | type: "xavier" 90 | } 91 | bias_filler { 92 | type: "constant" 93 | value: 0 94 | } 95 | } 96 | 97 | } 98 | layer { 99 | name: "prelu2_1" 100 | type: "PReLU" 101 | bottom: "conv2_1" 102 | top: "conv2_1" 103 | } 104 | layer { 105 | name: "pool2_1" 106 | type: "Pooling" 107 | bottom: "conv2_1" 108 | top: "pool2_1" 109 | pooling_param { 110 | pool: MAX 111 | kernel_size: 3 112 | stride: 2 113 | } 114 | 115 | } 116 | layer { 117 | name: "conv3_1" 118 | type: "Convolution" 119 | bottom: "pool2_1" 120 | top: "conv3_1" 121 | param { 122 | lr_mult: 1 123 | decay_mult: 1 124 | } 125 | param { 126 | lr_mult: 2 127 | decay_mult: 1 128 | } 129 | convolution_param { 130 | num_output: 64 131 | kernel_size: 2 132 | stride: 1 133 | weight_filler { 134 | type: "xavier" 135 | } 136 | bias_filler { 137 | type: "constant" 138 | value: 0 139 | } 140 | } 141 | 142 | } 143 | layer { 144 | name: "prelu3_1" 145 | type: "PReLU" 146 | bottom: "conv3_1" 147 | top: "conv3_1" 148 | } 149 | ########################## 150 | layer { 151 | name: "conv1_2" 152 | type: "Convolution" 153 | bottom: "data242" 154 | top: "conv1_2" 155 | param { 156 | lr_mult: 1 157 | decay_mult: 1 158 | } 159 | param { 160 | lr_mult: 2 161 | decay_mult: 1 162 | } 163 | convolution_param { 164 | num_output: 28 165 | kernel_size: 3 166 | stride: 1 167 | weight_filler { 168 | type: "xavier" 169 | } 170 | bias_filler { 171 | type: "constant" 172 | value: 0 173 | } 174 | } 175 | 176 | } 177 | layer { 178 | name: "prelu1_2" 179 | type: "PReLU" 180 | bottom: "conv1_2" 181 | top: "conv1_2" 182 | 183 | } 184 | layer { 185 | name: "pool1_2" 186 | type: "Pooling" 187 | bottom: "conv1_2" 188 | top: "pool1_2" 189 | pooling_param { 190 | pool: MAX 191 | kernel_size: 3 192 | stride: 2 193 | } 194 | } 195 | 196 | layer { 197 | name: "conv2_2" 198 | type: "Convolution" 199 | bottom: "pool1_2" 200 | top: "conv2_2" 201 | param { 202 | lr_mult: 1 203 | decay_mult: 1 204 | } 205 | param { 206 | lr_mult: 2 207 | decay_mult: 1 208 | } 209 | convolution_param { 210 | num_output: 48 211 | kernel_size: 3 212 | stride: 1 213 | weight_filler { 214 | type: "xavier" 215 | } 216 | bias_filler { 
217 | type: "constant" 218 | value: 0 219 | } 220 | } 221 | 222 | } 223 | layer { 224 | name: "prelu2_2" 225 | type: "PReLU" 226 | bottom: "conv2_2" 227 | top: "conv2_2" 228 | } 229 | layer { 230 | name: "pool2_2" 231 | type: "Pooling" 232 | bottom: "conv2_2" 233 | top: "pool2_2" 234 | pooling_param { 235 | pool: MAX 236 | kernel_size: 3 237 | stride: 2 238 | } 239 | 240 | } 241 | layer { 242 | name: "conv3_2" 243 | type: "Convolution" 244 | bottom: "pool2_2" 245 | top: "conv3_2" 246 | param { 247 | lr_mult: 1 248 | decay_mult: 1 249 | } 250 | param { 251 | lr_mult: 2 252 | decay_mult: 1 253 | } 254 | convolution_param { 255 | num_output: 64 256 | kernel_size: 2 257 | stride: 1 258 | weight_filler { 259 | type: "xavier" 260 | } 261 | bias_filler { 262 | type: "constant" 263 | value: 0 264 | } 265 | } 266 | 267 | } 268 | layer { 269 | name: "prelu3_2" 270 | type: "PReLU" 271 | bottom: "conv3_2" 272 | top: "conv3_2" 273 | } 274 | ########################## 275 | ########################## 276 | layer { 277 | name: "conv1_3" 278 | type: "Convolution" 279 | bottom: "data243" 280 | top: "conv1_3" 281 | param { 282 | lr_mult: 1 283 | decay_mult: 1 284 | } 285 | param { 286 | lr_mult: 2 287 | decay_mult: 1 288 | } 289 | convolution_param { 290 | num_output: 28 291 | kernel_size: 3 292 | stride: 1 293 | weight_filler { 294 | type: "xavier" 295 | } 296 | bias_filler { 297 | type: "constant" 298 | value: 0 299 | } 300 | } 301 | 302 | } 303 | layer { 304 | name: "prelu1_3" 305 | type: "PReLU" 306 | bottom: "conv1_3" 307 | top: "conv1_3" 308 | 309 | } 310 | layer { 311 | name: "pool1_3" 312 | type: "Pooling" 313 | bottom: "conv1_3" 314 | top: "pool1_3" 315 | pooling_param { 316 | pool: MAX 317 | kernel_size: 3 318 | stride: 2 319 | } 320 | } 321 | 322 | layer { 323 | name: "conv2_3" 324 | type: "Convolution" 325 | bottom: "pool1_3" 326 | top: "conv2_3" 327 | param { 328 | lr_mult: 1 329 | decay_mult: 1 330 | } 331 | param { 332 | lr_mult: 2 333 | decay_mult: 1 334 | } 335 | convolution_param { 336 | num_output: 48 337 | kernel_size: 3 338 | stride: 1 339 | weight_filler { 340 | type: "xavier" 341 | } 342 | bias_filler { 343 | type: "constant" 344 | value: 0 345 | } 346 | } 347 | 348 | } 349 | layer { 350 | name: "prelu2_3" 351 | type: "PReLU" 352 | bottom: "conv2_3" 353 | top: "conv2_3" 354 | } 355 | layer { 356 | name: "pool2_3" 357 | type: "Pooling" 358 | bottom: "conv2_3" 359 | top: "pool2_3" 360 | pooling_param { 361 | pool: MAX 362 | kernel_size: 3 363 | stride: 2 364 | } 365 | 366 | } 367 | layer { 368 | name: "conv3_3" 369 | type: "Convolution" 370 | bottom: "pool2_3" 371 | top: "conv3_3" 372 | param { 373 | lr_mult: 1 374 | decay_mult: 1 375 | } 376 | param { 377 | lr_mult: 2 378 | decay_mult: 1 379 | } 380 | convolution_param { 381 | num_output: 64 382 | kernel_size: 2 383 | stride: 1 384 | weight_filler { 385 | type: "xavier" 386 | } 387 | bias_filler { 388 | type: "constant" 389 | value: 0 390 | } 391 | } 392 | 393 | } 394 | layer { 395 | name: "prelu3_3" 396 | type: "PReLU" 397 | bottom: "conv3_3" 398 | top: "conv3_3" 399 | } 400 | ########################## 401 | ########################## 402 | layer { 403 | name: "conv1_4" 404 | type: "Convolution" 405 | bottom: "data244" 406 | top: "conv1_4" 407 | param { 408 | lr_mult: 1 409 | decay_mult: 1 410 | } 411 | param { 412 | lr_mult: 2 413 | decay_mult: 1 414 | } 415 | convolution_param { 416 | num_output: 28 417 | kernel_size: 3 418 | stride: 1 419 | weight_filler { 420 | type: "xavier" 421 | } 422 | bias_filler { 423 | type: "constant" 424 
| value: 0 425 | } 426 | } 427 | 428 | } 429 | layer { 430 | name: "prelu1_4" 431 | type: "PReLU" 432 | bottom: "conv1_4" 433 | top: "conv1_4" 434 | 435 | } 436 | layer { 437 | name: "pool1_4" 438 | type: "Pooling" 439 | bottom: "conv1_4" 440 | top: "pool1_4" 441 | pooling_param { 442 | pool: MAX 443 | kernel_size: 3 444 | stride: 2 445 | } 446 | } 447 | 448 | layer { 449 | name: "conv2_4" 450 | type: "Convolution" 451 | bottom: "pool1_4" 452 | top: "conv2_4" 453 | param { 454 | lr_mult: 1 455 | decay_mult: 1 456 | } 457 | param { 458 | lr_mult: 2 459 | decay_mult: 1 460 | } 461 | convolution_param { 462 | num_output: 48 463 | kernel_size: 3 464 | stride: 1 465 | weight_filler { 466 | type: "xavier" 467 | } 468 | bias_filler { 469 | type: "constant" 470 | value: 0 471 | } 472 | } 473 | 474 | } 475 | layer { 476 | name: "prelu2_4" 477 | type: "PReLU" 478 | bottom: "conv2_4" 479 | top: "conv2_4" 480 | } 481 | layer { 482 | name: "pool2_4" 483 | type: "Pooling" 484 | bottom: "conv2_4" 485 | top: "pool2_4" 486 | pooling_param { 487 | pool: MAX 488 | kernel_size: 3 489 | stride: 2 490 | } 491 | 492 | } 493 | layer { 494 | name: "conv3_4" 495 | type: "Convolution" 496 | bottom: "pool2_4" 497 | top: "conv3_4" 498 | param { 499 | lr_mult: 1 500 | decay_mult: 1 501 | } 502 | param { 503 | lr_mult: 2 504 | decay_mult: 1 505 | } 506 | convolution_param { 507 | num_output: 64 508 | kernel_size: 2 509 | stride: 1 510 | weight_filler { 511 | type: "xavier" 512 | } 513 | bias_filler { 514 | type: "constant" 515 | value: 0 516 | } 517 | } 518 | 519 | } 520 | layer { 521 | name: "prelu3_4" 522 | type: "PReLU" 523 | bottom: "conv3_4" 524 | top: "conv3_4" 525 | } 526 | ########################## 527 | ########################## 528 | layer { 529 | name: "conv1_5" 530 | type: "Convolution" 531 | bottom: "data245" 532 | top: "conv1_5" 533 | param { 534 | lr_mult: 1 535 | decay_mult: 1 536 | } 537 | param { 538 | lr_mult: 2 539 | decay_mult: 1 540 | } 541 | convolution_param { 542 | num_output: 28 543 | kernel_size: 3 544 | stride: 1 545 | weight_filler { 546 | type: "xavier" 547 | } 548 | bias_filler { 549 | type: "constant" 550 | value: 0 551 | } 552 | } 553 | 554 | } 555 | layer { 556 | name: "prelu1_5" 557 | type: "PReLU" 558 | bottom: "conv1_5" 559 | top: "conv1_5" 560 | 561 | } 562 | layer { 563 | name: "pool1_5" 564 | type: "Pooling" 565 | bottom: "conv1_5" 566 | top: "pool1_5" 567 | pooling_param { 568 | pool: MAX 569 | kernel_size: 3 570 | stride: 2 571 | } 572 | } 573 | 574 | layer { 575 | name: "conv2_5" 576 | type: "Convolution" 577 | bottom: "pool1_5" 578 | top: "conv2_5" 579 | param { 580 | lr_mult: 1 581 | decay_mult: 1 582 | } 583 | param { 584 | lr_mult: 2 585 | decay_mult: 1 586 | } 587 | convolution_param { 588 | num_output: 48 589 | kernel_size: 3 590 | stride: 1 591 | weight_filler { 592 | type: "xavier" 593 | } 594 | bias_filler { 595 | type: "constant" 596 | value: 0 597 | } 598 | } 599 | 600 | } 601 | layer { 602 | name: "prelu2_5" 603 | type: "PReLU" 604 | bottom: "conv2_5" 605 | top: "conv2_5" 606 | } 607 | layer { 608 | name: "pool2_5" 609 | type: "Pooling" 610 | bottom: "conv2_5" 611 | top: "pool2_5" 612 | pooling_param { 613 | pool: MAX 614 | kernel_size: 3 615 | stride: 2 616 | } 617 | 618 | } 619 | layer { 620 | name: "conv3_5" 621 | type: "Convolution" 622 | bottom: "pool2_5" 623 | top: "conv3_5" 624 | param { 625 | lr_mult: 1 626 | decay_mult: 1 627 | } 628 | param { 629 | lr_mult: 2 630 | decay_mult: 1 631 | } 632 | convolution_param { 633 | num_output: 64 634 | kernel_size: 2 
635 | stride: 1 636 | weight_filler { 637 | type: "xavier" 638 | } 639 | bias_filler { 640 | type: "constant" 641 | value: 0 642 | } 643 | } 644 | 645 | } 646 | layer { 647 | name: "prelu3_5" 648 | type: "PReLU" 649 | bottom: "conv3_5" 650 | top: "conv3_5" 651 | } 652 | ########################## 653 | layer { 654 | name: "concat" 655 | bottom: "conv3_1" 656 | bottom: "conv3_2" 657 | bottom: "conv3_3" 658 | bottom: "conv3_4" 659 | bottom: "conv3_5" 660 | top: "conv3" 661 | type: "Concat" 662 | concat_param { 663 | axis: 1 664 | } 665 | } 666 | ########################## 667 | layer { 668 | name: "fc4" 669 | type: "InnerProduct" 670 | bottom: "conv3" 671 | top: "fc4" 672 | param { 673 | lr_mult: 1 674 | decay_mult: 1 675 | } 676 | param { 677 | lr_mult: 2 678 | decay_mult: 1 679 | } 680 | inner_product_param { 681 | num_output: 256 682 | weight_filler { 683 | type: "xavier" 684 | } 685 | bias_filler { 686 | type: "constant" 687 | value: 0 688 | } 689 | } 690 | 691 | } 692 | layer { 693 | name: "prelu4" 694 | type: "PReLU" 695 | bottom: "fc4" 696 | top: "fc4" 697 | } 698 | ############################ 699 | layer { 700 | name: "fc4_1" 701 | type: "InnerProduct" 702 | bottom: "fc4" 703 | top: "fc4_1" 704 | param { 705 | lr_mult: 1 706 | decay_mult: 1 707 | } 708 | param { 709 | lr_mult: 2 710 | decay_mult: 1 711 | } 712 | inner_product_param { 713 | num_output: 64 714 | weight_filler { 715 | type: "xavier" 716 | } 717 | bias_filler { 718 | type: "constant" 719 | value: 0 720 | } 721 | } 722 | 723 | } 724 | layer { 725 | name: "prelu4_1" 726 | type: "PReLU" 727 | bottom: "fc4_1" 728 | top: "fc4_1" 729 | } 730 | layer { 731 | name: "fc5_1" 732 | type: "InnerProduct" 733 | bottom: "fc4_1" 734 | top: "fc5_1" 735 | param { 736 | lr_mult: 1 737 | decay_mult: 1 738 | } 739 | param { 740 | lr_mult: 2 741 | decay_mult: 1 742 | } 743 | inner_product_param { 744 | num_output: 2 745 | weight_filler { 746 | type: "xavier" 747 | #type: "constant" 748 | #value: 0 749 | } 750 | bias_filler { 751 | type: "constant" 752 | value: 0 753 | } 754 | } 755 | } 756 | 757 | 758 | ######################### 759 | layer { 760 | name: "fc4_2" 761 | type: "InnerProduct" 762 | bottom: "fc4" 763 | top: "fc4_2" 764 | param { 765 | lr_mult: 1 766 | decay_mult: 1 767 | } 768 | param { 769 | lr_mult: 2 770 | decay_mult: 1 771 | } 772 | inner_product_param { 773 | num_output: 64 774 | weight_filler { 775 | type: "xavier" 776 | } 777 | bias_filler { 778 | type: "constant" 779 | value: 0 780 | } 781 | } 782 | 783 | } 784 | layer { 785 | name: "prelu4_2" 786 | type: "PReLU" 787 | bottom: "fc4_2" 788 | top: "fc4_2" 789 | } 790 | layer { 791 | name: "fc5_2" 792 | type: "InnerProduct" 793 | bottom: "fc4_2" 794 | top: "fc5_2" 795 | param { 796 | lr_mult: 1 797 | decay_mult: 1 798 | } 799 | param { 800 | lr_mult: 2 801 | decay_mult: 1 802 | } 803 | inner_product_param { 804 | num_output: 2 805 | weight_filler { 806 | type: "xavier" 807 | #type: "constant" 808 | #value: 0 809 | } 810 | bias_filler { 811 | type: "constant" 812 | value: 0 813 | } 814 | } 815 | } 816 | 817 | ######################### 818 | layer { 819 | name: "fc4_3" 820 | type: "InnerProduct" 821 | bottom: "fc4" 822 | top: "fc4_3" 823 | param { 824 | lr_mult: 1 825 | decay_mult: 1 826 | } 827 | param { 828 | lr_mult: 2 829 | decay_mult: 1 830 | } 831 | inner_product_param { 832 | num_output: 64 833 | weight_filler { 834 | type: "xavier" 835 | } 836 | bias_filler { 837 | type: "constant" 838 | value: 0 839 | } 840 | } 841 | 842 | } 843 | layer { 844 | name: "prelu4_3" 845 | 
type: "PReLU" 846 | bottom: "fc4_3" 847 | top: "fc4_3" 848 | } 849 | layer { 850 | name: "fc5_3" 851 | type: "InnerProduct" 852 | bottom: "fc4_3" 853 | top: "fc5_3" 854 | param { 855 | lr_mult: 1 856 | decay_mult: 1 857 | } 858 | param { 859 | lr_mult: 2 860 | decay_mult: 1 861 | } 862 | inner_product_param { 863 | num_output: 2 864 | weight_filler { 865 | type: "xavier" 866 | #type: "constant" 867 | #value: 0 868 | } 869 | bias_filler { 870 | type: "constant" 871 | value: 0 872 | } 873 | } 874 | } 875 | 876 | ######################### 877 | layer { 878 | name: "fc4_4" 879 | type: "InnerProduct" 880 | bottom: "fc4" 881 | top: "fc4_4" 882 | param { 883 | lr_mult: 1 884 | decay_mult: 1 885 | } 886 | param { 887 | lr_mult: 2 888 | decay_mult: 1 889 | } 890 | inner_product_param { 891 | num_output: 64 892 | weight_filler { 893 | type: "xavier" 894 | } 895 | bias_filler { 896 | type: "constant" 897 | value: 0 898 | } 899 | } 900 | 901 | } 902 | layer { 903 | name: "prelu4_4" 904 | type: "PReLU" 905 | bottom: "fc4_4" 906 | top: "fc4_4" 907 | } 908 | layer { 909 | name: "fc5_4" 910 | type: "InnerProduct" 911 | bottom: "fc4_4" 912 | top: "fc5_4" 913 | param { 914 | lr_mult: 1 915 | decay_mult: 1 916 | } 917 | param { 918 | lr_mult: 2 919 | decay_mult: 1 920 | } 921 | inner_product_param { 922 | num_output: 2 923 | weight_filler { 924 | type: "xavier" 925 | #type: "constant" 926 | #value: 0 927 | } 928 | bias_filler { 929 | type: "constant" 930 | value: 0 931 | } 932 | } 933 | } 934 | 935 | ######################### 936 | layer { 937 | name: "fc4_5" 938 | type: "InnerProduct" 939 | bottom: "fc4" 940 | top: "fc4_5" 941 | param { 942 | lr_mult: 1 943 | decay_mult: 1 944 | } 945 | param { 946 | lr_mult: 2 947 | decay_mult: 1 948 | } 949 | inner_product_param { 950 | num_output: 64 951 | weight_filler { 952 | type: "xavier" 953 | } 954 | bias_filler { 955 | type: "constant" 956 | value: 0 957 | } 958 | } 959 | 960 | } 961 | layer { 962 | name: "prelu4_5" 963 | type: "PReLU" 964 | bottom: "fc4_5" 965 | top: "fc4_5" 966 | } 967 | layer { 968 | name: "fc5_5" 969 | type: "InnerProduct" 970 | bottom: "fc4_5" 971 | top: "fc5_5" 972 | param { 973 | lr_mult: 1 974 | decay_mult: 1 975 | } 976 | param { 977 | lr_mult: 2 978 | decay_mult: 1 979 | } 980 | inner_product_param { 981 | num_output: 2 982 | weight_filler { 983 | type: "xavier" 984 | #type: "constant" 985 | #value: 0 986 | } 987 | bias_filler { 988 | type: "constant" 989 | value: 0 990 | } 991 | } 992 | } 993 | 994 | ######################### 995 | 996 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/extract_weights_from_caffe_models.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import numpy as np 3 | 4 | """ 5 | The purpose of this script is to convert pretrained weights taken from 6 | official implementation here: 7 | https://github.com/kpzhang93/MTCNN_face_detection_alignment/tree/master/code/codes/MTCNNv2 8 | to required format. 9 | 10 | In a nutshell, it just renames and transposes some of the weights. 11 | You don't have to use this script because weights are already in `src/weights`. 12 | """ 13 | 14 | 15 | def get_all_weights(net): 16 | all_weights = {} 17 | for p in net.params: 18 | if 'conv' in p: 19 | name = 'features.' 
+ p 20 | if '-' in p: 21 | s = list(p) 22 | s[-2] = '_' 23 | s = ''.join(s) 24 | all_weights[s + '.weight'] = net.params[p][0].data 25 | all_weights[s + '.bias'] = net.params[p][1].data 26 | elif len(net.params[p][0].data.shape) == 4: 27 | all_weights[name + '.weight'] = net.params[p][0].data.transpose((0, 1, 3, 2)) 28 | all_weights[name + '.bias'] = net.params[p][1].data 29 | else: 30 | all_weights[name + '.weight'] = net.params[p][0].data 31 | all_weights[name + '.bias'] = net.params[p][1].data 32 | elif 'prelu' in p.lower(): 33 | all_weights['features.' + p.lower() + '.weight'] = net.params[p][0].data 34 | return all_weights 35 | 36 | 37 | # P-Net 38 | net = caffe.Net('caffe_models/det1.prototxt', 'caffe_models/det1.caffemodel', caffe.TEST) 39 | np.save('src/weights/pnet.npy', get_all_weights(net)) 40 | 41 | # R-Net 42 | net = caffe.Net('caffe_models/det2.prototxt', 'caffe_models/det2.caffemodel', caffe.TEST) 43 | np.save('src/weights/rnet.npy', get_all_weights(net)) 44 | 45 | # O-Net 46 | net = caffe.Net('caffe_models/det3.prototxt', 'caffe_models/det3.caffemodel', caffe.TEST) 47 | np.save('src/weights/onet.npy', get_all_weights(net)) 48 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/images/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/images/example.png -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/images/jf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/images/jf.jpg -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/images/office1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/images/office1.jpg -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/images/office2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/images/office2.jpg -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/images/office3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/images/office3.jpg -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/images/office4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/images/office4.jpg -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/images/office5.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/images/office5.jpg -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .visualization_utils import show_bboxes 2 | from .detector import detect_faces 3 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/src/align_trans.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Apr 24 15:43:29 2017 4 | @author: zhaoy 5 | """ 6 | import numpy as np 7 | import cv2 8 | 9 | # from scipy.linalg import lstsq 10 | # from scipy.ndimage import geometric_transform # , map_coordinates 11 | 12 | from mtcnn_pytorch.src.matlab_cp2tform import get_similarity_transform_for_cv2 13 | 14 | # reference facial points, a list of coordinates (x,y) 15 | REFERENCE_FACIAL_POINTS = [ 16 | [30.29459953, 51.69630051], 17 | [65.53179932, 51.50139999], 18 | [48.02519989, 71.73660278], 19 | [33.54930115, 92.3655014], 20 | [62.72990036, 92.20410156] 21 | ] 22 | 23 | DEFAULT_CROP_SIZE = (96, 112) 24 | 25 | 26 | class FaceWarpException(Exception): 27 | def __str__(self): 28 | return 'In File {}:{}'.format( 29 | __file__, super.__str__(self)) 30 | 31 | 32 | def get_reference_facial_points(output_size=None, 33 | inner_padding_factor=0.0, 34 | outer_padding=(0, 0), 35 | default_square=False): 36 | """ 37 | Function: 38 | ---------- 39 | get reference 5 key points according to crop settings: 40 | 0. Set default crop_size: 41 | if default_square: 42 | crop_size = (112, 112) 43 | else: 44 | crop_size = (96, 112) 45 | 1. Pad the crop_size by inner_padding_factor in each side; 46 | 2. Resize crop_size into (output_size - outer_padding*2), 47 | pad into output_size with outer_padding; 48 | 3. Output reference_5point; 49 | Parameters: 50 | ---------- 51 | @output_size: (w, h) or None 52 | size of aligned face image 53 | @inner_padding_factor: (w_factor, h_factor) 54 | padding factor for inner (w, h) 55 | @outer_padding: (w_pad, h_pad) 56 | each row is a pair of coordinates (x, y) 57 | @default_square: True or False 58 | if True: 59 | default crop_size = (112, 112) 60 | else: 61 | default crop_size = (96, 112); 62 | !!! 
make sure, if output_size is not None: 63 | (output_size - outer_padding) 64 | = some_scale * (default crop_size * (1.0 + inner_padding_factor)) 65 | Returns: 66 | ---------- 67 | @reference_5point: 5x2 np.array 68 | each row is a pair of transformed coordinates (x, y) 69 | """ 70 | #print('\n===> get_reference_facial_points():') 71 | 72 | #print('---> Params:') 73 | #print(' output_size: ', output_size) 74 | #print(' inner_padding_factor: ', inner_padding_factor) 75 | #print(' outer_padding:', outer_padding) 76 | #print(' default_square: ', default_square) 77 | 78 | tmp_5pts = np.array(REFERENCE_FACIAL_POINTS) 79 | tmp_crop_size = np.array(DEFAULT_CROP_SIZE) 80 | 81 | # 0) make the inner region a square 82 | if default_square: 83 | size_diff = max(tmp_crop_size) - tmp_crop_size 84 | tmp_5pts += size_diff / 2 85 | tmp_crop_size += size_diff 86 | 87 | #print('---> default:') 88 | #print(' crop_size = ', tmp_crop_size) 89 | #print(' reference_5pts = ', tmp_5pts) 90 | 91 | if (output_size and 92 | output_size[0] == tmp_crop_size[0] and 93 | output_size[1] == tmp_crop_size[1]): 94 | #print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size)) 95 | return tmp_5pts 96 | 97 | if (inner_padding_factor == 0 and 98 | outer_padding == (0, 0)): 99 | if output_size is None: 100 | #print('No paddings to do: return default reference points') 101 | return tmp_5pts 102 | else: 103 | raise FaceWarpException( 104 | 'No paddings to do, output_size must be None or {}'.format(tmp_crop_size)) 105 | 106 | # check output size 107 | if not (0 <= inner_padding_factor <= 1.0): 108 | raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)') 109 | 110 | if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0) 111 | and output_size is None): 112 | output_size = tmp_crop_size * \ 113 | (1 + inner_padding_factor * 2).astype(np.int32) 114 | output_size += np.array(outer_padding) 115 | #print(' deduced from paddings, output_size = ', output_size) 116 | 117 | if not (outer_padding[0] < output_size[0] 118 | and outer_padding[1] < output_size[1]): 119 | raise FaceWarpException('Not (outer_padding[0] < output_size[0]' 120 | 'and outer_padding[1] < output_size[1])') 121 | 122 | # 1) pad the inner region according inner_padding_factor 123 | #print('---> STEP1: pad the inner region according inner_padding_factor') 124 | if inner_padding_factor > 0: 125 | size_diff = tmp_crop_size * inner_padding_factor * 2 126 | tmp_5pts += size_diff / 2 127 | tmp_crop_size += np.round(size_diff).astype(np.int32) 128 | 129 | #print(' crop_size = ', tmp_crop_size) 130 | #print(' reference_5pts = ', tmp_5pts) 131 | 132 | # 2) resize the padded inner region 133 | #print('---> STEP2: resize the padded inner region') 134 | size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2 135 | #print(' crop_size = ', tmp_crop_size) 136 | #print(' size_bf_outer_pad = ', size_bf_outer_pad) 137 | 138 | if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]: 139 | raise FaceWarpException('Must have (output_size - outer_padding)' 140 | '= some_scale * (crop_size * (1.0 + inner_padding_factor)') 141 | 142 | scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0] 143 | #print(' resize scale_factor = ', scale_factor) 144 | tmp_5pts = tmp_5pts * scale_factor 145 | # size_diff = tmp_crop_size * (scale_factor - min(scale_factor)) 146 | # tmp_5pts = tmp_5pts + size_diff / 2 147 | tmp_crop_size = size_bf_outer_pad 148 | #print(' crop_size = 
', tmp_crop_size) 149 | #print(' reference_5pts = ', tmp_5pts) 150 | 151 | # 3) add outer_padding to make output_size 152 | reference_5point = tmp_5pts + np.array(outer_padding) 153 | tmp_crop_size = output_size 154 | #print('---> STEP3: add outer_padding to make output_size') 155 | #print(' crop_size = ', tmp_crop_size) 156 | #print(' reference_5pts = ', tmp_5pts) 157 | 158 | #print('===> end get_reference_facial_points\n') 159 | 160 | return reference_5point 161 | 162 | 163 | def get_affine_transform_matrix(src_pts, dst_pts): 164 | """ 165 | Function: 166 | ---------- 167 | get affine transform matrix 'tfm' from src_pts to dst_pts 168 | Parameters: 169 | ---------- 170 | @src_pts: Kx2 np.array 171 | source points matrix, each row is a pair of coordinates (x, y) 172 | @dst_pts: Kx2 np.array 173 | destination points matrix, each row is a pair of coordinates (x, y) 174 | Returns: 175 | ---------- 176 | @tfm: 2x3 np.array 177 | transform matrix from src_pts to dst_pts 178 | """ 179 | 180 | tfm = np.float32([[1, 0, 0], [0, 1, 0]]) 181 | n_pts = src_pts.shape[0] 182 | ones = np.ones((n_pts, 1), src_pts.dtype) 183 | src_pts_ = np.hstack([src_pts, ones]) 184 | dst_pts_ = np.hstack([dst_pts, ones]) 185 | 186 | # #print(('src_pts_:\n' + str(src_pts_)) 187 | # #print(('dst_pts_:\n' + str(dst_pts_)) 188 | 189 | A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_) 190 | 191 | # #print(('np.linalg.lstsq return A: \n' + str(A)) 192 | # #print(('np.linalg.lstsq return res: \n' + str(res)) 193 | # #print(('np.linalg.lstsq return rank: \n' + str(rank)) 194 | # #print(('np.linalg.lstsq return s: \n' + str(s)) 195 | 196 | if rank == 3: 197 | tfm = np.float32([ 198 | [A[0, 0], A[1, 0], A[2, 0]], 199 | [A[0, 1], A[1, 1], A[2, 1]] 200 | ]) 201 | elif rank == 2: 202 | tfm = np.float32([ 203 | [A[0, 0], A[1, 0], 0], 204 | [A[0, 1], A[1, 1], 0] 205 | ]) 206 | 207 | return tfm 208 | 209 | 210 | def warp_and_crop_face(src_img, 211 | facial_pts, 212 | reference_pts=None, 213 | crop_size=(96, 112), 214 | align_type='smilarity', return_trans_inv=False): 215 | """ 216 | Function: 217 | ---------- 218 | apply affine transform 'trans' to uv 219 | Parameters: 220 | ---------- 221 | @src_img: 3x3 np.array 222 | input image 223 | @facial_pts: could be 224 | 1)a list of K coordinates (x,y) 225 | or 226 | 2) Kx2 or 2xK np.array 227 | each row or col is a pair of coordinates (x, y) 228 | @reference_pts: could be 229 | 1) a list of K coordinates (x,y) 230 | or 231 | 2) Kx2 or 2xK np.array 232 | each row or col is a pair of coordinates (x, y) 233 | or 234 | 3) None 235 | if None, use default reference facial points 236 | @crop_size: (w, h) 237 | output face image size 238 | @align_type: transform type, could be one of 239 | 1) 'similarity': use similarity transform 240 | 2) 'cv2_affine': use the first 3 points to do affine transform, 241 | by calling cv2.getAffineTransform() 242 | 3) 'affine': use all points to do affine transform 243 | Returns: 244 | ---------- 245 | @face_img: output face image with size (w, h) = @crop_size 246 | """ 247 | 248 | if reference_pts is None: 249 | if crop_size[0] == 96 and crop_size[1] == 112: 250 | reference_pts = REFERENCE_FACIAL_POINTS 251 | else: 252 | default_square = False 253 | inner_padding_factor = 0 254 | outer_padding = (0, 0) 255 | output_size = crop_size 256 | 257 | reference_pts = get_reference_facial_points(output_size, 258 | inner_padding_factor, 259 | outer_padding, 260 | default_square) 261 | 262 | ref_pts = np.float32(reference_pts) 263 | ref_pts = (ref_pts - 112/2)*0.85 + 
112/2 264 | ref_pts *= crop_size[0]/112. 265 | ref_pts_shp = ref_pts.shape 266 | if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2: 267 | raise FaceWarpException( 268 | 'reference_pts.shape must be (K,2) or (2,K) and K>2') 269 | 270 | if ref_pts_shp[0] == 2: 271 | ref_pts = ref_pts.T 272 | 273 | src_pts = np.float32(facial_pts) 274 | src_pts_shp = src_pts.shape 275 | if max(src_pts_shp) < 3 or min(src_pts_shp) != 2: 276 | raise FaceWarpException( 277 | 'facial_pts.shape must be (K,2) or (2,K) and K>2') 278 | 279 | if src_pts_shp[0] == 2: 280 | src_pts = src_pts.T 281 | 282 | # #print('--->src_pts:\n', src_pts 283 | # #print('--->ref_pts\n', ref_pts 284 | 285 | if src_pts.shape != ref_pts.shape: 286 | raise FaceWarpException( 287 | 'facial_pts and reference_pts must have the same shape') 288 | 289 | if align_type is 'cv2_affine': 290 | tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3]) 291 | # #print(('cv2.getAffineTransform() returns tfm=\n' + str(tfm)) 292 | elif align_type is 'affine': 293 | tfm = get_affine_transform_matrix(src_pts, ref_pts) 294 | # #print(('get_affine_transform_matrix() returns tfm=\n' + str(tfm)) 295 | else: 296 | tfm, tfm_inv = get_similarity_transform_for_cv2(src_pts, ref_pts) 297 | # #print(('get_similarity_transform_for_cv2() returns tfm=\n' + str(tfm)) 298 | 299 | # #print('--->Transform matrix: ' 300 | # #print(('type(tfm):' + str(type(tfm))) 301 | # #print(('tfm.dtype:' + str(tfm.dtype)) 302 | # #print( tfm 303 | 304 | face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1])) 305 | 306 | if return_trans_inv: 307 | return face_img, tfm_inv 308 | else: 309 | return face_img 310 | 311 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/src/box_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | 5 | def nms(boxes, overlap_threshold=0.5, mode='union'): 6 | """Non-maximum suppression. 7 | 8 | Arguments: 9 | boxes: a float numpy array of shape [n, 5], 10 | where each row is (xmin, ymin, xmax, ymax, score). 11 | overlap_threshold: a float number. 12 | mode: 'union' or 'min'. 
13 | 14 | Returns: 15 | list with indices of the selected boxes 16 | """ 17 | 18 | # if there are no boxes, return the empty list 19 | if len(boxes) == 0: 20 | return [] 21 | 22 | # list of picked indices 23 | pick = [] 24 | 25 | # grab the coordinates of the bounding boxes 26 | x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)] 27 | 28 | area = (x2 - x1 + 1.0)*(y2 - y1 + 1.0) 29 | ids = np.argsort(score) # in increasing order 30 | 31 | while len(ids) > 0: 32 | 33 | # grab index of the largest value 34 | last = len(ids) - 1 35 | i = ids[last] 36 | pick.append(i) 37 | 38 | # compute intersections 39 | # of the box with the largest score 40 | # with the rest of boxes 41 | 42 | # left top corner of intersection boxes 43 | ix1 = np.maximum(x1[i], x1[ids[:last]]) 44 | iy1 = np.maximum(y1[i], y1[ids[:last]]) 45 | 46 | # right bottom corner of intersection boxes 47 | ix2 = np.minimum(x2[i], x2[ids[:last]]) 48 | iy2 = np.minimum(y2[i], y2[ids[:last]]) 49 | 50 | # width and height of intersection boxes 51 | w = np.maximum(0.0, ix2 - ix1 + 1.0) 52 | h = np.maximum(0.0, iy2 - iy1 + 1.0) 53 | 54 | # intersections' areas 55 | inter = w * h 56 | if mode == 'min': 57 | overlap = inter/np.minimum(area[i], area[ids[:last]]) 58 | elif mode == 'union': 59 | # intersection over union (IoU) 60 | overlap = inter/(area[i] + area[ids[:last]] - inter) 61 | 62 | # delete all boxes where overlap is too big 63 | ids = np.delete( 64 | ids, 65 | np.concatenate([[last], np.where(overlap > overlap_threshold)[0]]) 66 | ) 67 | 68 | return pick 69 | 70 | 71 | def convert_to_square(bboxes): 72 | """Convert bounding boxes to a square form. 73 | 74 | Arguments: 75 | bboxes: a float numpy array of shape [n, 5]. 76 | 77 | Returns: 78 | a float numpy array of shape [n, 5], 79 | squared bounding boxes. 80 | """ 81 | 82 | square_bboxes = np.zeros_like(bboxes) 83 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 84 | h = y2 - y1 + 1.0 85 | w = x2 - x1 + 1.0 86 | max_side = np.maximum(h, w) 87 | square_bboxes[:, 0] = x1 + w*0.5 - max_side*0.5 88 | square_bboxes[:, 1] = y1 + h*0.5 - max_side*0.5 89 | square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 90 | square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 91 | return square_bboxes 92 | 93 | 94 | def calibrate_box(bboxes, offsets): 95 | """Transform bounding boxes to be more like true bounding boxes. 96 | 'offsets' is one of the outputs of the nets. 97 | 98 | Arguments: 99 | bboxes: a float numpy array of shape [n, 5]. 100 | offsets: a float numpy array of shape [n, 4]. 101 | 102 | Returns: 103 | a float numpy array of shape [n, 5]. 104 | """ 105 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 106 | w = x2 - x1 + 1.0 107 | h = y2 - y1 + 1.0 108 | w = np.expand_dims(w, 1) 109 | h = np.expand_dims(h, 1) 110 | 111 | # this is what happening here: 112 | # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] 113 | # x1_true = x1 + tx1*w 114 | # y1_true = y1 + ty1*h 115 | # x2_true = x2 + tx2*w 116 | # y2_true = y2 + ty2*h 117 | # below is just more compact form of this 118 | 119 | # are offsets always such that 120 | # x1 < x2 and y1 < y2 ? 121 | 122 | translation = np.hstack([w, h, w, h])*offsets 123 | bboxes[:, 0:4] = bboxes[:, 0:4] + translation 124 | return bboxes 125 | 126 | 127 | def get_image_boxes(bounding_boxes, img, size=24): 128 | """Cut out boxes from the image. 129 | 130 | Arguments: 131 | bounding_boxes: a float numpy array of shape [n, 5]. 132 | img: an instance of PIL.Image. 133 | size: an integer, size of cutouts. 
134 | 135 | Returns: 136 | a float numpy array of shape [n, 3, size, size]. 137 | """ 138 | 139 | num_boxes = len(bounding_boxes) 140 | width, height = img.size 141 | 142 | [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height) 143 | img_boxes = np.zeros((num_boxes, 3, size, size), 'float32') 144 | 145 | for i in range(num_boxes): 146 | img_box = np.zeros((h[i], w[i], 3), 'uint8') 147 | 148 | img_array = np.asarray(img, 'uint8') 149 | img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] =\ 150 | img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :] 151 | 152 | # resize 153 | img_box = Image.fromarray(img_box) 154 | img_box = img_box.resize((size, size), Image.BILINEAR) 155 | img_box = np.asarray(img_box, 'float32') 156 | 157 | img_boxes[i, :, :, :] = _preprocess(img_box) 158 | 159 | return img_boxes 160 | 161 | 162 | def correct_bboxes(bboxes, width, height): 163 | """Crop boxes that are too big and get coordinates 164 | with respect to cutouts. 165 | 166 | Arguments: 167 | bboxes: a float numpy array of shape [n, 5], 168 | where each row is (xmin, ymin, xmax, ymax, score). 169 | width: a float number. 170 | height: a float number. 171 | 172 | Returns: 173 | dy, dx, edy, edx: a int numpy arrays of shape [n], 174 | coordinates of the boxes with respect to the cutouts. 175 | y, x, ey, ex: a int numpy arrays of shape [n], 176 | corrected ymin, xmin, ymax, xmax. 177 | h, w: a int numpy arrays of shape [n], 178 | just heights and widths of boxes. 179 | 180 | in the following order: 181 | [dy, edy, dx, edx, y, ey, x, ex, w, h]. 182 | """ 183 | 184 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 185 | w, h = x2 - x1 + 1.0, y2 - y1 + 1.0 186 | num_boxes = bboxes.shape[0] 187 | 188 | # 'e' stands for end 189 | # (x, y) -> (ex, ey) 190 | x, y, ex, ey = x1, y1, x2, y2 191 | 192 | # we need to cut out a box from the image. 193 | # (x, y, ex, ey) are corrected coordinates of the box 194 | # in the image. 195 | # (dx, dy, edx, edy) are coordinates of the box in the cutout 196 | # from the image. 197 | dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,)) 198 | edx, edy = w.copy() - 1.0, h.copy() - 1.0 199 | 200 | # if box's bottom right corner is too far right 201 | ind = np.where(ex > width - 1.0)[0] 202 | edx[ind] = w[ind] + width - 2.0 - ex[ind] 203 | ex[ind] = width - 1.0 204 | 205 | # if box's bottom right corner is too low 206 | ind = np.where(ey > height - 1.0)[0] 207 | edy[ind] = h[ind] + height - 2.0 - ey[ind] 208 | ey[ind] = height - 1.0 209 | 210 | # if box's top left corner is too far left 211 | ind = np.where(x < 0.0)[0] 212 | dx[ind] = 0.0 - x[ind] 213 | x[ind] = 0.0 214 | 215 | # if box's top left corner is too high 216 | ind = np.where(y < 0.0)[0] 217 | dy[ind] = 0.0 - y[ind] 218 | y[ind] = 0.0 219 | 220 | return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h] 221 | return_list = [i.astype('int32') for i in return_list] 222 | 223 | return return_list 224 | 225 | 226 | def _preprocess(img): 227 | """Preprocessing step before feeding the network. 228 | 229 | Arguments: 230 | img: a float numpy array of shape [h, w, c]. 231 | 232 | Returns: 233 | a float numpy array of shape [1, c, h, w]. 
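    Example:
        a minimal sketch (the 48x48 size and the dummy array are only for
        illustration; any h, w behave the same way):
        >>> dummy = np.zeros((48, 48, 3), 'float32')
        >>> _preprocess(dummy).shape
        (1, 3, 48, 48)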
234 | """ 235 | img = img.transpose((2, 0, 1)) 236 | img = np.expand_dims(img, 0) 237 | img = (img - 127.5)*0.0078125 238 | return img 239 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/src/detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import Variable 4 | from .get_nets import PNet, RNet, ONet 5 | from .box_utils import nms, calibrate_box, get_image_boxes, convert_to_square 6 | from .first_stage import run_first_stage 7 | 8 | 9 | def detect_faces(image, min_face_size=20.0, 10 | thresholds=[0.6, 0.7, 0.8], 11 | nms_thresholds=[0.7, 0.7, 0.7]): 12 | """ 13 | Arguments: 14 | image: an instance of PIL.Image. 15 | min_face_size: a float number. 16 | thresholds: a list of length 3. 17 | nms_thresholds: a list of length 3. 18 | 19 | Returns: 20 | two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10], 21 | bounding boxes and facial landmarks. 22 | """ 23 | 24 | # LOAD MODELS 25 | pnet = PNet() 26 | rnet = RNet() 27 | onet = ONet() 28 | onet.eval() 29 | 30 | # BUILD AN IMAGE PYRAMID 31 | width, height = image.size 32 | min_length = min(height, width) 33 | 34 | min_detection_size = 12 35 | factor = 0.707 # sqrt(0.5) 36 | 37 | # scales for scaling the image 38 | scales = [] 39 | 40 | # scales the image so that 41 | # minimum size that we can detect equals to 42 | # minimum face size that we want to detect 43 | m = min_detection_size/min_face_size 44 | min_length *= m 45 | 46 | factor_count = 0 47 | while min_length > min_detection_size: 48 | scales.append(m*factor**factor_count) 49 | min_length *= factor 50 | factor_count += 1 51 | 52 | # STAGE 1 53 | 54 | # it will be returned 55 | bounding_boxes = [] 56 | 57 | with torch.no_grad(): 58 | # run P-Net on different scales 59 | for s in scales: 60 | boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0]) 61 | bounding_boxes.append(boxes) 62 | 63 | # collect boxes (and offsets, and scores) from different scales 64 | bounding_boxes = [i for i in bounding_boxes if i is not None] 65 | bounding_boxes = np.vstack(bounding_boxes) 66 | 67 | keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0]) 68 | bounding_boxes = bounding_boxes[keep] 69 | 70 | # use offsets predicted by pnet to transform bounding boxes 71 | bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:]) 72 | # shape [n_boxes, 5] 73 | 74 | bounding_boxes = convert_to_square(bounding_boxes) 75 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 76 | 77 | # STAGE 2 78 | 79 | img_boxes = get_image_boxes(bounding_boxes, image, size=24) 80 | img_boxes = torch.FloatTensor(img_boxes) 81 | 82 | output = rnet(img_boxes) 83 | offsets = output[0].data.numpy() # shape [n_boxes, 4] 84 | probs = output[1].data.numpy() # shape [n_boxes, 2] 85 | 86 | keep = np.where(probs[:, 1] > thresholds[1])[0] 87 | bounding_boxes = bounding_boxes[keep] 88 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,)) 89 | offsets = offsets[keep] 90 | 91 | keep = nms(bounding_boxes, nms_thresholds[1]) 92 | bounding_boxes = bounding_boxes[keep] 93 | bounding_boxes = calibrate_box(bounding_boxes, offsets[keep]) 94 | bounding_boxes = convert_to_square(bounding_boxes) 95 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 96 | 97 | # STAGE 3 98 | 99 | img_boxes = get_image_boxes(bounding_boxes, image, size=48) 100 | if len(img_boxes) == 0: 101 | return [], [] 102 | img_boxes = torch.FloatTensor(img_boxes) 
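    # a rough shape sketch for the O-Net call below (n_boxes stands for however
    # many 48x48 crops survived stage 2; it is named here only for illustration).
    # Unlike P-Net and R-Net, O-Net also predicts 5 facial landmarks per box:
    #   output[0] landmarks: [n_boxes, 10]  (x1..x5, y1..y5, relative to each box)
    #   output[1] offsets:   [n_boxes, 4]
    #   output[2] probs:     [n_boxes, 2]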
103 | output = onet(img_boxes) 104 | landmarks = output[0].data.numpy() # shape [n_boxes, 10] 105 | offsets = output[1].data.numpy() # shape [n_boxes, 4] 106 | probs = output[2].data.numpy() # shape [n_boxes, 2] 107 | 108 | keep = np.where(probs[:, 1] > thresholds[2])[0] 109 | bounding_boxes = bounding_boxes[keep] 110 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,)) 111 | offsets = offsets[keep] 112 | landmarks = landmarks[keep] 113 | 114 | # compute landmark points 115 | width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0 116 | height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0 117 | xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1] 118 | landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5] 119 | landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10] 120 | 121 | bounding_boxes = calibrate_box(bounding_boxes, offsets) 122 | keep = nms(bounding_boxes, nms_thresholds[2], mode='min') 123 | bounding_boxes = bounding_boxes[keep] 124 | landmarks = landmarks[keep] 125 | 126 | return bounding_boxes, landmarks 127 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/src/first_stage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import math 4 | from PIL import Image 5 | import numpy as np 6 | from .box_utils import nms, _preprocess 7 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 8 | # device = 'cpu' 9 | 10 | def run_first_stage(image, net, scale, threshold): 11 | """Run P-Net, generate bounding boxes, and do NMS. 12 | 13 | Arguments: 14 | image: an instance of PIL.Image. 15 | net: an instance of pytorch's nn.Module, P-Net. 16 | scale: a float number, 17 | scale width and height of the image by this number. 18 | threshold: a float number, 19 | threshold on the probability of a face when generating 20 | bounding boxes from predictions of the net. 21 | 22 | Returns: 23 | a float numpy array of shape [n_boxes, 9], 24 | bounding boxes with scores and offsets (4 + 1 + 4). 25 | """ 26 | 27 | # scale the image and convert it to a float array 28 | width, height = image.size 29 | sw, sh = math.ceil(width*scale), math.ceil(height*scale) 30 | img = image.resize((sw, sh), Image.BILINEAR) 31 | img = np.asarray(img, 'float32') 32 | 33 | img = torch.FloatTensor(_preprocess(img)).to(device) 34 | with torch.no_grad(): 35 | output = net(img) 36 | probs = output[1].cpu().data.numpy()[0, 1, :, :] 37 | offsets = output[0].cpu().data.numpy() 38 | # probs: probability of a face at each sliding window 39 | # offsets: transformations to true bounding boxes 40 | 41 | boxes = _generate_bboxes(probs, offsets, scale, threshold) 42 | if len(boxes) == 0: 43 | return None 44 | 45 | keep = nms(boxes[:, 0:5], overlap_threshold=0.5) 46 | return boxes[keep] 47 | 48 | 49 | def _generate_bboxes(probs, offsets, scale, threshold): 50 | """Generate bounding boxes at places 51 | where there is probably a face. 52 | 53 | Arguments: 54 | probs: a float numpy array of shape [n, m]. 55 | offsets: a float numpy array of shape [1, 4, n, m]. 56 | scale: a float number, 57 | width and height of the image were scaled by this number. 58 | threshold: a float number. 
59 | 60 | Returns: 61 | a float numpy array of shape [n_boxes, 9] 62 | """ 63 | 64 | # applying P-Net is equivalent, in some sense, to 65 | # moving 12x12 window with stride 2 66 | stride = 2 67 | cell_size = 12 68 | 69 | # indices of boxes where there is probably a face 70 | inds = np.where(probs > threshold) 71 | 72 | if inds[0].size == 0: 73 | return np.array([]) 74 | 75 | # transformations of bounding boxes 76 | tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)] 77 | # they are defined as: 78 | # w = x2 - x1 + 1 79 | # h = y2 - y1 + 1 80 | # x1_true = x1 + tx1*w 81 | # x2_true = x2 + tx2*w 82 | # y1_true = y1 + ty1*h 83 | # y2_true = y2 + ty2*h 84 | 85 | offsets = np.array([tx1, ty1, tx2, ty2]) 86 | score = probs[inds[0], inds[1]] 87 | 88 | # P-Net is applied to scaled images 89 | # so we need to rescale bounding boxes back 90 | bounding_boxes = np.vstack([ 91 | np.round((stride*inds[1] + 1.0)/scale), 92 | np.round((stride*inds[0] + 1.0)/scale), 93 | np.round((stride*inds[1] + 1.0 + cell_size)/scale), 94 | np.round((stride*inds[0] + 1.0 + cell_size)/scale), 95 | score, offsets 96 | ]) 97 | # why one is added? 98 | 99 | return bounding_boxes.T 100 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/src/get_nets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | import numpy as np 6 | 7 | 8 | class Flatten(nn.Module): 9 | 10 | def __init__(self): 11 | super(Flatten, self).__init__() 12 | 13 | def forward(self, x): 14 | """ 15 | Arguments: 16 | x: a float tensor with shape [batch_size, c, h, w]. 17 | Returns: 18 | a float tensor with shape [batch_size, c*h*w]. 19 | """ 20 | 21 | # without this pretrained model isn't working 22 | x = x.transpose(3, 2).contiguous() 23 | 24 | return x.view(x.size(0), -1) 25 | 26 | 27 | class PNet(nn.Module): 28 | 29 | def __init__(self): 30 | 31 | super(PNet, self).__init__() 32 | 33 | # suppose we have input with size HxW, then 34 | # after first layer: H - 2, 35 | # after pool: ceil((H - 2)/2), 36 | # after second conv: ceil((H - 2)/2) - 2, 37 | # after last conv: ceil((H - 2)/2) - 4, 38 | # and the same for W 39 | 40 | self.features = nn.Sequential(OrderedDict([ 41 | ('conv1', nn.Conv2d(3, 10, 3, 1)), 42 | ('prelu1', nn.PReLU(10)), 43 | ('pool1', nn.MaxPool2d(2, 2, ceil_mode=True)), 44 | 45 | ('conv2', nn.Conv2d(10, 16, 3, 1)), 46 | ('prelu2', nn.PReLU(16)), 47 | 48 | ('conv3', nn.Conv2d(16, 32, 3, 1)), 49 | ('prelu3', nn.PReLU(32)) 50 | ])) 51 | 52 | self.conv4_1 = nn.Conv2d(32, 2, 1, 1) 53 | self.conv4_2 = nn.Conv2d(32, 4, 1, 1) 54 | 55 | weights = np.load('mtcnn_pytorch/src/weights/pnet.npy', allow_pickle=True)[()] 56 | for n, p in self.named_parameters(): 57 | p.data = torch.FloatTensor(weights[n]) 58 | 59 | def forward(self, x): 60 | """ 61 | Arguments: 62 | x: a float tensor with shape [batch_size, 3, h, w]. 63 | Returns: 64 | b: a float tensor with shape [batch_size, 4, h', w']. 65 | a: a float tensor with shape [batch_size, 2, h', w']. 
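        Shape sketch, following the layer arithmetic noted in __init__:
        h' = ceil((h - 2)/2) - 4 (same for w'), so a 12x12 input yields a
        single 1x1 output cell; this is why detection runs P-Net over an
        image pyramid rather than at one scale.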
66 | """ 67 | x = self.features(x) 68 | a = self.conv4_1(x) 69 | b = self.conv4_2(x) 70 | a = F.softmax(a, dim=-1) 71 | return b, a 72 | 73 | 74 | class RNet(nn.Module): 75 | 76 | def __init__(self): 77 | 78 | super(RNet, self).__init__() 79 | 80 | self.features = nn.Sequential(OrderedDict([ 81 | ('conv1', nn.Conv2d(3, 28, 3, 1)), 82 | ('prelu1', nn.PReLU(28)), 83 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)), 84 | 85 | ('conv2', nn.Conv2d(28, 48, 3, 1)), 86 | ('prelu2', nn.PReLU(48)), 87 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)), 88 | 89 | ('conv3', nn.Conv2d(48, 64, 2, 1)), 90 | ('prelu3', nn.PReLU(64)), 91 | 92 | ('flatten', Flatten()), 93 | ('conv4', nn.Linear(576, 128)), 94 | ('prelu4', nn.PReLU(128)) 95 | ])) 96 | 97 | self.conv5_1 = nn.Linear(128, 2) 98 | self.conv5_2 = nn.Linear(128, 4) 99 | 100 | weights = np.load('mtcnn_pytorch/src/weights/rnet.npy', allow_pickle=True)[()] 101 | for n, p in self.named_parameters(): 102 | p.data = torch.FloatTensor(weights[n]) 103 | 104 | def forward(self, x): 105 | """ 106 | Arguments: 107 | x: a float tensor with shape [batch_size, 3, h, w]. 108 | Returns: 109 | b: a float tensor with shape [batch_size, 4]. 110 | a: a float tensor with shape [batch_size, 2]. 111 | """ 112 | x = self.features(x) 113 | a = self.conv5_1(x) 114 | b = self.conv5_2(x) 115 | a = F.softmax(a, dim=-1) 116 | return b, a 117 | 118 | 119 | class ONet(nn.Module): 120 | 121 | def __init__(self): 122 | 123 | super(ONet, self).__init__() 124 | 125 | self.features = nn.Sequential(OrderedDict([ 126 | ('conv1', nn.Conv2d(3, 32, 3, 1)), 127 | ('prelu1', nn.PReLU(32)), 128 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)), 129 | 130 | ('conv2', nn.Conv2d(32, 64, 3, 1)), 131 | ('prelu2', nn.PReLU(64)), 132 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)), 133 | 134 | ('conv3', nn.Conv2d(64, 64, 3, 1)), 135 | ('prelu3', nn.PReLU(64)), 136 | ('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)), 137 | 138 | ('conv4', nn.Conv2d(64, 128, 2, 1)), 139 | ('prelu4', nn.PReLU(128)), 140 | 141 | ('flatten', Flatten()), 142 | ('conv5', nn.Linear(1152, 256)), 143 | ('drop5', nn.Dropout(0.25)), 144 | ('prelu5', nn.PReLU(256)), 145 | ])) 146 | 147 | self.conv6_1 = nn.Linear(256, 2) 148 | self.conv6_2 = nn.Linear(256, 4) 149 | self.conv6_3 = nn.Linear(256, 10) 150 | 151 | weights = np.load('mtcnn_pytorch/src/weights/onet.npy', allow_pickle=True)[()] 152 | for n, p in self.named_parameters(): 153 | p.data = torch.FloatTensor(weights[n]) 154 | 155 | def forward(self, x): 156 | """ 157 | Arguments: 158 | x: a float tensor with shape [batch_size, 3, h, w]. 159 | Returns: 160 | c: a float tensor with shape [batch_size, 10]. 161 | b: a float tensor with shape [batch_size, 4]. 162 | a: a float tensor with shape [batch_size, 2]. 
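        Shape sketch for the 48x48 crops used in the third stage:
        48 -> 46 -> 23 -> 21 -> 10 -> 8 -> 4 -> 3 spatially, so the flattened
        feature has 128*3*3 = 1152 values, matching nn.Linear(1152, 256) above.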
163 | """ 164 | x = self.features(x) 165 | a = self.conv6_1(x) 166 | b = self.conv6_2(x) 167 | c = self.conv6_3(x) 168 | a = F.softmax(a, dim = -1) 169 | return c, b, a 170 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/src/matlab_cp2tform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Jul 11 06:54:28 2017 4 | 5 | @author: zhaoyafei 6 | """ 7 | 8 | import numpy as np 9 | from numpy.linalg import inv, norm, lstsq 10 | from numpy.linalg import matrix_rank as rank 11 | 12 | class MatlabCp2tormException(Exception): 13 | def __str__(self): 14 | return 'In File {}:{}'.format( 15 | __file__, super.__str__(self)) 16 | 17 | def tformfwd(trans, uv): 18 | """ 19 | Function: 20 | ---------- 21 | apply affine transform 'trans' to uv 22 | 23 | Parameters: 24 | ---------- 25 | @trans: 3x3 np.array 26 | transform matrix 27 | @uv: Kx2 np.array 28 | each row is a pair of coordinates (x, y) 29 | 30 | Returns: 31 | ---------- 32 | @xy: Kx2 np.array 33 | each row is a pair of transformed coordinates (x, y) 34 | """ 35 | uv = np.hstack(( 36 | uv, np.ones((uv.shape[0], 1)) 37 | )) 38 | xy = np.dot(uv, trans) 39 | xy = xy[:, 0:-1] 40 | return xy 41 | 42 | 43 | def tforminv(trans, uv): 44 | """ 45 | Function: 46 | ---------- 47 | apply the inverse of affine transform 'trans' to uv 48 | 49 | Parameters: 50 | ---------- 51 | @trans: 3x3 np.array 52 | transform matrix 53 | @uv: Kx2 np.array 54 | each row is a pair of coordinates (x, y) 55 | 56 | Returns: 57 | ---------- 58 | @xy: Kx2 np.array 59 | each row is a pair of inverse-transformed coordinates (x, y) 60 | """ 61 | Tinv = inv(trans) 62 | xy = tformfwd(Tinv, uv) 63 | return xy 64 | 65 | 66 | def findNonreflectiveSimilarity(uv, xy, options=None): 67 | 68 | options = {'K': 2} 69 | 70 | K = options['K'] 71 | M = xy.shape[0] 72 | x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 73 | y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector 74 | # print('--->x, y:\n', x, y 75 | 76 | tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1)))) 77 | tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1)))) 78 | X = np.vstack((tmp1, tmp2)) 79 | # print('--->X.shape: ', X.shape 80 | # print('X:\n', X 81 | 82 | u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 83 | v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector 84 | U = np.vstack((u, v)) 85 | # print('--->U.shape: ', U.shape 86 | # print('U:\n', U 87 | 88 | # We know that X * r = U 89 | if rank(X) >= 2 * K: 90 | r, _, _, _ = lstsq(X, U) 91 | r = np.squeeze(r) 92 | else: 93 | raise Exception('cp2tform:twoUniquePointsReq') 94 | 95 | # print('--->r:\n', r 96 | 97 | sc = r[0] 98 | ss = r[1] 99 | tx = r[2] 100 | ty = r[3] 101 | 102 | Tinv = np.array([ 103 | [sc, -ss, 0], 104 | [ss, sc, 0], 105 | [tx, ty, 1] 106 | ]) 107 | 108 | # print('--->Tinv:\n', Tinv 109 | 110 | T = inv(Tinv) 111 | # print('--->T:\n', T 112 | 113 | T[:, 2] = np.array([0, 0, 1]) 114 | 115 | return T, Tinv 116 | 117 | 118 | def findSimilarity(uv, xy, options=None): 119 | 120 | options = {'K': 2} 121 | 122 | # uv = np.array(uv) 123 | # xy = np.array(xy) 124 | 125 | # Solve for trans1 126 | trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options) 127 | 128 | # Solve for trans2 129 | 130 | # manually reflect the xy data across the Y-axis 131 | xyR = xy 132 | xyR[:, 0] = -1 * xyR[:, 0] 133 | 134 | trans2r, trans2r_inv = 
findNonreflectiveSimilarity(uv, xyR, options) 135 | 136 | # manually reflect the tform to undo the reflection done on xyR 137 | TreflectY = np.array([ 138 | [-1, 0, 0], 139 | [0, 1, 0], 140 | [0, 0, 1] 141 | ]) 142 | 143 | trans2 = np.dot(trans2r, TreflectY) 144 | 145 | # Figure out if trans1 or trans2 is better 146 | xy1 = tformfwd(trans1, uv) 147 | norm1 = norm(xy1 - xy) 148 | 149 | xy2 = tformfwd(trans2, uv) 150 | norm2 = norm(xy2 - xy) 151 | 152 | if norm1 <= norm2: 153 | return trans1, trans1_inv 154 | else: 155 | trans2_inv = inv(trans2) 156 | return trans2, trans2_inv 157 | 158 | 159 | def get_similarity_transform(src_pts, dst_pts, reflective=True): 160 | """ 161 | Function: 162 | ---------- 163 | Find Similarity Transform Matrix 'trans': 164 | u = src_pts[:, 0] 165 | v = src_pts[:, 1] 166 | x = dst_pts[:, 0] 167 | y = dst_pts[:, 1] 168 | [x, y, 1] = [u, v, 1] * trans 169 | 170 | Parameters: 171 | ---------- 172 | @src_pts: Kx2 np.array 173 | source points, each row is a pair of coordinates (x, y) 174 | @dst_pts: Kx2 np.array 175 | destination points, each row is a pair of transformed 176 | coordinates (x, y) 177 | @reflective: True or False 178 | if True: 179 | use reflective similarity transform 180 | else: 181 | use non-reflective similarity transform 182 | 183 | Returns: 184 | ---------- 185 | @trans: 3x3 np.array 186 | transform matrix from uv to xy 187 | trans_inv: 3x3 np.array 188 | inverse of trans, transform matrix from xy to uv 189 | """ 190 | 191 | if reflective: 192 | trans, trans_inv = findSimilarity(src_pts, dst_pts) 193 | else: 194 | trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts) 195 | 196 | return trans, trans_inv 197 | 198 | 199 | def cvt_tform_mat_for_cv2(trans): 200 | """ 201 | Function: 202 | ---------- 203 | Convert Transform Matrix 'trans' into 'cv2_trans' which could be 204 | directly used by cv2.warpAffine(): 205 | u = src_pts[:, 0] 206 | v = src_pts[:, 1] 207 | x = dst_pts[:, 0] 208 | y = dst_pts[:, 1] 209 | [x, y].T = cv_trans * [u, v, 1].T 210 | 211 | Parameters: 212 | ---------- 213 | @trans: 3x3 np.array 214 | transform matrix from uv to xy 215 | 216 | Returns: 217 | ---------- 218 | @cv2_trans: 2x3 np.array 219 | transform matrix from src_pts to dst_pts, could be directly used 220 | for cv2.warpAffine() 221 | """ 222 | cv2_trans = trans[:, 0:2].T 223 | 224 | return cv2_trans 225 | 226 | 227 | def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True): 228 | """ 229 | Function: 230 | ---------- 231 | Find Similarity Transform Matrix 'cv2_trans' which could be 232 | directly used by cv2.warpAffine(): 233 | u = src_pts[:, 0] 234 | v = src_pts[:, 1] 235 | x = dst_pts[:, 0] 236 | y = dst_pts[:, 1] 237 | [x, y].T = cv_trans * [u, v, 1].T 238 | 239 | Parameters: 240 | ---------- 241 | @src_pts: Kx2 np.array 242 | source points, each row is a pair of coordinates (x, y) 243 | @dst_pts: Kx2 np.array 244 | destination points, each row is a pair of transformed 245 | coordinates (x, y) 246 | reflective: True or False 247 | if True: 248 | use reflective similarity transform 249 | else: 250 | use non-reflective similarity transform 251 | 252 | Returns: 253 | ---------- 254 | @cv2_trans: 2x3 np.array 255 | transform matrix from src_pts to dst_pts, could be directly used 256 | for cv2.warpAffine() 257 | """ 258 | trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective) 259 | cv2_trans = cvt_tform_mat_for_cv2(trans) 260 | cv2_trans_inv = cvt_tform_mat_for_cv2(trans_inv) 261 | 262 | return cv2_trans, cv2_trans_inv 263 | 264 
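# A minimal usage sketch, kept as a comment so nothing runs at import time;
# `src_img`, `facial_5pts` and `reference_5pts` are placeholders (and cv2 is
# not imported in this module). warp_and_crop_face() in align_trans.py wraps
# essentially this pair of calls:
#
#   tfm, tfm_inv = get_similarity_transform_for_cv2(np.float32(facial_5pts),
#                                                   np.float32(reference_5pts))
#   aligned = cv2.warpAffine(src_img, tfm, (96, 112))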
| 265 | if __name__ == '__main__': 266 | """ 267 | u = [0, 6, -2] 268 | v = [0, 3, 5] 269 | x = [-1, 0, 4] 270 | y = [-1, -10, 4] 271 | 272 | # In Matlab, run: 273 | # 274 | # uv = [u'; v']; 275 | # xy = [x'; y']; 276 | # tform_sim=cp2tform(uv,xy,'similarity'); 277 | # 278 | # trans = tform_sim.tdata.T 279 | # ans = 280 | # -0.0764 -1.6190 0 281 | # 1.6190 -0.0764 0 282 | # -3.2156 0.0290 1.0000 283 | # trans_inv = tform_sim.tdata.Tinv 284 | # ans = 285 | # 286 | # -0.0291 0.6163 0 287 | # -0.6163 -0.0291 0 288 | # -0.0756 1.9826 1.0000 289 | # xy_m=tformfwd(tform_sim, u,v) 290 | # 291 | # xy_m = 292 | # 293 | # -3.2156 0.0290 294 | # 1.1833 -9.9143 295 | # 5.0323 2.8853 296 | # uv_m=tforminv(tform_sim, x,y) 297 | # 298 | # uv_m = 299 | # 300 | # 0.5698 1.3953 301 | # 6.0872 2.2733 302 | # -2.6570 4.3314 303 | """ 304 | u = [0, 6, -2] 305 | v = [0, 3, 5] 306 | x = [-1, 0, 4] 307 | y = [-1, -10, 4] 308 | 309 | uv = np.array((u, v)).T 310 | xy = np.array((x, y)).T 311 | 312 | print('\n--->uv:') 313 | print(uv) 314 | print('\n--->xy:') 315 | print(xy) 316 | 317 | trans, trans_inv = get_similarity_transform(uv, xy) 318 | 319 | print('\n--->trans matrix:') 320 | print(trans) 321 | 322 | print('\n--->trans_inv matrix:') 323 | print(trans_inv) 324 | 325 | print('\n---> apply transform to uv') 326 | print('\nxy_m = uv_augmented * trans') 327 | uv_aug = np.hstack(( 328 | uv, np.ones((uv.shape[0], 1)) 329 | )) 330 | xy_m = np.dot(uv_aug, trans) 331 | print(xy_m) 332 | 333 | print('\nxy_m = tformfwd(trans, uv)') 334 | xy_m = tformfwd(trans, uv) 335 | print(xy_m) 336 | 337 | print('\n---> apply inverse transform to xy') 338 | print('\nuv_m = xy_augmented * trans_inv') 339 | xy_aug = np.hstack(( 340 | xy, np.ones((xy.shape[0], 1)) 341 | )) 342 | uv_m = np.dot(xy_aug, trans_inv) 343 | print(uv_m) 344 | 345 | print('\nuv_m = tformfwd(trans_inv, xy)') 346 | uv_m = tformfwd(trans_inv, xy) 347 | print(uv_m) 348 | 349 | uv_m = tforminv(trans, xy) 350 | print('\nuv_m = tforminv(trans, xy)') 351 | print(uv_m) 352 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/src/visualization_utils.py: -------------------------------------------------------------------------------- 1 | from PIL import ImageDraw 2 | 3 | 4 | def show_bboxes(img, bounding_boxes, facial_landmarks=[]): 5 | """Draw bounding boxes and facial landmarks. 6 | 7 | Arguments: 8 | img: an instance of PIL.Image. 9 | bounding_boxes: a float numpy array of shape [n, 5]. 10 | facial_landmarks: a float numpy array of shape [n, 10]. 11 | 12 | Returns: 13 | an instance of PIL.Image. 
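    Usage sketch (assumes `img` is a PIL.Image and detect_faces() from
    detector.py is available):
        bounding_boxes, landmarks = detect_faces(img)
        show_bboxes(img, bounding_boxes, landmarks).show()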
14 | """ 15 | 16 | img_copy = img.copy() 17 | draw = ImageDraw.Draw(img_copy) 18 | 19 | for b in bounding_boxes: 20 | draw.rectangle([ 21 | (b[0], b[1]), (b[2], b[3]) 22 | ], outline='white') 23 | 24 | for p in facial_landmarks: 25 | for i in range(5): 26 | draw.ellipse([ 27 | (p[i] - 1.0, p[i + 5] - 1.0), 28 | (p[i] + 1.0, p[i + 5] + 1.0) 29 | ], outline='blue') 30 | 31 | return img_copy 32 | -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/src/weights/onet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/src/weights/onet.npy -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/src/weights/pnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/src/weights/pnet.npy -------------------------------------------------------------------------------- /face_modules/mtcnn_pytorch/src/weights/rnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/face_modules/mtcnn_pytorch/src/weights/rnet.npy -------------------------------------------------------------------------------- /face_modules/preprocess_images.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mtcnn import MTCNN 3 | import cv2 4 | import numpy as np 5 | 6 | import PIL.Image as Image 7 | from model import Backbone, Arcface, MobileFaceNet, Am_softmax, l2_norm 8 | from torchvision import transforms as trans 9 | import os 10 | # import libnvjpeg 11 | # import pickle 12 | 13 | img_root_dir = '/media/taotao/958c7d2d-c4ce-4117-a93b-c8a7aa4b88e3/taotao/part1/' 14 | save_path = '/media/taotao/958c7d2d-c4ce-4117-a93b-c8a7aa4b88e3/taotao/stars_256_0.85/' 15 | # embed_path = '/home/taotao/Downloads/celeb-aligned-256/embed.pkl' 16 | 17 | device = torch.device('cuda:0') 18 | mtcnn = MTCNN() 19 | 20 | model = Backbone(50, 0.6, 'ir_se').to(device) 21 | model.eval() 22 | model.load_state_dict(torch.load('./model_ir_se50.pth')) 23 | 24 | # threshold = 1.54 25 | test_transform = trans.Compose([ 26 | trans.ToTensor(), 27 | trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 28 | ]) 29 | 30 | # decoder = libnvjpeg.py_NVJpegDecoder() 31 | 32 | ind = 0 33 | embed_map = {} 34 | 35 | for root, dirs, files in os.walk(img_root_dir): 36 | for name in files: 37 | if name.endswith('jpg') or name.endswith('png'): 38 | try: 39 | p = os.path.join(root, name) 40 | img = cv2.imread(p)[:, :, ::-1] 41 | faces = mtcnn.align_multi(Image.fromarray(img), min_face_size=64, crop_size=(256, 256)) 42 | if len(faces) == 0: 43 | continue 44 | for face in faces: 45 | # scaled_img = face.resize((112, 112), Image.ANTIALIAS) 46 | # with torch.no_grad(): 47 | # embed = model(test_transform(scaled_img).unsqueeze(0).cuda()).squeeze().cpu().numpy() 48 | new_path = '%08d.jpg'%ind 49 | ind += 1 50 | print(new_path) 51 | face.save(os.path.join(save_path, new_path)) 52 | # embed_map[new_path] = embed.detach().cpu() 53 | except Exception as e: 54 | continue 55 | 56 | # with open(embed_path, 'wb') as f: 57 | # pickle.dump(embed_map, f) 58 | # 59 | # img = 
cv2.imread('/home/taotao/Pictures/47d947b4d9cf3e2f62c0c8023a1c0dea.jpg')[:,:,::-1] 60 | # # bboxes, faces = mtcnn.align_multi(Image.fromarray(img), limit=10, min_face_size=30) 61 | # bboxes, faces = mtcnn.align(Image.fromarray(img)) 62 | # input = test_transform(faces[0]).unsqueeze(0) 63 | # embed = model(input.cuda()) 64 | # print(embed.shape) 65 | # print(bboxes) 66 | # face = np.array(faces[0])[:,:,::-1] 67 | # cv2.imshow('', face) 68 | # cv2.waitKey(0) 69 | -------------------------------------------------------------------------------- /inference_demo.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./face_modules/') 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torch.nn.functional as F 6 | from face_modules.model import Backbone, Arcface, MobileFaceNet, Am_softmax, l2_norm 7 | from network.AEI_Net import * 8 | from face_modules.mtcnn import * 9 | import cv2 10 | import PIL.Image as Image 11 | import numpy as np 12 | 13 | detector = MTCNN() 14 | device = torch.device('cuda') 15 | G = AEI_Net(c_id=512) 16 | G.eval() 17 | G.load_state_dict(torch.load('./saved_models/G_latest.pth', map_location=torch.device('cpu'))) 18 | G = G.cuda() 19 | 20 | arcface = Backbone(50, 0.6, 'ir_se').to(device) 21 | arcface.eval() 22 | arcface.load_state_dict(torch.load('./face_modules/model_ir_se50.pth', map_location=device), strict=False) 23 | 24 | test_transform = transforms.Compose([ 25 | transforms.ToTensor(), 26 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 27 | ]) 28 | 29 | Xs_path = '/home/taotao/Pictures/u=3322705847,3022779128&fm=26&gp=0.jpg' 30 | Xt_path = '/home/taotao/Pictures/u=3977885541,1855342996&fm=11&gp=0.jpg' 31 | 32 | Xs_raw = cv2.imread(Xs_path) 33 | Xt_raw = cv2.imread(Xt_path) 34 | Xs = detector.align(Image.fromarray(Xs_raw), crop_size=(256, 256)) 35 | Xt = detector.align(Image.fromarray(Xt_raw), crop_size=(256, 256)) 36 | 37 | Xs_raw = np.array(Xs) 38 | Xt_raw = np.array(Xt) 39 | 40 | Xs = test_transform(Xs) 41 | Xt = test_transform(Xt) 42 | 43 | Xs = Xs.unsqueeze(0).cuda() 44 | Xt = Xt.unsqueeze(0).cuda() 45 | with torch.no_grad(): 46 | embeds, _ = arcface(F.interpolate(Xs[:, :, 19:237, 19:237], (112, 112), mode='bilinear', align_corners=True)) 47 | embedt, __ = arcface(F.interpolate(Xt[:, :, 19:237, 19:237], (112, 112), mode='bilinear', align_corners=True)) 48 | Yt, _ = G(Xt, embeds) 49 | Ys, _ = G(Xs, embedt) 50 | Ys = Ys.squeeze().detach().cpu().numpy().transpose([1, 2, 0])*0.5 + 0.5 51 | Yt = Yt.squeeze().detach().cpu().numpy().transpose([1, 2, 0])*0.5 + 0.5 52 | 53 | Y = np.concatenate((Ys, Yt), axis=1) 54 | X = np.concatenate((Xs_raw/255., Xt_raw/255.), axis=1) 55 | image = np.concatenate((X, Y), axis=0) 56 | cv2.imshow('image', image) 57 | cv2.waitKey(0) 58 | -------------------------------------------------------------------------------- /mtcnn_pytorch: -------------------------------------------------------------------------------- 1 | ./face_modules/mtcnn_pytorch/ -------------------------------------------------------------------------------- /network/AADLayer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class AADLayer(nn.Module): 6 | def __init__(self, c_x, attr_c, c_id=256): 7 | super(AADLayer, self).__init__() 8 | self.attr_c = attr_c 9 | self.c_id = c_id 10 | self.c_x = c_x 11 | 12 | self.conv1 = nn.Conv2d(attr_c, c_x, kernel_size=1, stride=1, padding=0, 
bias=True) 13 | self.conv2 = nn.Conv2d(attr_c, c_x, kernel_size=1, stride=1, padding=0, bias=True) 14 | self.fc1 = nn.Linear(c_id, c_x) 15 | self.fc2 = nn.Linear(c_id, c_x) 16 | self.norm = nn.InstanceNorm2d(c_x, affine=False) 17 | 18 | self.conv_h = nn.Conv2d(c_x, 1, kernel_size=1, stride=1, padding=0, bias=True) 19 | 20 | def forward(self, h_in, z_attr, z_id): 21 | # h_in cxnxn 22 | # zid 256x1x1 23 | # zattr cxnxn 24 | h = self.norm(h_in) 25 | gamma_attr = self.conv1(z_attr) 26 | beta_attr = self.conv2(z_attr) 27 | 28 | gamma_id = self.fc1(z_id) 29 | beta_id = self.fc2(z_id) 30 | A = gamma_attr * h + beta_attr 31 | gamma_id = gamma_id.reshape(h.shape[0], self.c_x, 1, 1).expand_as(h) 32 | beta_id = beta_id.reshape(h.shape[0], self.c_x, 1, 1).expand_as(h) 33 | I = gamma_id * h + beta_id 34 | 35 | M = torch.sigmoid(self.conv_h(h)) 36 | 37 | out = (torch.ones_like(M).to(M.device) - M) * A + M * I 38 | return out 39 | 40 | 41 | class AAD_ResBlk(nn.Module): 42 | def __init__(self, cin, cout, c_attr, c_id=256): 43 | super(AAD_ResBlk, self).__init__() 44 | self.cin = cin 45 | self.cout = cout 46 | 47 | self.AAD1 = AADLayer(cin, c_attr, c_id) 48 | self.conv1 = nn.Conv2d(cin, cin, kernel_size=3, stride=1, padding=1, bias=False) 49 | self.relu1 = nn.ReLU(inplace=True) 50 | 51 | self.AAD2 = AADLayer(cin, c_attr, c_id) 52 | self.conv2 = nn.Conv2d(cin, cout, kernel_size=3, stride=1, padding=1, bias=False) 53 | self.relu2 = nn.ReLU(inplace=True) 54 | 55 | if cin != cout: 56 | self.AAD3 = AADLayer(cin, c_attr, c_id) 57 | self.conv3 = nn.Conv2d(cin, cout, kernel_size=3, stride=1, padding=1, bias=False) 58 | self.relu3 = nn.ReLU(inplace=True) 59 | 60 | def forward(self, h, z_attr, z_id): 61 | x = self.AAD1(h, z_attr, z_id) 62 | x = self.relu1(x) 63 | x = self.conv1(x) 64 | 65 | x = self.AAD2(x,z_attr, z_id) 66 | x = self.relu2(x) 67 | x = self.conv2(x) 68 | 69 | if self.cin != self.cout: 70 | h = self.AAD3(h, z_attr, z_id) 71 | h = self.relu3(h) 72 | h = self.conv3(h) 73 | x = x + h 74 | 75 | return x 76 | 77 | 78 | -------------------------------------------------------------------------------- /network/AEI_Net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .AADLayer import * 5 | 6 | 7 | def weight_init(m): 8 | if isinstance(m, nn.Linear): 9 | m.weight.data.normal_(0, 0.001) 10 | m.bias.data.zero_() 11 | if isinstance(m, nn.Conv2d): 12 | nn.init.xavier_normal_(m.weight.data) 13 | 14 | if isinstance(m, nn.ConvTranspose2d): 15 | nn.init.xavier_normal_(m.weight.data) 16 | 17 | 18 | def conv4x4(in_c, out_c, norm=nn.BatchNorm2d): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels=in_c, out_channels=out_c, kernel_size=4, stride=2, padding=1, bias=False), 21 | norm(out_c), 22 | nn.LeakyReLU(0.1, inplace=True) 23 | ) 24 | 25 | 26 | class deconv4x4(nn.Module): 27 | def __init__(self, in_c, out_c, norm=nn.BatchNorm2d): 28 | super(deconv4x4, self).__init__() 29 | self.deconv = nn.ConvTranspose2d(in_channels=in_c, out_channels=out_c, kernel_size=4, stride=2, padding=1, bias=False) 30 | self.bn = norm(out_c) 31 | self.lrelu = nn.LeakyReLU(0.1, inplace=True) 32 | 33 | def forward(self, input, skip): 34 | x = self.deconv(input) 35 | x = self.bn(x) 36 | x = self.lrelu(x) 37 | return torch.cat((x, skip), dim=1) 38 | 39 | 40 | class MLAttrEncoder(nn.Module): 41 | def __init__(self): 42 | super(MLAttrEncoder, self).__init__() 43 | self.conv1 = conv4x4(3, 32) 44 | self.conv2 = conv4x4(32, 64) 45 
| self.conv3 = conv4x4(64, 128) 46 | self.conv4 = conv4x4(128, 256) 47 | self.conv5 = conv4x4(256, 512) 48 | self.conv6 = conv4x4(512, 1024) 49 | self.conv7 = conv4x4(1024, 1024) 50 | 51 | self.deconv1 = deconv4x4(1024, 1024) 52 | self.deconv2 = deconv4x4(2048, 512) 53 | self.deconv3 = deconv4x4(1024, 256) 54 | self.deconv4 = deconv4x4(512, 128) 55 | self.deconv5 = deconv4x4(256, 64) 56 | self.deconv6 = deconv4x4(128, 32) 57 | 58 | self.apply(weight_init) 59 | 60 | def forward(self, Xt): 61 | feat1 = self.conv1(Xt) 62 | # 32x128x128 63 | feat2 = self.conv2(feat1) 64 | # 64x64x64 65 | feat3 = self.conv3(feat2) 66 | # 128x32x32 67 | feat4 = self.conv4(feat3) 68 | # 256x16xx16 69 | feat5 = self.conv5(feat4) 70 | # 512x8x8 71 | feat6 = self.conv6(feat5) 72 | # 1024x4x4 73 | z_attr1 = self.conv7(feat6) 74 | # 1024x2x2 75 | 76 | z_attr2 = self.deconv1(z_attr1, feat6) 77 | z_attr3 = self.deconv2(z_attr2, feat5) 78 | z_attr4 = self.deconv3(z_attr3, feat4) 79 | z_attr5 = self.deconv4(z_attr4, feat3) 80 | z_attr6 = self.deconv5(z_attr5, feat2) 81 | z_attr7 = self.deconv6(z_attr6, feat1) 82 | z_attr8 = F.interpolate(z_attr7, scale_factor=2, mode='bilinear', align_corners=True) 83 | return z_attr1, z_attr2, z_attr3, z_attr4, z_attr5, z_attr6, z_attr7, z_attr8 84 | 85 | 86 | class AADGenerator(nn.Module): 87 | def __init__(self, c_id=256): 88 | super(AADGenerator, self).__init__() 89 | self.up1 = nn.ConvTranspose2d(c_id, 1024, kernel_size=2, stride=1, padding=0) 90 | self.AADBlk1 = AAD_ResBlk(1024, 1024, 1024, c_id) 91 | self.AADBlk2 = AAD_ResBlk(1024, 1024, 2048, c_id) 92 | self.AADBlk3 = AAD_ResBlk(1024, 1024, 1024, c_id) 93 | self.AADBlk4 = AAD_ResBlk(1024, 512, 512, c_id) 94 | self.AADBlk5 = AAD_ResBlk(512, 256, 256, c_id) 95 | self.AADBlk6 = AAD_ResBlk(256, 128, 128, c_id) 96 | self.AADBlk7 = AAD_ResBlk(128, 64, 64, c_id) 97 | self.AADBlk8 = AAD_ResBlk(64, 3, 64, c_id) 98 | 99 | self.apply(weight_init) 100 | 101 | def forward(self, z_attr, z_id): 102 | m = self.up1(z_id.reshape(z_id.shape[0], -1, 1, 1)) 103 | m2 = F.interpolate(self.AADBlk1(m, z_attr[0], z_id), scale_factor=2, mode='bilinear', align_corners=True) 104 | m3 = F.interpolate(self.AADBlk2(m2, z_attr[1], z_id), scale_factor=2, mode='bilinear', align_corners=True) 105 | m4 = F.interpolate(self.AADBlk3(m3, z_attr[2], z_id), scale_factor=2, mode='bilinear', align_corners=True) 106 | m5 = F.interpolate(self.AADBlk4(m4, z_attr[3], z_id), scale_factor=2, mode='bilinear', align_corners=True) 107 | m6 = F.interpolate(self.AADBlk5(m5, z_attr[4], z_id), scale_factor=2, mode='bilinear', align_corners=True) 108 | m7 = F.interpolate(self.AADBlk6(m6, z_attr[5], z_id), scale_factor=2, mode='bilinear', align_corners=True) 109 | m8 = F.interpolate(self.AADBlk7(m7, z_attr[6], z_id), scale_factor=2, mode='bilinear', align_corners=True) 110 | y = self.AADBlk8(m8, z_attr[7], z_id) 111 | return torch.tanh(y) 112 | 113 | 114 | class AEI_Net(nn.Module): 115 | def __init__(self, c_id=256): 116 | super(AEI_Net, self).__init__() 117 | self.encoder = MLAttrEncoder() 118 | self.generator = AADGenerator(c_id) 119 | 120 | def forward(self, Xt, z_id): 121 | attr = self.encoder(Xt) 122 | Y = self.generator(attr, z_id) 123 | return Y, attr 124 | 125 | def get_attr(self, X): 126 | # with torch.no_grad(): 127 | return self.encoder(X) 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /network/HEAR_Net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch 
import nn 3 | 4 | 5 | def conv4x4(in_c, out_c): 6 | return nn.Sequential( 7 | nn.Conv2d(in_c, out_c,kernel_size=4, stride=2, padding=1), 8 | nn.BatchNorm2d(out_c), 9 | nn.LeakyReLU(0.1, inplace=True), 10 | ) 11 | 12 | 13 | def deconv4x4(in_c, out_c): 14 | return nn.Sequential( 15 | nn.ConvTranspose2d(in_c, out_c, kernel_size=4, stride=2, padding=1), 16 | nn.BatchNorm2d(out_c), 17 | nn.LeakyReLU(0.1, inplace=True), 18 | ) 19 | 20 | 21 | class HearNet(nn.Module): 22 | def __init__(self): 23 | super(HearNet, self).__init__() 24 | self.down1 = conv4x4(6, 64) 25 | self.down2 = conv4x4(64, 128) 26 | self.down3 = conv4x4(128, 256) 27 | self.down4 = conv4x4(256, 512) 28 | self.down5 = conv4x4(512, 512) 29 | 30 | self.up1 = deconv4x4(512, 512) 31 | self.up2 = deconv4x4(512*2, 256) 32 | self.up3 = deconv4x4(256*2, 128) 33 | self.up4 = deconv4x4(128*2, 64) 34 | self.up5 = nn.Conv2d(64*2, 3, kernel_size=3, stride=1, padding=1) 35 | 36 | def forward(self, x): 37 | c1 = self.down1(x) 38 | c2 = self.down2(c1) 39 | c3 = self.down3(c2) 40 | c4 = self.down4(c3) 41 | c5 = self.down5(c4) 42 | 43 | m1 = self.up1(c5) 44 | m1 = torch.cat((c4, m1), dim=1) 45 | m2 = self.up2(m1) 46 | m2 = torch.cat((c3, m2), dim=1) 47 | m3 = self.up3(m2) 48 | m3 = torch.cat((c2, m3), dim=1) 49 | m4 = self.up4(m3) 50 | m4 = torch.cat((c1, m4), dim=1) 51 | 52 | out = nn.functional.interpolate(m4, scale_factor=2, mode='bilinear', align_corners=True) 53 | out = self.up5(out) 54 | return torch.tanh(out) 55 | -------------------------------------------------------------------------------- /network/MultiscaleDiscriminator.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import numpy as np 3 | 4 | 5 | class NLayerDiscriminator(nn.Module): 6 | def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False, getIntermFeat=False): 7 | super(NLayerDiscriminator, self).__init__() 8 | self.getIntermFeat = getIntermFeat 9 | self.n_layers = n_layers 10 | 11 | kw = 4 12 | padw = int(np.ceil((kw-1.0)/2)) 13 | sequence = [[nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)]] 14 | 15 | nf = ndf 16 | for n in range(1, n_layers): 17 | nf_prev = nf 18 | nf = min(nf * 2, 512) 19 | sequence += [[ 20 | nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=2, padding=padw), 21 | norm_layer(nf), nn.LeakyReLU(0.2, True) 22 | ]] 23 | 24 | nf_prev = nf 25 | nf = min(nf * 2, 512) 26 | sequence += [[ 27 | nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=1, padding=padw), 28 | norm_layer(nf), 29 | nn.LeakyReLU(0.2, True) 30 | ]] 31 | 32 | sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]] 33 | 34 | if use_sigmoid: 35 | sequence += [[nn.Sigmoid()]] 36 | 37 | if getIntermFeat: 38 | for n in range(len(sequence)): 39 | setattr(self, 'model'+str(n), nn.Sequential(*sequence[n])) 40 | else: 41 | sequence_stream = [] 42 | for n in range(len(sequence)): 43 | sequence_stream += sequence[n] 44 | self.model = nn.Sequential(*sequence_stream) 45 | 46 | def forward(self, input): 47 | if self.getIntermFeat: 48 | res = [input] 49 | for n in range(self.n_layers+2): 50 | model = getattr(self, 'model'+str(n)) 51 | res.append(model(res[-1])) 52 | return res[1:] 53 | else: 54 | return self.model(input) 55 | 56 | 57 | class MultiscaleDiscriminator(nn.Module): 58 | def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, 59 | use_sigmoid=False, num_D=3, getIntermFeat=False): 60 | super(MultiscaleDiscriminator, 
self).__init__() 61 | self.num_D = num_D 62 | self.n_layers = n_layers 63 | self.getIntermFeat = getIntermFeat 64 | 65 | for i in range(num_D): 66 | netD = NLayerDiscriminator(input_nc, ndf, n_layers, norm_layer, use_sigmoid, getIntermFeat) 67 | if getIntermFeat: 68 | for j in range(n_layers + 2): 69 | setattr(self, 'scale' + str(i) + '_layer' + str(j), getattr(netD, 'model' + str(j))) 70 | else: 71 | setattr(self, 'layer' + str(i), netD.model) 72 | 73 | self.downsample = nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False) 74 | 75 | def singleD_forward(self, model, input): 76 | if self.getIntermFeat: 77 | result = [input] 78 | for i in range(len(model)): 79 | result.append(model[i](result[-1])) 80 | return result[1:] 81 | else: 82 | return [model(input)] 83 | 84 | def forward(self, input): 85 | num_D = self.num_D 86 | result = [] 87 | input_downsampled = input 88 | for i in range(num_D): 89 | if self.getIntermFeat: 90 | model = [getattr(self, 'scale' + str(num_D - 1 - i) + '_layer' + str(j)) for j in 91 | range(self.n_layers + 2)] 92 | else: 93 | model = getattr(self, 'layer' + str(num_D - 1 - i)) 94 | result.append(self.singleD_forward(model, input_downsampled)) 95 | if i != (num_D - 1): 96 | input_downsampled = self.downsample(input_downsampled) 97 | return result 98 | -------------------------------------------------------------------------------- /network/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/network/__init__.py -------------------------------------------------------------------------------- /online_preview.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./face_modules/') 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torch.nn.functional as F 6 | from face_modules.model import Backbone, Arcface, MobileFaceNet, Am_softmax, l2_norm 7 | from network.AEI_Net import * 8 | from face_modules.mtcnn import * 9 | import cv2 10 | import PIL.Image as Image 11 | import numpy as np 12 | import glob 13 | import time 14 | 15 | from Xlib import display, X 16 | 17 | use_cuda_postprocess = True 18 | if use_cuda_postprocess: 19 | from cuda_postprocess import CudaPostprocess 20 | postprocesser = CudaPostprocess(256, 256) 21 | 22 | class Screen_Capture: 23 | def __init__(self, H, W): 24 | self.H = H 25 | self.W = W 26 | self.dsp = display.Display() 27 | self.root = self.dsp.screen().root 28 | self.actw = self.dsp.intern_atom('_NET_ACTIVE_WINDOW') 29 | self.ids = [] 30 | 31 | def read_frame(self): 32 | # W = self.W 33 | # H = self.H 34 | id = self.root.get_full_property(self.actw, X.AnyPropertyType).value[0] 35 | if len(self.ids) == 0: 36 | self.ids.append(id) 37 | return np.zeros([1,1,3]).astype(np.uint8) 38 | elif len(self.ids) == 1: 39 | if id == self.ids[0]: 40 | return np.zeros([1,1,3]).astype(np.uint8) 41 | else: 42 | self.ids.append(id) 43 | elif len(self.ids) == 2: 44 | if id != self.ids[1]: 45 | self.ids[0] = self.ids[1] 46 | self.ids[1] = id 47 | id = self.ids[0] 48 | focus = self.dsp.create_resource_object('window', id) 49 | geo = focus.get_geometry() 50 | H = geo.height 51 | W = geo.width 52 | raw = focus.get_image(0, 0, W, H, X.ZPixmap, 0xffffffff) 53 | image = Image.frombytes("RGB", (W, H), raw.data, "raw", "BGRX") 54 | return np.array(image) 55 | 56 | 57 | screen_capture = Screen_Capture(1080, 960) 58 | 59 | detector = MTCNN() 
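# The remainder of this script mirrors inference_demo.py, but runs live on screen captures:
# it loads the AEI generator from ./saved_models/G_latest.pth and the ArcFace encoder from
# ./face_modules/model_ir_se50.pth, averages the identity embedding over a folder of source
# images, then repeatedly detects and aligns a face in the captured frame, swaps its identity
# with G, and blends the result back into the frame (either with the radial mask built below
# or with the CUDA postprocess kernel when use_cuda_postprocess is enabled).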
60 | device = torch.device('cuda') 61 | G = AEI_Net(c_id=512) 62 | G.eval() 63 | G.load_state_dict(torch.load('./saved_models/G_latest.pth', map_location=torch.device('cpu'))) 64 | G = G.cuda() 65 | 66 | arcface = Backbone(50, 0.6, 'ir_se').to(device) 67 | arcface.eval() 68 | arcface.load_state_dict(torch.load('./face_modules/model_ir_se50.pth', map_location=device), strict=False) 69 | 70 | test_transform = transforms.Compose([ 71 | transforms.ToTensor(), 72 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 73 | ]) 74 | 75 | jjy = glob.glob('/home/taotao/jjy/*.*g') 76 | 77 | Xs_paths = jjy 78 | Xs_raws = [cv2.imread(Xs_path) for Xs_path in Xs_paths] 79 | Xses = [] 80 | for Xs_raw in Xs_raws: 81 | try: 82 | Xs = detector.align(Image.fromarray(Xs_raw), crop_size=(256, 256)) 83 | Xs = test_transform(Xs) 84 | Xs = Xs.unsqueeze(0).cuda() 85 | Xses.append(Xs) 86 | except: 87 | continue 88 | Xses = torch.cat(Xses, dim=0) 89 | with torch.no_grad(): 90 | embeds, Xs_feats = arcface(F.interpolate(Xses[:, :, 19:237, 19:237], (112, 112), mode='bilinear', align_corners=True)) 91 | embeds = embeds.mean(dim=0, keepdim=True) 92 | 93 | 94 | # files = glob.glob('./tmp/3/*.*g') 95 | # files.sort() 96 | ind = 0 97 | 98 | mask = np.zeros([256, 256], dtype=np.float) 99 | for i in range(256): 100 | for j in range(256): 101 | dist = np.sqrt((i-128)**2 + (j-128)**2)/128 102 | dist = np.minimum(dist, 1) 103 | mask[i, j] = 1-dist 104 | mask = cv2.dilate(mask, None, iterations=20) 105 | # for file in files[0:]: 106 | # print(file) 107 | # Xt_path = file 108 | # Xt_raw = cv2.imread(Xt_path) 109 | cv2.namedWindow('image')#, cv2.WND_PROP_FULLSCREEN) 110 | cv2.setWindowProperty('image', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN) 111 | cv2.moveWindow('image', 0, 0) 112 | while True: 113 | try: 114 | Xt_raw = screen_capture.read_frame() 115 | Xt_raw = cv2.cvtColor(Xt_raw, cv2.COLOR_RGB2BGR) 116 | except: 117 | continue 118 | # try: 119 | Xt, trans_inv = detector.align_fully(Image.fromarray(Xt_raw), crop_size=(256, 256), 120 | return_trans_inv=True, ori=[0,3,1]) 121 | # except Exception as e: 122 | # print(e) 123 | # print('skip one frame') 124 | # cv2.imshow('image', Xt_raw) 125 | # cv2.imwrite('./write/%06d.jpg'%ind, Xt_raw) 126 | # ind += 1 127 | # cv2.waitKey(1) 128 | # continue 129 | 130 | if Xt is None: 131 | cv2.imshow('image', Xt_raw) 132 | # cv2.imwrite('./write/%06d.jpg'%ind, Xt_raw) 133 | ind += 1 134 | cv2.waitKey(1) 135 | print('skip one frame') 136 | continue 137 | 138 | # Xt_raw = np.array(Xt)[:, :, ::-1] 139 | # Xt_raw = Xt_raw.astype(np.float)/255.0 140 | 141 | Xt = test_transform(Xt) 142 | 143 | Xt = Xt.unsqueeze(0).cuda() 144 | with torch.no_grad(): 145 | # embeds = arcface(F.interpolate(Xs[:, :, 19:237, 19:237], (112, 112), mode='bilinear', align_corners=True)) 146 | # embedt = arcface(F.interpolate(Xt[:, :, 19:237, 19:237], (112, 112), mode='bilinear', align_corners=True)) 147 | st = time.time() 148 | Yt, _ = G(Xt, embeds) 149 | Yt = Yt.squeeze().detach().cpu().numpy() 150 | st = time.time() - st 151 | print(f'inference time: {st} sec') 152 | # Ys, _ = G(Xs, embedt) 153 | # Ys = Ys.squeeze().detach().cpu().numpy().transpose([1, 2, 0])*0.5 + 0.5 154 | if not use_cuda_postprocess: 155 | Yt = Yt.transpose([1, 2, 0])*0.5 + 0.5 156 | Yt = Yt 157 | Yt_trans_inv = cv2.warpAffine(Yt, trans_inv, (np.size(Xt_raw, 1), np.size(Xt_raw, 0)), borderValue=(0, 0, 0)) 158 | mask_ = cv2.warpAffine(mask,trans_inv, (np.size(Xt_raw, 1), np.size(Xt_raw, 0)), borderValue=(0, 0, 0)) 159 | mask_ = 
np.expand_dims(mask_, 2) 160 | Yt_trans_inv = mask_*Yt_trans_inv + (1-mask_)*(Xt_raw.astype(np.float)/255.) 161 | else: 162 | trans_inv = np.concatenate((trans_inv, np.array([0,0,1]).reshape(1, 3)), axis=0) 163 | trans = np.linalg.inv(trans_inv) 164 | trans = trans[:2, :] 165 | Yt_trans_inv = postprocesser.restore(Yt.copy(), mask, trans.copy(), Xt_raw, np.size(Xt_raw, 0), np.size(Xt_raw, 1)) 166 | 167 | merge = Yt_trans_inv 168 | 169 | cv2.imshow('image', merge) 170 | # cv2.imwrite('./write/%06d.jpg'%ind, merge*255) 171 | ind += 1 172 | cv2.waitKey(1) 173 | -------------------------------------------------------------------------------- /tmp_script/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taotaonice/FaceShifter/376b09e4ecc97848c07c585f173d10932880f961/tmp_script/__init__.py -------------------------------------------------------------------------------- /tmp_script/check_arcface_feature_map.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../Peppa_Pig_Face_Engine-master/') 3 | sys.path.append('../') 4 | from lib.core.api.facer import FaceAna 5 | 6 | from lib.core.headpose.pose import get_head_pose, line_pairs 7 | 8 | facer = FaceAna() 9 | 10 | import torch 11 | import torchvision.transforms as transforms 12 | from face_modules.model import Backbone 13 | from network.AEI_Net import * 14 | import cv2 15 | import PIL.Image as Image 16 | import numpy as np 17 | from face_modules.mtcnn_pytorch.src.align_trans import * 18 | 19 | 20 | device = torch.device('cuda') 21 | 22 | arcface = Backbone(50, 0.6, 'ir_se').to(device) 23 | arcface.eval() 24 | arcface.load_state_dict(torch.load('../face_modules/model_ir_se50.pth', map_location=device), strict=False) 25 | 26 | test_transform = transforms.Compose([ 27 | transforms.ColorJitter(0.1, 0.1, 0.1, 0.01), 28 | transforms.ToTensor(), 29 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 30 | ]) 31 | 32 | 33 | def get_f5p(landmarks): 34 | eye_left = landmarks[36:41].mean(axis=0) 35 | eye_right = landmarks[42:47].mean(axis=0) 36 | nose = landmarks[30] 37 | mouth_left = landmarks[48] 38 | mouth_right = landmarks[54] 39 | f5p = [[eye_left[0], eye_left[1]], 40 | [eye_right[0], eye_right[1]], 41 | [nose[0], nose[1]], 42 | [mouth_left[0], mouth_left[1]], 43 | [mouth_right[0], mouth_right[1]]] 44 | return f5p 45 | 46 | 47 | dn0 = '/home/taotao/Pictures/7e3e6709c93d70cf8cb965a4f6dcd100bba12bdc.jpeg' 48 | dn1 = '/home/taotao/Pictures/b3b7d0a20cf431ada7388e904536acaf2fdd98b5.jpg' 49 | dn2 = '/home/taotao/Pictures/u=3226707260,1696055340&fm=26&gp=0.jpg' 50 | 51 | ft0 = '/home/taotao/Pictures/9e3df8dcd100baa159770840d2289817c9fc2eab.jpeg' 52 | ft1 = '/home/taotao/Pictures/b999a9014c086e06eaeb811975825df20bd1cbb6.jpeg' 53 | ft2 = '/home/taotao/Pictures/b03533fa828ba61ebe7db556bb17ce0f314e59e4.png' 54 | 55 | ew0 = '/home/taotao/Pictures/u=670719782,34416986&fm=26&gp=0.jpg' 56 | ew1 = '/home/taotao/Pictures/u=1509480533,2094244881&fm=26&gp=0.jpg' 57 | 58 | sjl0 = '/home/taotao/Pictures/asdgsdasf.jpeg' 59 | sjl1 = '/home/taotao/Pictures/u=1912807554,30254209&fm=26&gp=0.jpg' 60 | sjl2 = '/home/taotao/Pictures/u=3322705847,3022779128&fm=26&gp=0.jpg' 61 | 62 | dlrb0 = '/home/taotao/dlrb/00013950.jpg' 63 | dlrb1 = '/home/taotao/matlabspace/dlrb/000092.jpg' 64 | dlrb2 = '/home/taotao/Pictures/Screenshot from 2020-02-15 14-46-32.png' 65 | 66 | fj0 = '/home/taotao/fj/00105490.jpg' 67 | fj1 = 
'/home/taotao/fj/00105566.jpg' 68 | fj2 = '/home/taotao/fj/00105548.jpg' 69 | 70 | A_path = dn0 71 | B_path = ft0 72 | faces = [cv2.imread(A_path), cv2.imread(B_path)] 73 | 74 | emb = [] 75 | feats = [] 76 | for face in faces: 77 | Xs_raw = face 78 | _, landmarks, _ = facer.run(Xs_raw) 79 | f5p = get_f5p(landmarks[0]) 80 | Xs = warp_and_crop_face(Xs_raw, f5p, reference_pts=get_reference_facial_points(default_square=True), crop_size=(256, 256)) 81 | cv2.imshow("", Xs) 82 | cv2.waitKey(0) 83 | Xs = Image.fromarray(Xs) 84 | Xs = test_transform(Xs) 85 | Xs = Xs.unsqueeze(0).cuda() 86 | 87 | with torch.no_grad(): 88 | embeds, Xs_feats = arcface(F.interpolate(Xs[:, :, 19:237, 19:237], (112, 112), mode='bilinear', align_corners=True)) 89 | emb.append(embeds) 90 | feats.append(Xs_feats) 91 | 92 | emba, embb = emb[0], emb[1] 93 | # emba = emba.view(-1) 94 | # embb = embb.view(-1) 95 | 96 | print(f'embed norm diff: {(emba - embb).norm()}') 97 | print(f'cosine similarity loss: {1-torch.cosine_similarity(emba, embb)}') 98 | 99 | for i in range(len(feats[0])): 100 | fa = feats[0][i] 101 | fb = feats[1][i] 102 | 103 | fa = fa.view(-1) 104 | fb = fb.view(-1) 105 | 106 | print(f'layer {i} norm diff: {(fa-fb).norm()/fa.norm()} ' 107 | f'mean abs diff: {torch.abs(fa-fb).mean()}') 108 | 109 | -------------------------------------------------------------------------------- /tmp_script/test_structure_AEI.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | from network.AEI_Net import * 4 | from network.HEAR_Net import * 5 | from network.MultiscaleDiscriminator import * 6 | import torch.optim as opt 7 | import time 8 | from apex import amp 9 | 10 | device = torch.device('cuda:0') 11 | print(torch.backends.cudnn.benchmark) 12 | torch.backends.cudnn.benchmark = True 13 | # 14 | # c_dim = 512 15 | # net = AEI_Net(c_dim).cuda() 16 | # D = MultiscaleDiscriminator(3) 17 | # optm = opt.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4) 18 | # net, optm = amp.initialize(net, optm, opt_level="O2") 19 | # 20 | # batch_size = 2 21 | # 22 | # z_id = torch.ones([batch_size, c_dim]).to(device) 23 | # x = torch.zeros([batch_size, 3, 256, 256]).to(device) 24 | # 25 | # while True: 26 | # st = time.time() 27 | # y = net(x, z_id)[0] 28 | # loss = y.mean() 29 | # with amp.scale_loss(loss, optm) as scaled_loss: 30 | # scaled_loss.backward() 31 | # attr = net.get_attr(x.half()) 32 | # st = time.time() - st 33 | # print(f'{st} sec') 34 | 35 | hearnet = HearNet() 36 | hearnet.to(device) 37 | hearnet.eval() 38 | batch_size = 1 39 | 40 | input = torch.zeros([batch_size, 6, 256, 256]).to(device) 41 | 42 | with torch.no_grad(): 43 | while True: 44 | st = time.time() 45 | Yst = hearnet(input) 46 | # Yst.mean().backward() 47 | st = time.time() - st 48 | print(st) 49 | -------------------------------------------------------------------------------- /train_AEI.py: -------------------------------------------------------------------------------- 1 | from network.AEI_Net import * 2 | from network.MultiscaleDiscriminator import * 3 | from utils.Dataset import FaceEmbed, With_Identity 4 | from torch.utils.data import DataLoader 5 | import torch.optim as optim 6 | from face_modules.model import Backbone, Arcface, MobileFaceNet, Am_softmax, l2_norm 7 | import torch.nn.functional as F 8 | import torch 9 | import time 10 | import torchvision 11 | import cv2 12 | from apex import amp 13 | import visdom 14 | 15 | 16 | vis = visdom.Visdom(server='127.0.0.1', 
env='faceshifter', port=8099) 17 | batch_size = 16 18 | lr_G = 4e-4 19 | lr_D = 4e-4 20 | max_epoch = 2000 21 | show_step = 10 22 | save_epoch = 1 23 | model_save_path = './saved_models/' 24 | optim_level = 'O1' 25 | 26 | # fine_tune_with_identity = False 27 | 28 | device = torch.device('cuda') 29 | # torch.set_num_threads(12) 30 | 31 | G = AEI_Net(c_id=512).to(device) 32 | D = MultiscaleDiscriminator(input_nc=3, n_layers=6, norm_layer=torch.nn.InstanceNorm2d).to(device) 33 | G.train() 34 | D.train() 35 | 36 | arcface = Backbone(50, 0.6, 'ir_se').to(device) 37 | arcface.eval() 38 | arcface.load_state_dict(torch.load('./face_modules/model_ir_se50.pth', map_location=device), strict=False) 39 | 40 | opt_G = optim.Adam(G.parameters(), lr=lr_G, betas=(0, 0.999)) 41 | opt_D = optim.Adam(D.parameters(), lr=lr_D, betas=(0, 0.999)) 42 | 43 | G, opt_G = amp.initialize(G, opt_G, opt_level=optim_level) 44 | D, opt_D = amp.initialize(D, opt_D, opt_level=optim_level) 45 | 46 | try: 47 | G.load_state_dict(torch.load('./saved_models/G_latest.pth', map_location=torch.device('cpu')), strict=False) 48 | D.load_state_dict(torch.load('./saved_models/D_latest.pth', map_location=torch.device('cpu')), strict=False) 49 | except Exception as e: 50 | print(e) 51 | 52 | # if not fine_tune_with_identity: 53 | # dataset = FaceEmbed(['../celeb-aligned-256_0.85/', '../ffhq_256_0.85/', '../vgg_256_0.85/', '../stars_256_0.85/'], same_prob=0.5) 54 | # else: 55 | # dataset = With_Identity('../washed_img/', 0.8) 56 | dataset = FaceEmbed(['../celeb-aligned-256_0.85/', '../ffhq_256_0.85/', '../vgg_256_0.85/', '../stars_256_0.85/'], same_prob=0.8) 57 | 58 | dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=True) 59 | 60 | 61 | MSE = torch.nn.MSELoss() 62 | L1 = torch.nn.L1Loss() 63 | 64 | 65 | def hinge_loss(X, positive=True): 66 | if positive: 67 | return torch.relu(1-X).mean() 68 | else: 69 | return torch.relu(X+1).mean() 70 | 71 | 72 | def get_grid_image(X): 73 | X = X[:8] 74 | X = torchvision.utils.make_grid(X.detach().cpu(), nrow=X.shape[0]) * 0.5 + 0.5 75 | return X 76 | 77 | 78 | def make_image(Xs, Xt, Y): 79 | Xs = get_grid_image(Xs) 80 | Xt = get_grid_image(Xt) 81 | Y = get_grid_image(Y) 82 | return torch.cat((Xs, Xt, Y), dim=1).numpy() 83 | 84 | 85 | # prior = torch.FloatTensor(cv2.imread('./prior.png', 0).astype(np.float)/255).to(device) 86 | 87 | print(torch.backends.cudnn.benchmark) 88 | #torch.backends.cudnn.benchmark = True 89 | for epoch in range(0, max_epoch): 90 | # torch.cuda.empty_cache() 91 | for iteration, data in enumerate(dataloader): 92 | start_time = time.time() 93 | Xs, Xt, same_person = data 94 | Xs = Xs.to(device) 95 | Xt = Xt.to(device) 96 | # embed = embed.to(device) 97 | with torch.no_grad(): 98 | embed, Xs_feats = arcface(F.interpolate(Xs[:, :, 19:237, 19:237], [112, 112], mode='bilinear', align_corners=True)) 99 | same_person = same_person.to(device) 100 | #diff_person = (1 - same_person) 101 | 102 | # train G 103 | opt_G.zero_grad() 104 | Y, Xt_attr = G(Xt, embed) 105 | 106 | Di = D(Y) 107 | L_adv = 0 108 | 109 | for di in Di: 110 | L_adv += hinge_loss(di[0], True) 111 | 112 | 113 | Y_aligned = Y[:, :, 19:237, 19:237] 114 | ZY, Y_feats = arcface(F.interpolate(Y_aligned, [112, 112], mode='bilinear', align_corners=True)) 115 | L_id =(1 - torch.cosine_similarity(embed, ZY, dim=1)).mean() 116 | 117 | Y_attr = G.get_attr(Y) 118 | L_attr = 0 119 | for i in range(len(Xt_attr)): 120 | L_attr += torch.mean(torch.pow(Xt_attr[i] - Y_attr[i], 
2).reshape(batch_size, -1), dim=1).mean() 121 | L_attr /= 2.0 122 | 123 | L_rec = torch.sum(0.5 * torch.mean(torch.pow(Y - Xt, 2).reshape(batch_size, -1), dim=1) * same_person) / (same_person.sum() + 1e-6) 124 | 125 | lossG = 1*L_adv + 10*L_attr + 5*L_id + 10*L_rec 126 | # lossG = 1*L_adv + 10*L_attr + 5*L_id + 10*L_rec 127 | with amp.scale_loss(lossG, opt_G) as scaled_loss: 128 | scaled_loss.backward() 129 | 130 | # lossG.backward() 131 | opt_G.step() 132 | 133 | # train D 134 | opt_D.zero_grad() 135 | # with torch.no_grad(): 136 | # Y, _ = G(Xt, embed) 137 | fake_D = D(Y.detach()) 138 | loss_fake = 0 139 | for di in fake_D: 140 | loss_fake += hinge_loss(di[0], False) 141 | 142 | true_D = D(Xs) 143 | loss_true = 0 144 | for di in true_D: 145 | loss_true += hinge_loss(di[0], True) 146 | # true_score2 = D(Xt)[-1][0] 147 | 148 | lossD = 0.5*(loss_true.mean() + loss_fake.mean()) 149 | 150 | with amp.scale_loss(lossD, opt_D) as scaled_loss: 151 | scaled_loss.backward() 152 | # lossD.backward() 153 | opt_D.step() 154 | batch_time = time.time() - start_time 155 | if iteration % show_step == 0: 156 | image = make_image(Xs, Xt, Y) 157 | vis.image(image[::-1, :, :], opts={'title': 'result'}, win='result') 158 | cv2.imwrite('./gen_images/latest.jpg', image.transpose([1,2,0])) 159 | print(f'epoch: {epoch} {iteration} / {len(dataloader)}') 160 | print(f'lossD: {lossD.item()} lossG: {lossG.item()} batch_time: {batch_time}s') 161 | print(f'L_adv: {L_adv.item()} L_id: {L_id.item()} L_attr: {L_attr.item()} L_rec: {L_rec.item()}') 162 | if iteration % 1000 == 0: 163 | torch.save(G.state_dict(), './saved_models/G_latest.pth') 164 | torch.save(D.state_dict(), './saved_models/D_latest.pth') 165 | 166 | 167 | -------------------------------------------------------------------------------- /train_HEAR.py: -------------------------------------------------------------------------------- 1 | from network.AEI_Net import * 2 | from network.HEAR_Net import * 3 | from utils.Dataset import * 4 | from torch.utils.data import DataLoader 5 | import torch.optim as optim 6 | from face_modules.model import Backbone, Arcface, MobileFaceNet, Am_softmax, l2_norm 7 | import torch.nn.functional as F 8 | import torch 9 | import time 10 | import numpy as np 11 | import torchvision 12 | import cv2 13 | from apex import amp 14 | import visdom 15 | 16 | 17 | vis = visdom.Visdom(server='127.0.0.1', env='faceshifter', port=8099) 18 | batch_size = 32 19 | lr = 4e-4 20 | max_epoch = 2000 21 | show_step = 10 22 | save_epoch = 1 23 | model_save_path = './saved_models/' 24 | optim_level = 'O1' 25 | 26 | device = torch.device('cuda') 27 | 28 | G = AEI_Net(c_id=512).to(device) 29 | G.eval() 30 | G.load_state_dict(torch.load('./saved_models/G_latest.pth', map_location=torch.device('cpu')), strict=True) 31 | 32 | net = HearNet() 33 | net.train() 34 | net.to(device) 35 | 36 | arcface = Backbone(50, 0.6, 'ir_se').to(device) 37 | arcface.eval() 38 | arcface.load_state_dict(torch.load('./face_modules/model_ir_se50.pth', map_location=device), strict=False) 39 | 40 | opt = optim.Adam(net.parameters(), lr=lr, betas=(0, 0.999)) 41 | 42 | net, opt = amp.initialize(net, opt, opt_level=optim_level) 43 | 44 | try: 45 | net.load_state_dict(torch.load('./saved_models/HEAR_latest.pth', map_location=torch.device('cpu')), strict=False) 46 | except Exception as e: 47 | print(e) 48 | 49 | dataset = AugmentedOcclusions('../hearnet_data', 50 | ['../ego_hands_png'], 51 | ['../shapenet_png'], same_prob=0.5) 52 | dataloader = DataLoader(dataset, batch_size=batch_size, 
shuffle=True, num_workers=0, drop_last=True) 53 | 54 | MSE = torch.nn.MSELoss() 55 | L1 = torch.nn.L1Loss() 56 | 57 | 58 | def get_numpy_image(X): 59 | X = X[:8] 60 | X = torchvision.utils.make_grid(X.detach().cpu(), nrow=X.shape[0]).numpy() * 0.5 + 0.5 61 | X = X.transpose([1,2,0])*255 62 | np.clip(X, 0, 255).astype(np.uint8) 63 | return X 64 | 65 | 66 | def make_image(Xs, Xt, Y): 67 | Xs = get_numpy_image(Xs) 68 | Xt = get_numpy_image(Xt) 69 | Y = get_numpy_image(Y) 70 | return np.concatenate((Xs, Xt, Y), axis=0).transpose([2, 0, 1]) 71 | 72 | print(torch.backends.cudnn.benchmark) 73 | #torch.backends.cudnn.benchmark = True 74 | for epoch in range(0, max_epoch): 75 | # torch.cuda.empty_cache() 76 | for iteration, data in enumerate(dataloader): 77 | start_time = time.time() 78 | Xs, Xt, same_person = data 79 | Xs = Xs.to(device) 80 | Xt = Xt.to(device) 81 | with torch.no_grad(): 82 | embed_s, _ = arcface(F.interpolate(Xs[:, :, 19:237, 19:237], [112, 112], mode='bilinear', align_corners=True)) 83 | embed_t, _ = arcface(F.interpolate(Xt[:, :, 19:237, 19:237], [112, 112], mode='bilinear', align_corners=True)) 84 | same_person = same_person.to(device) 85 | 86 | # train G 87 | opt.zero_grad() 88 | with torch.no_grad(): 89 | Yst_hat, _ = G(Xt, embed_s) 90 | Ytt, _ = G(Xt, embed_t) 91 | 92 | dYt = Xt - Ytt 93 | hear_input = torch.cat((Yst_hat, dYt), dim=1) 94 | Yst = net(hear_input) 95 | 96 | Yst_aligned = Yst[:, :, 19:237, 19:237] 97 | 98 | id_Yst, _ = arcface(F.interpolate(Yst_aligned, [112, 112], mode='bilinear', align_corners=True)) 99 | 100 | L_id =(1 - torch.cosine_similarity(embed_s, id_Yst, dim=1)).mean() 101 | 102 | L_chg = L1(Yst_hat, Yst) 103 | 104 | L_rec = torch.sum(0.5 * torch.mean(torch.pow(Yst - Xt, 2).reshape(batch_size, -1), dim=1) * same_person) / (same_person.sum() + 1e-6) 105 | 106 | loss = L_id + L_chg + L_rec 107 | with amp.scale_loss(loss, opt) as scaled_loss: 108 | scaled_loss.backward() 109 | 110 | # loss.backward() 111 | opt.step() 112 | 113 | batch_time = time.time() - start_time 114 | if iteration % show_step == 0: 115 | image = make_image(Xs, Xt, Yst) 116 | vis.image(image, opts={'title': 'HEAR'}, win='HEAR') 117 | cv2.imwrite('./gen_images/HEAR_latest.jpg', image.transpose([1,2,0])[:,:,::-1]) 118 | print(f'epoch: {epoch} {iteration} / {len(dataloader)}') 119 | print(f'loss: {loss.item()} batch_time: {batch_time}s') 120 | print(f'L_id: {L_id.item()} L_chg: {L_chg.item()} L_rec: {L_rec.item()}') 121 | if iteration % 1000 == 0: 122 | torch.save(net.state_dict(), './saved_models/HEAR_latest.pth') 123 | 124 | 125 | -------------------------------------------------------------------------------- /utils/Dataset.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import TensorDataset 2 | import torchvision.transforms as transforms 3 | from PIL import Image 4 | import glob 5 | import pickle 6 | import random 7 | import numpy as np 8 | import os 9 | import cv2 10 | 11 | 12 | class FaceEmbed(TensorDataset): 13 | def __init__(self, data_path_list, same_prob=0.8): 14 | datasets = [] 15 | # embeds = [] 16 | self.N = [] 17 | self.same_prob = same_prob 18 | for data_path in data_path_list: 19 | image_list = glob.glob(f'{data_path}/*.*g') 20 | datasets.append(image_list) 21 | self.N.append(len(image_list)) 22 | # with open(f'{data_path}/embed.pkl', 'rb') as f: 23 | # embed = pickle.load(f) 24 | # embeds.append(embed) 25 | self.datasets = datasets 26 | # self.embeds = embeds 27 | self.transforms = transforms.Compose([ 28 | 
transforms.ColorJitter(0.2, 0.2, 0.2, 0.01), 29 | transforms.ToTensor(), 30 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 31 | ]) 32 | 33 | def __getitem__(self, item): 34 | idx = 0 35 | while item >= self.N[idx]: 36 | item -= self.N[idx] 37 | idx += 1 38 | image_path = self.datasets[idx][item] 39 | name = os.path.split(image_path)[1] 40 | # embed = self.embeds[idx][name] 41 | Xs = cv2.imread(image_path) 42 | Xs = Image.fromarray(Xs) 43 | 44 | if random.random() > self.same_prob: 45 | image_path = random.choice(self.datasets[random.randint(0, len(self.datasets)-1)]) 46 | Xt = cv2.imread(image_path) 47 | Xt = Image.fromarray(Xt) 48 | same_person = 0 49 | else: 50 | Xt = Xs.copy() 51 | same_person = 1 52 | return self.transforms(Xs), self.transforms(Xt), same_person 53 | 54 | def __len__(self): 55 | return sum(self.N) 56 | 57 | 58 | # Deprecated 59 | class With_Identity(TensorDataset): 60 | def __init__(self, root_path, same_prob=0.8): 61 | self.root_path = root_path 62 | self.same_prob = same_prob 63 | self.classes = os.listdir(root_path) 64 | self.transforms = transforms.Compose([ 65 | transforms.ColorJitter(0.1, 0.1, 0.1, 0.01), 66 | transforms.Resize((256, 256)), 67 | transforms.ToTensor(), 68 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 69 | ]) 70 | 71 | def __getitem__(self, item): 72 | class_path = os.path.join(self.root_path, self.classes[item]) 73 | files = glob.glob(class_path + '/*.*g') 74 | N = len(files) 75 | order = [i for i in range(N)] 76 | random.shuffle(order) 77 | Xs = Image.fromarray(cv2.imread(files[order[0]])) 78 | if random.random() < self.same_prob: 79 | if len(order) == 1: 80 | order.append(order[0]) 81 | if random.random() < 0.5: 82 | order[1] = order[0] 83 | Xt = Image.fromarray(cv2.imread(files[order[1]])) 84 | return self.transforms(Xs), self.transforms(Xt), True 85 | else: 86 | other_class = random.randint(0, self.__len__()-1) 87 | class_path = os.path.join(self.root_path, 88 | self.classes[other_class]) 89 | files = glob.glob(class_path + '/*.*g') 90 | pick = random.choice(files) 91 | Xt = Image.fromarray(cv2.imread(pick)) 92 | return self.transforms(Xs), self.transforms(Xt), False 93 | 94 | def __len__(self): 95 | return len(self.classes) 96 | 97 | 98 | def compose_occlusion(face_img, occlusions): 99 | h, w, c = face_img.shape 100 | if len(occlusions) == 0: 101 | return face_img 102 | for occlusion in occlusions: 103 | # scale 104 | scale = random.random() * 0.5 + 0.5 105 | # occlusion = cv2.resize(occlusion, (), fx=scale, fy=scale) 106 | # rotate 107 | R = cv2.getRotationMatrix2D((occlusion.shape[0]/2, occlusion.shape[1]/2), random.random()*180-90, scale) 108 | occlusion = cv2.warpAffine(occlusion, R, (occlusion.shape[1], occlusion.shape[0])) 109 | oh, ow, _ = occlusion.shape 110 | oc_color = occlusion[:, :, :3] 111 | oc_alpha = occlusion[:, :, 3].astype(np.float) / 255. 
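# Alpha-composite the scaled and rotated occluder onto a zero-padded copy of the face at a
# random position, then crop the padded canvas back to the original face size.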
112 | oc_alpha = np.expand_dims(oc_alpha, axis=2) 113 | tmp = np.zeros([h+oh, w+ow, c]) 114 | tmp[oh//2:oh//2+h, ow//2:ow//2+w, :] = face_img 115 | cx = random.randint(int(ow / 2) + 1, int(w + ow / 2) - 1) 116 | cy = random.randint(int(oh / 2) + 1, int(h + oh / 2) - 1) 117 | stx = cx - int(ow / 2) 118 | sty = cy - int(oh / 2) 119 | tmp[sty:sty+oh, stx:stx+ow, :] = oc_color * oc_alpha + tmp[sty:sty+oh, stx:stx+ow, :] * (1-oc_alpha) 120 | face_img = tmp[oh//2:oh//2+h, ow//2:ow//2+w, :].astype(np.uint8) 121 | return face_img 122 | 123 | 124 | class AugmentedOcclusions(TensorDataset): 125 | def __init__(self, face_img_root, hand_sets, obj_sets, same_prob=0.5): 126 | self.same_prob = same_prob 127 | hands_data = [] 128 | for hand_set_path in hand_sets: 129 | paths = glob.glob(hand_set_path + '/*.png') 130 | hands_data.extend(paths) 131 | self.hands_data = hands_data 132 | obj_data = [] 133 | for obj_set_path in obj_sets: 134 | paths = glob.glob(obj_set_path + '/*.png') 135 | obj_data.extend(paths) 136 | self.obj_data = obj_data 137 | 138 | self.face_img_paths = glob.glob(face_img_root + '/*.jpg') 139 | self.transforms = transforms.Compose([ 140 | transforms.ColorJitter(0.1, 0.1, 0.1, 0.01), 141 | transforms.ToTensor(), 142 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 143 | ]) 144 | 145 | def gen_occlusion(self): 146 | p = random.random() 147 | occlusions = [] 148 | if p < 0.25: # no occlusion 149 | pass 150 | elif p < 0.5: # only hand 151 | hand_img = cv2.imread(self.hands_data[random.randint(0, len(self.hands_data)-1)], cv2.IMREAD_UNCHANGED) 152 | occlusions.append(hand_img) 153 | elif p < 0.75: # only object 154 | obj_img = cv2.imread(self.obj_data[random.randint(0, len(self.obj_data)-1)], cv2.IMREAD_UNCHANGED) 155 | occlusions.append(obj_img) 156 | else: # both 157 | hand_img = cv2.imread(self.hands_data[random.randint(0, len(self.hands_data)-1)], cv2.IMREAD_UNCHANGED) 158 | occlusions.append(hand_img) 159 | obj_img = cv2.imread(self.obj_data[random.randint(0, len(self.obj_data)-1)], cv2.IMREAD_UNCHANGED) 160 | occlusions.append(obj_img) 161 | return occlusions 162 | 163 | def __getitem__(self, item): 164 | face_path = self.face_img_paths[item] 165 | face_img = cv2.imread(face_path) 166 | 167 | Xs = face_img 168 | p = random.random() 169 | if p > self.same_prob: 170 | Xt_path = self.face_img_paths[random.randint(0, len(self.face_img_paths)-1)] 171 | Xt = cv2.imread(Xt_path) 172 | Xt = compose_occlusion(Xt, self.gen_occlusion()) 173 | same_person = 0 174 | else: 175 | Xt = compose_occlusion(face_img, self.gen_occlusion()) 176 | same_person = 1 177 | return self.transforms(Image.fromarray(Xs)), self.transforms(Image.fromarray(Xt)), same_person 178 | 179 | def __len__(self): 180 | return len(self.face_img_paths) 181 | -------------------------------------------------------------------------------- /utils/download_vggface_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | import os 5 | import threading 6 | import socket 7 | import urllib.request 8 | 9 | timeout = 1 10 | socket.setdefaulttimeout(timeout) 11 | 12 | save_path = '/media/taotao/2T/vgg_face_dataset/' 13 | 14 | 15 | def download_and_save(url, savename): 16 | try: 17 | data = urllib.request.urlopen(url).read() 18 | fid = open(savename, 'w+b') 19 | fid.write(data) 20 | print("download succeed: " + url) 21 | fid.close() 22 | except IOError: 23 | print("download failed: " + url) 24 | 25 | 26 | def 
get_all_image(filename): 27 | fid = open(filename) 28 | name = filename.split('/')[-1] 29 | name = name[:-4] 30 | lines = fid.readlines() 31 | fid.close() 32 | for line in lines: 33 | line_split = line.split(' ') 34 | image_id = line_split[0] 35 | image_url = line_split[1] 36 | if not os.path.exists(f'{save_path}/' + name): 37 | os.mkdir(f'{save_path}/' + name) 38 | savefile = f'{save_path}/' + name + '/' + image_id + '.jpg' 39 | # The max number of download threads: 1000 40 | print(image_url, savefile) 41 | while True: 42 | if (len(threading.enumerate()) < 1000): 43 | break 44 | t = threading.Thread(target=download_and_save, args=(image_url, savefile,)) 45 | t.start() 46 | 47 | 48 | if __name__ == "__main__": 49 | fileDir = '/home/taotao/Downloads/vgg_face_dataset/files/' 50 | names = os.listdir(fileDir) 51 | for i in range(len(names)): 52 | get_all_image(os.path.join(fileDir, names[i])) 53 | -------------------------------------------------------------------------------- /utils/split_hearnet_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../') 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torch.nn.functional as F 6 | from face_modules.model import Backbone, Arcface, MobileFaceNet, Am_softmax, l2_norm 7 | from network.AEI_Net import * 8 | from face_modules.mtcnn import * 9 | import cv2 10 | import PIL.Image as Image 11 | import numpy as np 12 | import glob 13 | import time 14 | import os 15 | import shutil 16 | 17 | 18 | output_path = '../../hearnet_data/' 19 | os.makedirs(output_path, exist_ok=True) 20 | 21 | batch_size = 32 22 | 23 | device = torch.device('cuda') 24 | G = AEI_Net(c_id=512) 25 | G.eval() 26 | G.load_state_dict(torch.load('../saved_models/G_latest.pth', map_location=torch.device('cpu'))) 27 | G = G.cuda() 28 | 29 | arcface = Backbone(50, 0.6, 'ir_se').to(device) 30 | arcface.eval() 31 | arcface.load_state_dict(torch.load('../face_modules/model_ir_se50.pth', map_location=device), strict=False) 32 | 33 | test_transform = transforms.Compose([ 34 | transforms.ToTensor(), 35 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 36 | ]) 37 | 38 | data_roots = ['../../celeb-aligned-256_0.85/', '../../ffhq_256_0.85/', '../../vgg_256_0.85/', '../../stars_256_0.85/'] 39 | 40 | all_lists = [] 41 | for data_root in data_roots: 42 | img_lists = glob.glob(data_root + '/*.*g') 43 | all_lists.extend(img_lists) 44 | 45 | scores = [] 46 | torch.backends.cudnn.benchmark = True 47 | G = G.half() 48 | with torch.no_grad(): 49 | for idx in range(0, len(all_lists)-batch_size, batch_size): 50 | st = time.time() 51 | 52 | Xl = [] 53 | for i in range(batch_size): 54 | img_path = all_lists[idx + i] 55 | img = cv2.imread(img_path)[:,:,::-1] 56 | X = Image.fromarray(img) 57 | X = test_transform(X) 58 | X = X.unsqueeze(0) 59 | Xl.append(X) 60 | X = torch.cat(Xl, dim=0).cuda() 61 | embeds, _ = arcface(F.interpolate(X[:, :, 19:237, 19:237], (112, 112), mode='bilinear', align_corners=True)) 62 | Yt, _ = G(X.half(), embeds.half()) 63 | Yt = Yt.float() 64 | HE = torch.abs(X - Yt).mean(dim=[1,2,3]) 65 | for i in range(batch_size): 66 | scores.append((all_lists[idx + i], HE[i].item())) 67 | st = time.time() - st 68 | print(f'{idx} / {len(all_lists)} time: {st}') 69 | 70 | 71 | def comp(x): 72 | return x[1] 73 | 74 | 75 | scores.sort(key=comp, reverse=True) 76 | N = len(scores) 77 | pick_num = int(N*0.1) 78 | scores = scores[:pick_num] 79 | 80 | ind = 0 81 | print('copying files...') 82 | for img_path, _ 
in scores: 83 | shutil.copyfile(img_path, output_path+'/%08d.jpg' % ind) 84 | ind += 1 85 | # test bug 86 | # img = cv2.imread(img_path)[:, :, ::-1] 87 | # X = Image.fromarray(img) 88 | # X = test_transform(X) 89 | # X = X.unsqueeze(0).cuda() 90 | # embeds, _ = arcface(F.interpolate(X[:, :, 19:237, 19:237], (112, 112), mode='bilinear', align_corners=True)) 91 | # Yt, _ = G(X, embeds) 92 | # X = X.cpu().numpy().transpose(2, 3, 1, 0).squeeze() 93 | # Yt = Yt.cpu().numpy().transpose(2, 3, 1, 0).squeeze() 94 | # X = (X*0.5)+0.5 95 | # Yt = (Yt*0.5)+0.5 96 | # show = np.concatenate((X, Yt), axis=1)[:,:,::-1] 97 | # cv2.imshow('show', show) 98 | # cv2.waitKey(0) 99 | print('done') 100 | --------------------------------------------------------------------------------
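
The generator objective in train_AEI.py above combines an adversarial hinge term, an attribute-reconstruction term, an identity term, and a same-person pixel-reconstruction term with the fixed weights 1, 10, 5 and 10. A minimal sketch of that combination in isolation, using random placeholder tensors in place of the real discriminator, ArcFace and attribute-encoder outputs (the shapes, the single discriminator scale and the single attribute feature map below are illustrative only, not taken from the repository):

import torch

def hinge_loss(X, positive=True):
    # same hinge formulation as in train_AEI.py
    if positive:
        return torch.relu(1 - X).mean()
    return torch.relu(X + 1).mean()

batch_size = 4
D_outputs = [torch.randn(batch_size, 1, 8, 8)]      # discriminator responses on Y (one scale shown)
embed = torch.randn(batch_size, 512)                # ArcFace embedding of the source Xs
ZY = torch.randn(batch_size, 512)                   # ArcFace embedding of the generated Y
Xt_attr = [torch.randn(batch_size, 64, 32, 32)]     # attribute features of the target Xt (one map shown)
Y_attr = [torch.randn(batch_size, 64, 32, 32)]      # attribute features of Y
Y = torch.randn(batch_size, 3, 256, 256)            # generator output
Xt = torch.randn(batch_size, 3, 256, 256)           # target image
same_person = torch.tensor([1.0, 0.0, 1.0, 0.0])    # 1 where Xs and Xt are the same image

L_adv = sum(hinge_loss(d, True) for d in D_outputs)
L_id = (1 - torch.cosine_similarity(embed, ZY, dim=1)).mean()
L_attr = sum(torch.mean(torch.pow(a - b, 2).reshape(batch_size, -1), dim=1).mean()
             for a, b in zip(Xt_attr, Y_attr)) / 2.0
L_rec = torch.sum(0.5 * torch.mean(torch.pow(Y - Xt, 2).reshape(batch_size, -1), dim=1)
                  * same_person) / (same_person.sum() + 1e-6)
lossG = 1 * L_adv + 10 * L_attr + 5 * L_id + 10 * L_rec
print(lossG.item())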