├── .gitignore
├── .gitmodules
├── BFM
│   ├── BFM_exp_idx.mat
│   ├── BFM_front_idx.mat
│   ├── facemodel_info.mat
│   ├── select_vertex_id.mat
│   ├── similarity_Lm3D_all.mat
│   └── std_exp.txt
├── LICENSE
├── SECURITY.md
├── data_loader.py
├── demo.py
├── face_decoder.py
├── images
│   ├── albedo.png
│   ├── alignment.png
│   ├── camera.png
│   ├── example.gif
│   ├── example.png
│   ├── extreme.png
│   ├── lm3d.png
│   └── lm5p.png
├── inception_resnet_v1.py
├── input
│   ├── 000002.jpg
│   ├── 000002.txt
│   ├── 000006.jpg
│   ├── 000006.txt
│   ├── 000007.jpg
│   ├── 000007.txt
│   ├── 000031.jpg
│   ├── 000031.txt
│   ├── 000033.jpg
│   ├── 000033.txt
│   ├── 000037.jpg
│   ├── 000037.txt
│   ├── 000050.jpg
│   ├── 000050.txt
│   ├── 000055.jpg
│   ├── 000055.txt
│   ├── 000114.jpg
│   ├── 000114.txt
│   ├── 000125.jpg
│   ├── 000125.txt
│   ├── 000126.jpg
│   ├── 000126.txt
│   ├── 015259.jpg
│   ├── 015259.txt
│   ├── 015270.jpg
│   ├── 015270.txt
│   ├── 015309.jpg
│   ├── 015309.txt
│   ├── 015310.jpg
│   ├── 015310.txt
│   ├── 015316.jpg
│   ├── 015316.txt
│   ├── 015384.jpg
│   ├── 015384.txt
│   ├── vd006.png
│   ├── vd006.txt
│   ├── vd025.png
│   ├── vd025.txt
│   ├── vd026.png
│   ├── vd026.txt
│   ├── vd034.png
│   ├── vd034.txt
│   ├── vd051.png
│   ├── vd051.txt
│   ├── vd070.png
│   ├── vd070.txt
│   ├── vd092.png
│   ├── vd092.txt
│   ├── vd102.png
│   └── vd102.txt
├── losses.py
├── networks.py
├── options.py
├── preprocess_img.py
├── readme.md
├── reconstruction_model.py
├── renderer
│   └── __init__.py
├── skin.py
├── train.py
└── utils.py

/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 |
 6 | # C extensions
 7 | *.so
 8 |
 9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tf_mesh_renderer"] 2 | path = tf_mesh_renderer 3 | url = https://github.com/google/tf_mesh_renderer.git 4 | -------------------------------------------------------------------------------- /BFM/BFM_exp_idx.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/BFM/BFM_exp_idx.mat -------------------------------------------------------------------------------- /BFM/BFM_front_idx.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/BFM/BFM_front_idx.mat -------------------------------------------------------------------------------- /BFM/facemodel_info.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/BFM/facemodel_info.mat -------------------------------------------------------------------------------- /BFM/select_vertex_id.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/BFM/select_vertex_id.mat -------------------------------------------------------------------------------- /BFM/similarity_Lm3D_all.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/BFM/similarity_Lm3D_all.mat -------------------------------------------------------------------------------- /BFM/std_exp.txt: -------------------------------------------------------------------------------- 1 | 453980 257264 263068 211890 135873 184721 47055.6 72732 62787.4 106226 56708.5 51439.8 34887.1 44378.7 51813.4 31030.7 23354.9 23128.1 19400 21827.6 22767.7 22057.4 19894.3 16172.8 17142.7 10035.3 14727.5 12972.5 10763.8 8953.93 8682.62 8941.81 6342.3 5205.3 7065.65 6083.35 6678.88 4666.63 5082.89 5134.76 4908.16 3964.93 
3739.95 3180.09 2470.45 1866.62 1624.71 2423.74 1668.53 1471.65 1194.52 782.102 815.044 835.782 834.937 744.496 575.146 633.76 705.685 753.409 620.306 673.326 766.189 619.866 559.93 357.264 396.472 556.849 455.048 460.592 400.735 326.702 279.428 291.535 326.584 305.664 287.816 283.642 276.19 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 
18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /data_loader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib.data import prefetch_to_device, shuffle_and_repeat, map_and_batch 3 | import os 4 | import glob 5 | import numpy as np 6 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2' 7 | ############################################################################################### 8 | # data loader for training stage 9 | ############################################################################################### 10 | def _parse_function(image_path,lm_path,mask_path): 11 | 12 | # input image 13 | x = tf.read_file(image_path) 14 | img = tf.image.decode_png(x, channels=3) 15 | img = tf.cast(img,tf.float32) 16 | img = img[:,:,::-1] 17 | 18 | # ground truth landmark 19 | x2 = tf.read_file(lm_path) 20 | lm = tf.decode_raw(x2,tf.float64) 21 | lm = tf.cast(lm,tf.float32) 22 | lm = tf.reshape(lm,[68,2]) 23 | 24 | # skin mask 25 | x3 = tf.read_file(mask_path) 26 | mask = tf.image.decode_png(x3, channels=3) 27 | mask = tf.cast(mask,tf.float32) 28 | 29 | return img,lm,mask 30 | 31 | def check_lm_bin(dataset,lm_path): 32 | if not os.path.isdir(os.path.join(dataset,'lm_bin')): 33 | os.makedirs(os.path.join(dataset,'lm_bin')) 34 | for i in range(len(lm_path)): 35 | lm = np.loadtxt(lm_path[i]) 36 | lm = np.reshape(lm,[-1]) 37 | lm.tofile(os.path.join(dataset,'lm_bin',lm_path[i].split('/')[-1].replace('txt','bin'))) 38 | 39 | def load_dataset(opt,train=True): 40 | if train: 41 | data_path = opt.data_path 42 | else: 43 | data_path = opt.val_data_path 44 | image_path_all = [] 45 | lm_path_all = [] 46 | mask_path_all = [] 47 | 48 | for dataset in data_path: 49 | image_path = glob.glob(dataset + '/' + '*.png') 50 | image_path.sort() 51 | lm_path_ = [os.path.join(dataset,'lm',f.split('/')[-1].replace('png','txt')) for f in image_path] 52 | lm_path_.sort() 53 | mask_path = [os.path.join(dataset,'mask',f.split('/')[-1]) for f in image_path] 54 | mask_path.sort() 55 | 56 | # check if landmark binary files exist 57 | check_lm_bin(dataset,lm_path_) 58 | 59 | lm_path = [os.path.join(dataset,'lm_bin',f.split('/')[-1].replace('png','bin')) for f in image_path] 60 | lm_path.sort() 
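# (Editor's note, not part of the original file:) a sketch of the per-dataset directory
# layout this loader appears to assume for each entry in opt.data_path, inferred from the
# glob and os.path.join calls above; the file names are illustrative only:
#   <dataset>/xxxx.png          training image (read as PNG, converted to BGR float)
#   <dataset>/lm/xxxx.txt       68 ground-truth landmarks, readable by np.loadtxt
#   <dataset>/mask/xxxx.png     skin attention mask with the same file name as the image
#   <dataset>/lm_bin/xxxx.bin   float64 landmark binaries, auto-generated by check_lm_bin()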
61 | 62 | image_path_all += image_path 63 | mask_path_all += mask_path 64 | lm_path_all += lm_path 65 | 66 | dataset_num = len(image_path_all) 67 | 68 | dataset = tf.data.Dataset.from_tensor_slices((image_path_all,lm_path_all,mask_path_all)) 69 | dataset = dataset. \ 70 | apply(shuffle_and_repeat(dataset_num)). \ 71 | apply(map_and_batch(_parse_function, opt.batch_size, num_parallel_batches=4, drop_remainder=True)). \ 72 | apply(prefetch_to_device('/gpu:0', None)) # When using dataset.prefetch, use buffer_size=None to let it detect optimal buffer size 73 | 74 | inputs_iterator = dataset.make_one_shot_iterator() 75 | return inputs_iterator 76 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from PIL import Image 4 | import os 5 | import glob 6 | import platform 7 | import argparse 8 | from scipy.io import loadmat,savemat 9 | 10 | from preprocess_img import align_img 11 | from utils import * 12 | from face_decoder import Face3D 13 | from options import Option 14 | 15 | is_windows = platform.system() == "Windows" 16 | 17 | def parse_args(): 18 | 19 | desc = "Deep3DFaceReconstruction" 20 | parser = argparse.ArgumentParser(description=desc) 21 | 22 | parser.add_argument('--pretrain_weights', type=str, default=None, help='path for pre-trained model') 23 | parser.add_argument('--use_pb', type=int, default=1, help='validation data folder') 24 | 25 | return parser.parse_args() 26 | 27 | def restore_weights(sess,opt): 28 | var_list = tf.trainable_variables() 29 | g_list = tf.global_variables() 30 | 31 | # add batch normalization params into trainable variables 32 | bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name] 33 | bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name] 34 | var_list +=bn_moving_vars 35 | 36 | # create saver to save and restore weights 37 | saver = tf.train.Saver(var_list = var_list) 38 | saver.restore(sess,opt.pretrain_weights) 39 | 40 | def demo(): 41 | # input and output folder 42 | args = parse_args() 43 | 44 | image_path = 'input' 45 | save_path = 'output' 46 | if not os.path.exists(save_path): 47 | os.makedirs(save_path) 48 | img_list = glob.glob(image_path + '/' + '*.png') 49 | img_list +=glob.glob(image_path + '/' + '*.jpg') 50 | 51 | # read BFM face model 52 | # transfer original BFM model to our model 53 | if not os.path.isfile('./BFM/BFM_model_front.mat'): 54 | transferBFM09() 55 | 56 | # read standard landmarks for preprocessing images 57 | lm3D = load_lm3d() 58 | n = 0 59 | 60 | # build reconstruction model 61 | with tf.Graph().as_default() as graph: 62 | 63 | with tf.device('/cpu:0'): 64 | opt = Option(is_train=False) 65 | opt.batch_size = 1 66 | opt.pretrain_weights = args.pretrain_weights 67 | FaceReconstructor = Face3D() 68 | images = tf.placeholder(name = 'input_imgs', shape = [opt.batch_size,224,224,3], dtype = tf.float32) 69 | 70 | if args.use_pb and os.path.isfile('network/FaceReconModel.pb'): 71 | print('Using pre-trained .pb file.') 72 | graph_def = load_graph('network/FaceReconModel.pb') 73 | tf.import_graph_def(graph_def,name='resnet',input_map={'input_imgs:0': images}) 74 | # output coefficients of R-Net (dim = 257) 75 | coeff = graph.get_tensor_by_name('resnet/coeff:0') 76 | else: 77 | print('Using pre-trained .ckpt file: %s'%opt.pretrain_weights) 78 | import networks 79 | coeff = networks.R_Net(images,is_training=False) 80 | 81 | # reconstructing 
faces 82 | FaceReconstructor.Reconstruction_Block(coeff,opt) 83 | face_shape = FaceReconstructor.face_shape_t 84 | face_texture = FaceReconstructor.face_texture 85 | face_color = FaceReconstructor.face_color 86 | landmarks_2d = FaceReconstructor.landmark_p 87 | recon_img = FaceReconstructor.render_imgs 88 | tri = FaceReconstructor.facemodel.face_buf 89 | 90 | 91 | with tf.Session() as sess: 92 | if not args.use_pb : 93 | restore_weights(sess,opt) 94 | 95 | print('reconstructing...') 96 | for file in img_list: 97 | n += 1 98 | print(n) 99 | # load images and corresponding 5 facial landmarks 100 | img,lm = load_img(file,file.replace('png','txt').replace('jpg','txt')) 101 | # preprocess input image 102 | input_img,lm_new,transform_params = align_img(img,lm,lm3D) 103 | 104 | coeff_,face_shape_,face_texture_,face_color_,landmarks_2d_,recon_img_,tri_ = sess.run([coeff,\ 105 | face_shape,face_texture,face_color,landmarks_2d,recon_img,tri],feed_dict = {images: input_img}) 106 | 107 | 108 | # reshape outputs 109 | input_img = np.squeeze(input_img) 110 | face_shape_ = np.squeeze(face_shape_, (0)) 111 | face_texture_ = np.squeeze(face_texture_, (0)) 112 | face_color_ = np.squeeze(face_color_, (0)) 113 | landmarks_2d_ = np.squeeze(landmarks_2d_, (0)) 114 | if not is_windows: 115 | recon_img_ = np.squeeze(recon_img_, (0)) 116 | 117 | # save output files 118 | if not is_windows: 119 | savemat(os.path.join(save_path,file.split(os.path.sep)[-1].replace('.png','.mat').replace('jpg','mat')),{'cropped_img':input_img[:,:,::-1],'recon_img':recon_img_,'coeff':coeff_,\ 120 | 'face_shape':face_shape_,'face_texture':face_texture_,'face_color':face_color_,'lm_68p':landmarks_2d_,'lm_5p':lm_new}) 121 | save_obj(os.path.join(save_path,file.split(os.path.sep)[-1].replace('.png','_mesh.obj').replace('.jpg','_mesh.obj')),face_shape_,tri_,np.clip(face_color_,0,255)/255) # 3D reconstruction face (in canonical view) 122 | 123 | if __name__ == '__main__': 124 | demo() 125 | -------------------------------------------------------------------------------- /face_decoder.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import math as m 3 | import numpy as np 4 | from scipy.io import loadmat 5 | import platform 6 | 7 | is_windows = platform.system() == "Windows" 8 | 9 | if not is_windows: 10 | from renderer import mesh_renderer 11 | ############################################################################################### 12 | # Reconstruct 3D face based on output coefficients and facemodel 13 | ############################################################################################### 14 | 15 | # BFM 3D face model 16 | class BFM(): 17 | def __init__(self,model_path = './BFM/BFM_model_front.mat'): 18 | model = loadmat(model_path) 19 | self.meanshape = tf.constant(model['meanshape']) # mean face shape. [3*N,1] 20 | self.idBase = tf.constant(model['idBase']) # identity basis. [3*N,80] 21 | self.exBase = tf.constant(model['exBase'].astype(np.float32)) # expression basis. [3*N,64] 22 | self.meantex = tf.constant(model['meantex']) # mean face texture. [3*N,1] (0-255) 23 | self.texBase = tf.constant(model['texBase']) # texture basis. [3*N,80] 24 | self.point_buf = tf.constant(model['point_buf']) # face indices for each vertex that lies in. starts from 1. [N,8] 25 | self.face_buf = tf.constant(model['tri']) # vertex indices for each face. starts from 1. 
[F,3] 26 | self.front_mask_render = tf.squeeze(tf.constant(model['frontmask2_idx'])) # vertex indices for small face region to compute photometric error. starts from 1. 27 | self.mask_face_buf = tf.constant(model['tri_mask2']) # vertex indices for each face from small face region. starts from 1. [f,3] 28 | self.skin_mask = tf.squeeze(tf.constant(model['skinmask'])) # vertex indices for pre-defined skin region to compute reflectance loss 29 | self.keypoints = tf.squeeze(tf.constant(model['keypoints'])) # vertex indices for 68 landmarks. starts from 1. [68,1] 30 | 31 | # Analytic 3D face 32 | class Face3D(): 33 | def __init__(self): 34 | facemodel = BFM() 35 | self.facemodel = facemodel 36 | 37 | # analytic 3D face reconstructions with coefficients from R-Net 38 | def Reconstruction_Block(self,coeff,opt): 39 | #coeff: [batchsize,257] reconstruction coefficients 40 | 41 | id_coeff,ex_coeff,tex_coeff,angles,translation,gamma,camera_scale,f_scale = self.Split_coeff(coeff) 42 | # [batchsize,N,3] canonical face shape in BFM space 43 | face_shape = self.Shape_formation_block(id_coeff,ex_coeff,self.facemodel) 44 | # [batchsize,N,3] vertex texture (in RGB order) 45 | face_texture = self.Texture_formation_block(tex_coeff,self.facemodel) 46 | # [batchsize,3,3] rotation matrix for face shape 47 | rotation = self.Compute_rotation_matrix(angles) 48 | # [batchsize,N,3] vertex normal 49 | face_norm = self.Compute_norm(face_shape,self.facemodel) 50 | norm_r = tf.matmul(face_norm,rotation) 51 | 52 | # do rigid transformation for face shape using predicted rotation and translation 53 | face_shape_t = self.Rigid_transform_block(face_shape,rotation,translation) 54 | # compute 2d landmark projections 55 | # landmark_p: [batchsize,68,2] 56 | face_landmark_t = self.Compute_landmark(face_shape_t,self.facemodel) 57 | landmark_p = self.Projection_block(face_landmark_t,camera_scale,f_scale) 58 | 59 | # [batchsize,N,3] vertex color (in RGB order) 60 | face_color = self.Illumination_block(face_texture, norm_r, gamma) 61 | 62 | # reconstruction images and region masks for computing photometric loss 63 | render_imgs,img_mask,img_mask_crop = self.Render_block(face_shape_t,norm_r,face_color,camera_scale,f_scale,self.facemodel,opt.batch_size,opt.is_train) 64 | 65 | self.id_coeff = id_coeff 66 | self.ex_coeff = ex_coeff 67 | self.tex_coeff = tex_coeff 68 | self.f_scale = f_scale 69 | self.gamma = gamma 70 | self.face_shape = face_shape 71 | self.face_shape_t = face_shape_t 72 | self.face_texture = face_texture 73 | self.face_color = face_color 74 | self.landmark_p = landmark_p 75 | self.render_imgs = render_imgs 76 | self.img_mask = img_mask 77 | self.img_mask_crop = img_mask_crop 78 | 79 | #---------------------------------------------------------------------------------------------- 80 | def Split_coeff(self,coeff): 81 | 82 | id_coeff = coeff[:,:80] 83 | ex_coeff = coeff[:,80:144] 84 | tex_coeff = coeff[:,144:224] 85 | angles = coeff[:,224:227] 86 | gamma = coeff[:,227:254] 87 | translation = coeff[:,254:257] 88 | camera_scale = tf.ones([tf.shape(coeff)[0],1]) 89 | f_scale = tf.ones([tf.shape(coeff)[0],1]) 90 | 91 | return id_coeff,ex_coeff,tex_coeff,angles,translation,gamma,camera_scale,f_scale 92 | 93 | def Shape_formation_block(self,id_coeff,ex_coeff,facemodel): 94 | face_shape = tf.einsum('ij,aj->ai',facemodel.idBase,id_coeff) + \ 95 | tf.einsum('ij,aj->ai',facemodel.exBase,ex_coeff) + facemodel.meanshape 96 | 97 | # reshape face shape to [batchsize,N,3] 98 | face_shape = 
tf.reshape(face_shape,[tf.shape(face_shape)[0],-1,3]) 99 | # re-centering the face shape with mean shape 100 | face_shape = face_shape - tf.reshape(tf.reduce_mean(tf.reshape(facemodel.meanshape,[-1,3]),0),[1,1,3]) 101 | 102 | return face_shape 103 | 104 | def Compute_norm(self,face_shape,facemodel): 105 | shape = face_shape 106 | face_id = facemodel.face_buf 107 | point_id = facemodel.point_buf 108 | 109 | # face_id and point_id index starts from 1 110 | face_id = tf.cast(face_id - 1,tf.int32) 111 | point_id = tf.cast(point_id - 1,tf.int32) 112 | 113 | #compute normal for each face 114 | v1 = tf.gather(shape,face_id[:,0], axis = 1) 115 | v2 = tf.gather(shape,face_id[:,1], axis = 1) 116 | v3 = tf.gather(shape,face_id[:,2], axis = 1) 117 | e1 = v1 - v2 118 | e2 = v2 - v3 119 | face_norm = tf.cross(e1,e2) 120 | 121 | face_norm = tf.nn.l2_normalize(face_norm, dim = 2) # normalized face_norm first 122 | face_norm = tf.concat([face_norm,tf.zeros([tf.shape(face_shape)[0],1,3])], axis = 1) 123 | 124 | #compute normal for each vertex using one-ring neighborhood 125 | v_norm = tf.reduce_sum(tf.gather(face_norm, point_id, axis = 1), axis = 2) 126 | v_norm = tf.nn.l2_normalize(v_norm, dim = 2) 127 | 128 | return v_norm 129 | 130 | def Texture_formation_block(self,tex_coeff,facemodel): 131 | face_texture = tf.einsum('ij,aj->ai',facemodel.texBase,tex_coeff) + facemodel.meantex 132 | 133 | # reshape face texture to [batchsize,N,3], note that texture is in RGB order 134 | face_texture = tf.reshape(face_texture,[tf.shape(face_texture)[0],-1,3]) 135 | 136 | return face_texture 137 | 138 | def Compute_rotation_matrix(self,angles): 139 | n_data = tf.shape(angles)[0] 140 | 141 | # compute rotation matrix for X-axis, Y-axis, Z-axis respectively 142 | rotation_X = tf.concat([tf.ones([n_data,1]), 143 | tf.zeros([n_data,3]), 144 | tf.reshape(tf.cos(angles[:,0]),[n_data,1]), 145 | -tf.reshape(tf.sin(angles[:,0]),[n_data,1]), 146 | tf.zeros([n_data,1]), 147 | tf.reshape(tf.sin(angles[:,0]),[n_data,1]), 148 | tf.reshape(tf.cos(angles[:,0]),[n_data,1])], 149 | axis = 1 150 | ) 151 | 152 | rotation_Y = tf.concat([tf.reshape(tf.cos(angles[:,1]),[n_data,1]), 153 | tf.zeros([n_data,1]), 154 | tf.reshape(tf.sin(angles[:,1]),[n_data,1]), 155 | tf.zeros([n_data,1]), 156 | tf.ones([n_data,1]), 157 | tf.zeros([n_data,1]), 158 | -tf.reshape(tf.sin(angles[:,1]),[n_data,1]), 159 | tf.zeros([n_data,1]), 160 | tf.reshape(tf.cos(angles[:,1]),[n_data,1])], 161 | axis = 1 162 | ) 163 | 164 | rotation_Z = tf.concat([tf.reshape(tf.cos(angles[:,2]),[n_data,1]), 165 | -tf.reshape(tf.sin(angles[:,2]),[n_data,1]), 166 | tf.zeros([n_data,1]), 167 | tf.reshape(tf.sin(angles[:,2]),[n_data,1]), 168 | tf.reshape(tf.cos(angles[:,2]),[n_data,1]), 169 | tf.zeros([n_data,3]), 170 | tf.ones([n_data,1])], 171 | axis = 1 172 | ) 173 | 174 | rotation_X = tf.reshape(rotation_X,[n_data,3,3]) 175 | rotation_Y = tf.reshape(rotation_Y,[n_data,3,3]) 176 | rotation_Z = tf.reshape(rotation_Z,[n_data,3,3]) 177 | 178 | # R = RzRyRx 179 | rotation = tf.matmul(tf.matmul(rotation_Z,rotation_Y),rotation_X) 180 | 181 | rotation = tf.transpose(rotation, perm = [0,2,1]) 182 | 183 | return rotation 184 | 185 | def Projection_block(self,face_shape,camera_scale,f_scale): 186 | 187 | # pre-defined camera focal for pespective projection 188 | focal = tf.constant(1015.0) 189 | focal = focal*f_scale 190 | focal = tf.reshape(focal,[-1,1]) 191 | batchsize = tf.shape(focal)[0] 192 | 193 | # define camera position 194 | camera_pos = 
tf.reshape(tf.constant([0.0,0.0,10.0]),[1,1,3])*tf.reshape(camera_scale,[-1,1,1]) 195 | reverse_z = tf.tile(tf.reshape(tf.constant([1.0,0,0,0,1,0,0,0,-1.0]),[1,3,3]),[tf.shape(face_shape)[0],1,1]) 196 | 197 | # compute projection matrix 198 | p_matrix = tf.concat([focal,tf.zeros([batchsize,1]),112.*tf.ones([batchsize,1]),tf.zeros([batchsize,1]),focal,112.*tf.ones([batchsize,1]),tf.zeros([batchsize,2]),tf.ones([batchsize,1])],axis = 1) 199 | p_matrix = tf.reshape(p_matrix,[-1,3,3]) 200 | 201 | # convert z in world space to the distance to camera 202 | face_shape = tf.matmul(face_shape,reverse_z) + camera_pos 203 | aug_projection = tf.matmul(face_shape,tf.transpose(p_matrix,[0,2,1])) 204 | 205 | # [batchsize, N,2] 2d face projection 206 | face_projection = aug_projection[:,:,0:2]/tf.reshape(aug_projection[:,:,2],[tf.shape(face_shape)[0],tf.shape(aug_projection)[1],1]) 207 | 208 | 209 | return face_projection 210 | 211 | 212 | def Compute_landmark(self,face_shape,facemodel): 213 | 214 | # compute 3D landmark postitions with pre-computed 3D face shape 215 | keypoints_idx = facemodel.keypoints 216 | keypoints_idx = tf.cast(keypoints_idx - 1,tf.int32) 217 | face_landmark = tf.gather(face_shape,keypoints_idx,axis = 1) 218 | 219 | return face_landmark 220 | 221 | def Illumination_block(self,face_texture,norm_r,gamma): 222 | n_data = tf.shape(gamma)[0] 223 | n_point = tf.shape(norm_r)[1] 224 | gamma = tf.reshape(gamma,[n_data,3,9]) 225 | # set initial lighting with an ambient lighting 226 | init_lit = tf.constant([0.8,0,0,0,0,0,0,0,0]) 227 | gamma = gamma + tf.reshape(init_lit,[1,1,9]) 228 | 229 | # compute vertex color using SH function approximation 230 | a0 = m.pi 231 | a1 = 2*m.pi/tf.sqrt(3.0) 232 | a2 = 2*m.pi/tf.sqrt(8.0) 233 | c0 = 1/tf.sqrt(4*m.pi) 234 | c1 = tf.sqrt(3.0)/tf.sqrt(4*m.pi) 235 | c2 = 3*tf.sqrt(5.0)/tf.sqrt(12*m.pi) 236 | 237 | Y = tf.concat([tf.tile(tf.reshape(a0*c0,[1,1,1]),[n_data,n_point,1]), 238 | tf.expand_dims(-a1*c1*norm_r[:,:,1],2), 239 | tf.expand_dims(a1*c1*norm_r[:,:,2],2), 240 | tf.expand_dims(-a1*c1*norm_r[:,:,0],2), 241 | tf.expand_dims(a2*c2*norm_r[:,:,0]*norm_r[:,:,1],2), 242 | tf.expand_dims(-a2*c2*norm_r[:,:,1]*norm_r[:,:,2],2), 243 | tf.expand_dims(a2*c2*0.5/tf.sqrt(3.0)*(3*tf.square(norm_r[:,:,2])-1),2), 244 | tf.expand_dims(-a2*c2*norm_r[:,:,0]*norm_r[:,:,2],2), 245 | tf.expand_dims(a2*c2*0.5*(tf.square(norm_r[:,:,0])-tf.square(norm_r[:,:,1])),2)],axis = 2) 246 | 247 | color_r = tf.squeeze(tf.matmul(Y,tf.expand_dims(gamma[:,0,:],2)),axis = 2) 248 | color_g = tf.squeeze(tf.matmul(Y,tf.expand_dims(gamma[:,1,:],2)),axis = 2) 249 | color_b = tf.squeeze(tf.matmul(Y,tf.expand_dims(gamma[:,2,:],2)),axis = 2) 250 | 251 | #[batchsize,N,3] vertex color in RGB order 252 | face_color = tf.stack([color_r*face_texture[:,:,0],color_g*face_texture[:,:,1],color_b*face_texture[:,:,2]],axis = 2) 253 | 254 | return face_color 255 | 256 | def Rigid_transform_block(self,face_shape,rotation,translation): 257 | # do rigid transformation for 3D face shape 258 | face_shape_r = tf.matmul(face_shape,rotation) 259 | face_shape_t = face_shape_r + tf.reshape(translation,[tf.shape(face_shape)[0],1,3]) 260 | 261 | return face_shape_t 262 | 263 | def Render_block(self,face_shape,face_norm,face_color,camera_scale,f_scale,facemodel,batchsize,is_train=True): 264 | if is_train and is_windows: 265 | raise ValueError('Not support training with Windows environment.') 266 | 267 | if is_windows: 268 | return [],[],[] 269 | 270 | # render reconstruction images 271 | n_vex = 
int(facemodel.idBase.shape[0].value/3) 272 | fov_y = 2*tf.atan(112./(1015.*f_scale))*180./m.pi 273 | fov_y = tf.reshape(fov_y,[batchsize]) 274 | # full face region 275 | face_shape = tf.reshape(face_shape,[batchsize,n_vex,3]) 276 | face_norm = tf.reshape(face_norm,[batchsize,n_vex,3]) 277 | face_color = tf.reshape(face_color,[batchsize,n_vex,3]) 278 | 279 | # pre-defined cropped face region 280 | mask_face_shape = tf.gather(face_shape,tf.cast(facemodel.front_mask_render-1,tf.int32),axis = 1) 281 | mask_face_norm = tf.gather(face_norm,tf.cast(facemodel.front_mask_render-1,tf.int32),axis = 1) 282 | mask_face_color = tf.gather(face_color,tf.cast(facemodel.front_mask_render-1,tf.int32),axis = 1) 283 | 284 | # setting cammera settings 285 | camera_position = tf.constant([[0,0,10.0]])*tf.reshape(camera_scale,[-1,1]) 286 | camera_lookat = tf.constant([0,0,0.0]) 287 | camera_up = tf.constant([0,1.0,0]) 288 | 289 | # setting light source position(intensities are set to 0 because we have computed the vertex color) 290 | light_positions = tf.tile(tf.reshape(tf.constant([0,0,1e5]),[1,1,3]),[batchsize,1,1]) 291 | light_intensities = tf.tile(tf.reshape(tf.constant([0.0,0.0,0.0]),[1,1,3]),[batchsize,1,1]) 292 | ambient_color = tf.tile(tf.reshape(tf.constant([1.0,1,1]),[1,3]),[batchsize,1]) 293 | 294 | #using tf_mesh_renderer for rasterization (https://github.com/google/tf_mesh_renderer) 295 | # img: [batchsize,224,224,3] images in RGB order (0-255) 296 | # mask:[batchsize,224,224,1] transparency for img ({0,1} value) 297 | with tf.device('/cpu:0'): 298 | img_rgba = mesh_renderer.mesh_renderer(face_shape, 299 | tf.cast(facemodel.face_buf-1,tf.int32), 300 | face_norm, 301 | face_color, 302 | camera_position = camera_position, 303 | camera_lookat = camera_lookat, 304 | camera_up = camera_up, 305 | light_positions = light_positions, 306 | light_intensities = light_intensities, 307 | image_width = 224, 308 | image_height = 224, 309 | fov_y = fov_y, 310 | near_clip = 0.01, 311 | far_clip = 50.0, 312 | ambient_color = ambient_color) 313 | 314 | img = img_rgba[:,:,:,:3] 315 | mask = img_rgba[:,:,:,3:] 316 | 317 | img = tf.cast(img[:,:,:,::-1],tf.float32) #transfer RGB to BGR 318 | mask = tf.cast(mask,tf.float32) # full face region 319 | 320 | if is_train: 321 | # compute mask for small face region 322 | with tf.device('/cpu:0'): 323 | img_crop_rgba = mesh_renderer.mesh_renderer(mask_face_shape, 324 | tf.cast(facemodel.mask_face_buf-1,tf.int32), 325 | mask_face_norm, 326 | mask_face_color, 327 | camera_position = camera_position, 328 | camera_lookat = camera_lookat, 329 | camera_up = camera_up, 330 | light_positions = light_positions, 331 | light_intensities = light_intensities, 332 | image_width = 224, 333 | image_height = 224, 334 | fov_y = fov_y, 335 | near_clip = 0.01, 336 | far_clip = 50.0, 337 | ambient_color = ambient_color) 338 | 339 | mask_f = img_crop_rgba[:,:,:,3:] 340 | mask_f = tf.cast(mask_f,tf.float32) # small face region 341 | return img,mask,mask_f 342 | 343 | img_rgba = tf.cast(tf.clip_by_value(img_rgba,0,255),tf.float32) 344 | 345 | return img_rgba,mask,mask 346 | -------------------------------------------------------------------------------- /images/albedo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/images/albedo.png -------------------------------------------------------------------------------- /images/alignment.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/images/alignment.png -------------------------------------------------------------------------------- /images/camera.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/images/camera.png -------------------------------------------------------------------------------- /images/example.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/images/example.gif -------------------------------------------------------------------------------- /images/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/images/example.png -------------------------------------------------------------------------------- /images/extreme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/images/extreme.png -------------------------------------------------------------------------------- /images/lm3d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/images/lm3d.png -------------------------------------------------------------------------------- /images/lm5p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/images/lm5p.png -------------------------------------------------------------------------------- /inception_resnet_v1.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Contains the definition of the Inception Resnet V1 architecture. 17 | As described in http://arxiv.org/abs/1602.07261. 
18 | Inception-v4, Inception-ResNet and the Impact of Residual Connections 19 | on Learning 20 | Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi 21 | """ 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | import tensorflow as tf 27 | import tensorflow.contrib.slim as slim 28 | 29 | 30 | # Inception-Resnet-A 31 | def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 32 | """Builds the 35x35 resnet block.""" 33 | with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): 34 | with tf.variable_scope('Branch_0'): 35 | tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') 36 | with tf.variable_scope('Branch_1'): 37 | tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') 38 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') 39 | with tf.variable_scope('Branch_2'): 40 | tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') 41 | tower_conv2_1 = slim.conv2d(tower_conv2_0, 32, 3, scope='Conv2d_0b_3x3') 42 | tower_conv2_2 = slim.conv2d(tower_conv2_1, 32, 3, scope='Conv2d_0c_3x3') 43 | mixed = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2], 3) 44 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 45 | activation_fn=None, scope='Conv2d_1x1') 46 | net += scale * up 47 | if activation_fn: 48 | net = activation_fn(net) 49 | return net 50 | 51 | # Inception-Resnet-B 52 | def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 53 | """Builds the 17x17 resnet block.""" 54 | with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): 55 | with tf.variable_scope('Branch_0'): 56 | tower_conv = slim.conv2d(net, 128, 1, scope='Conv2d_1x1') 57 | with tf.variable_scope('Branch_1'): 58 | tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') 59 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 128, [1, 7], 60 | scope='Conv2d_0b_1x7') 61 | tower_conv1_2 = slim.conv2d(tower_conv1_1, 128, [7, 1], 62 | scope='Conv2d_0c_7x1') 63 | mixed = tf.concat([tower_conv, tower_conv1_2], 3) 64 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 65 | activation_fn=None, scope='Conv2d_1x1') 66 | net += scale * up 67 | if activation_fn: 68 | net = activation_fn(net) 69 | return net 70 | 71 | 72 | # Inception-Resnet-C 73 | def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 74 | """Builds the 8x8 resnet block.""" 75 | with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): 76 | with tf.variable_scope('Branch_0'): 77 | tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') 78 | with tf.variable_scope('Branch_1'): 79 | tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') 80 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 192, [1, 3], 81 | scope='Conv2d_0b_1x3') 82 | tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [3, 1], 83 | scope='Conv2d_0c_3x1') 84 | mixed = tf.concat([tower_conv, tower_conv1_2], 3) 85 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 86 | activation_fn=None, scope='Conv2d_1x1') 87 | net += scale * up 88 | if activation_fn: 89 | net = activation_fn(net) 90 | return net 91 | 92 | def reduction_a(net, k, l, m, n): 93 | with tf.variable_scope('Branch_0'): 94 | tower_conv = slim.conv2d(net, n, 3, stride=2, padding='VALID', 95 | scope='Conv2d_1a_3x3') 96 | with tf.variable_scope('Branch_1'): 97 | tower_conv1_0 = slim.conv2d(net, k, 1, scope='Conv2d_0a_1x1') 98 | tower_conv1_1 = slim.conv2d(tower_conv1_0, l, 3, 99 | 
scope='Conv2d_0b_3x3') 100 | tower_conv1_2 = slim.conv2d(tower_conv1_1, m, 3, 101 | stride=2, padding='VALID', 102 | scope='Conv2d_1a_3x3') 103 | with tf.variable_scope('Branch_2'): 104 | tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', 105 | scope='MaxPool_1a_3x3') 106 | net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) 107 | return net 108 | 109 | def reduction_b(net): 110 | with tf.variable_scope('Branch_0'): 111 | tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 112 | tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, 113 | padding='VALID', scope='Conv2d_1a_3x3') 114 | with tf.variable_scope('Branch_1'): 115 | tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 116 | tower_conv1_1 = slim.conv2d(tower_conv1, 256, 3, stride=2, 117 | padding='VALID', scope='Conv2d_1a_3x3') 118 | with tf.variable_scope('Branch_2'): 119 | tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 120 | tower_conv2_1 = slim.conv2d(tower_conv2, 256, 3, 121 | scope='Conv2d_0b_3x3') 122 | tower_conv2_2 = slim.conv2d(tower_conv2_1, 256, 3, stride=2, 123 | padding='VALID', scope='Conv2d_1a_3x3') 124 | with tf.variable_scope('Branch_3'): 125 | tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', 126 | scope='MaxPool_1a_3x3') 127 | net = tf.concat([tower_conv_1, tower_conv1_1, 128 | tower_conv2_2, tower_pool], 3) 129 | return net 130 | 131 | def inference(images, keep_probability, phase_train=True, 132 | bottleneck_layer_size=128, weight_decay=0.0, reuse=None): 133 | batch_norm_params = { 134 | # Decay for the moving averages. 135 | 'decay': 0.995, 136 | # epsilon to prevent 0s in variance. 137 | 'epsilon': 0.001, 138 | # force in-place updates of mean and variance estimates 139 | 'updates_collections': None, 140 | # Moving averages ends up in the trainable variables collection 141 | 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ], 142 | } 143 | 144 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 145 | weights_initializer=slim.initializers.xavier_initializer(), 146 | weights_regularizer=slim.l2_regularizer(weight_decay), 147 | normalizer_fn=slim.batch_norm, 148 | normalizer_params=batch_norm_params): 149 | return inception_resnet_v1(images, is_training=phase_train, 150 | dropout_keep_prob=keep_probability, bottleneck_layer_size=bottleneck_layer_size, reuse=reuse) 151 | 152 | 153 | def inception_resnet_v1(inputs, is_training=True, 154 | dropout_keep_prob=0.8, 155 | bottleneck_layer_size=128, 156 | reuse=None, 157 | scope='InceptionResnetV1'): 158 | """Creates the Inception Resnet V1 model. 159 | Args: 160 | inputs: a 4-D tensor of size [batch_size, height, width, 3]. 161 | num_classes: number of predicted classes. 162 | is_training: whether is training or not. 163 | dropout_keep_prob: float, the fraction to keep before final layer. 164 | reuse: whether or not the network and its variables should be reused. To be 165 | able to reuse 'scope' must be given. 166 | scope: Optional variable_scope. 167 | Returns: 168 | logits: the logits outputs of the model. 169 | end_points: the set of end_points from the inception model. 
170 | """ 171 | end_points = {} 172 | 173 | with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse): 174 | with slim.arg_scope([slim.batch_norm, slim.dropout], 175 | is_training=is_training): 176 | with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], 177 | stride=1, padding='SAME'): 178 | 179 | # 149 x 149 x 32 180 | net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', 181 | scope='Conv2d_1a_3x3') 182 | end_points['Conv2d_1a_3x3'] = net 183 | # 147 x 147 x 32 184 | net = slim.conv2d(net, 32, 3, padding='VALID', 185 | scope='Conv2d_2a_3x3') 186 | end_points['Conv2d_2a_3x3'] = net 187 | # 147 x 147 x 64 188 | net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') 189 | end_points['Conv2d_2b_3x3'] = net 190 | # 73 x 73 x 64 191 | net = slim.max_pool2d(net, 3, stride=2, padding='VALID', 192 | scope='MaxPool_3a_3x3') 193 | end_points['MaxPool_3a_3x3'] = net 194 | # 73 x 73 x 80 195 | net = slim.conv2d(net, 80, 1, padding='VALID', 196 | scope='Conv2d_3b_1x1') 197 | end_points['Conv2d_3b_1x1'] = net 198 | # 71 x 71 x 192 199 | net = slim.conv2d(net, 192, 3, padding='VALID', 200 | scope='Conv2d_4a_3x3') 201 | end_points['Conv2d_4a_3x3'] = net 202 | # 35 x 35 x 256 203 | net = slim.conv2d(net, 256, 3, stride=2, padding='VALID', 204 | scope='Conv2d_4b_3x3') 205 | end_points['Conv2d_4b_3x3'] = net 206 | 207 | # 5 x Inception-resnet-A 208 | net = slim.repeat(net, 5, block35, scale=0.17) 209 | end_points['Mixed_5a'] = net 210 | 211 | # Reduction-A 212 | with tf.variable_scope('Mixed_6a'): 213 | net = reduction_a(net, 192, 192, 256, 384) 214 | end_points['Mixed_6a'] = net 215 | 216 | # 10 x Inception-Resnet-B 217 | net = slim.repeat(net, 10, block17, scale=0.10) 218 | end_points['Mixed_6b'] = net 219 | 220 | # Reduction-B 221 | with tf.variable_scope('Mixed_7a'): 222 | net = reduction_b(net) 223 | end_points['Mixed_7a'] = net 224 | 225 | # 5 x Inception-Resnet-C 226 | net = slim.repeat(net, 5, block8, scale=0.20) 227 | end_points['Mixed_8a'] = net 228 | 229 | net = block8(net, activation_fn=None) 230 | end_points['Mixed_8b'] = net 231 | 232 | with tf.variable_scope('Logits'): 233 | end_points['PrePool'] = net 234 | #pylint: disable=no-member 235 | net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', 236 | scope='AvgPool_1a_8x8') 237 | net = slim.flatten(net) 238 | 239 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 240 | scope='Dropout') 241 | 242 | end_points['PreLogitsFlatten'] = net 243 | 244 | net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, 245 | scope='Bottleneck', reuse=False) 246 | 247 | return net, end_points 248 | -------------------------------------------------------------------------------- /input/000002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/000002.jpg -------------------------------------------------------------------------------- /input/000002.txt: -------------------------------------------------------------------------------- 1 | 142.84 207.18 2 | 222.02 203.9 3 | 159.24 253.57 4 | 146.59 290.93 5 | 227.52 284.74 6 | -------------------------------------------------------------------------------- /input/000006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/000006.jpg 
-------------------------------------------------------------------------------- /input/000006.txt: -------------------------------------------------------------------------------- 1 | 199.93 158.28 2 | 255.34 166.54 3 | 236.08 198.92 4 | 198.83 229.24 5 | 245.23 234.52 6 | -------------------------------------------------------------------------------- /input/000007.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/000007.jpg -------------------------------------------------------------------------------- /input/000007.txt: -------------------------------------------------------------------------------- 1 | 129.36 198.28 2 | 204.47 191.47 3 | 164.42 240.51 4 | 140.74 277.77 5 | 205.4 270.9 6 | -------------------------------------------------------------------------------- /input/000031.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/000031.jpg -------------------------------------------------------------------------------- /input/000031.txt: -------------------------------------------------------------------------------- 1 | 151.23 240.71 2 | 274.05 235.52 3 | 217.37 305.99 4 | 158.03 346.06 5 | 272.17 341.09 6 | -------------------------------------------------------------------------------- /input/000033.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/000033.jpg -------------------------------------------------------------------------------- /input/000033.txt: -------------------------------------------------------------------------------- 1 | 119.09 94.291 2 | 158.31 96.472 3 | 136.76 121.4 4 | 119.33 134.49 5 | 154.66 136.68 6 | -------------------------------------------------------------------------------- /input/000037.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/000037.jpg -------------------------------------------------------------------------------- /input/000037.txt: -------------------------------------------------------------------------------- 1 | 147.37 159.39 2 | 196.94 163.26 3 | 190.68 194.36 4 | 153.72 228.44 5 | 193.94 229.7 6 | -------------------------------------------------------------------------------- /input/000050.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/000050.jpg -------------------------------------------------------------------------------- /input/000050.txt: -------------------------------------------------------------------------------- 1 | 150.4 94.799 2 | 205.14 102.07 3 | 179.54 131.16 4 | 144.45 147.42 5 | 193.39 154.14 6 | -------------------------------------------------------------------------------- /input/000055.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/000055.jpg 
-------------------------------------------------------------------------------- /input/000055.txt: -------------------------------------------------------------------------------- 1 | 114.26 193.42 2 | 205.8 190.27 3 | 154.15 244.02 4 | 124.69 295.22 5 | 200.88 292.69 6 | -------------------------------------------------------------------------------- /input/000114.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/000114.jpg -------------------------------------------------------------------------------- /input/000114.txt: -------------------------------------------------------------------------------- 1 | 217.52 152.95 2 | 281.48 147.14 3 | 253.02 196.03 4 | 225.79 221.6 5 | 288.25 214.44 6 | -------------------------------------------------------------------------------- /input/000125.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/000125.jpg -------------------------------------------------------------------------------- /input/000125.txt: -------------------------------------------------------------------------------- 1 | 90.928 99.858 2 | 146.87 100.33 3 | 114.22 130.36 4 | 91.579 153.32 5 | 143.63 153.56 6 | -------------------------------------------------------------------------------- /input/000126.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/000126.jpg -------------------------------------------------------------------------------- /input/000126.txt: -------------------------------------------------------------------------------- 1 | 307.56 166.54 2 | 387.06 159.62 3 | 335.52 222.26 4 | 319.3 248.85 5 | 397.71 239.14 6 | -------------------------------------------------------------------------------- /input/015259.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/015259.jpg -------------------------------------------------------------------------------- /input/015259.txt: -------------------------------------------------------------------------------- 1 | 226.38 193.65 2 | 319.12 208.97 3 | 279.99 245.88 4 | 213.79 290.55 5 | 303.03 302.1 6 | -------------------------------------------------------------------------------- /input/015270.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/015270.jpg -------------------------------------------------------------------------------- /input/015270.txt: -------------------------------------------------------------------------------- 1 | 208.4 410.08 2 | 364.41 388.68 3 | 291.6 503.57 4 | 244.82 572.86 5 | 383.18 553.49 6 | -------------------------------------------------------------------------------- /input/015309.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/015309.jpg 
-------------------------------------------------------------------------------- /input/015309.txt: -------------------------------------------------------------------------------- 1 | 284.61 496.57 2 | 562.77 550.78 3 | 395.85 712.84 4 | 238.92 786.8 5 | 495.61 827.22 6 | -------------------------------------------------------------------------------- /input/015310.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/015310.jpg -------------------------------------------------------------------------------- /input/015310.txt: -------------------------------------------------------------------------------- 1 | 153.95 153.43 2 | 211.13 161.54 3 | 197.28 190.26 4 | 150.82 215.98 5 | 202.32 223.12 6 | -------------------------------------------------------------------------------- /input/015316.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/015316.jpg -------------------------------------------------------------------------------- /input/015316.txt: -------------------------------------------------------------------------------- 1 | 481.31 396.88 2 | 667.75 392.43 3 | 557.81 440.55 4 | 490.44 586.28 5 | 640.56 583.2 6 | -------------------------------------------------------------------------------- /input/015384.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/015384.jpg -------------------------------------------------------------------------------- /input/015384.txt: -------------------------------------------------------------------------------- 1 | 191.79 143.97 2 | 271.86 151.23 3 | 191.25 210.29 4 | 187.82 257.12 5 | 258.82 261.96 6 | -------------------------------------------------------------------------------- /input/vd006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/vd006.png -------------------------------------------------------------------------------- /input/vd006.txt: -------------------------------------------------------------------------------- 1 | 123.12 117.58 2 | 176.59 122.09 3 | 126.99 144.68 4 | 117.61 183.43 5 | 163.94 186.41 6 | -------------------------------------------------------------------------------- /input/vd025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/vd025.png -------------------------------------------------------------------------------- /input/vd025.txt: -------------------------------------------------------------------------------- 1 | 180.12 116.13 2 | 263.18 98.397 3 | 230.48 154.72 4 | 201.37 199.01 5 | 279.18 182.56 6 | -------------------------------------------------------------------------------- /input/vd026.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/vd026.png 
-------------------------------------------------------------------------------- /input/vd026.txt: -------------------------------------------------------------------------------- 1 | 171.27 263.54 2 | 286.58 263.88 3 | 203.35 333.02 4 | 170.6 389.42 5 | 281.73 386.84 6 | -------------------------------------------------------------------------------- /input/vd034.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/vd034.png -------------------------------------------------------------------------------- /input/vd034.txt: -------------------------------------------------------------------------------- 1 | 136.01 167.83 2 | 195.25 151.71 3 | 152.89 191.45 4 | 149.85 235.5 5 | 201.16 222.8 6 | -------------------------------------------------------------------------------- /input/vd051.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/vd051.png -------------------------------------------------------------------------------- /input/vd051.txt: -------------------------------------------------------------------------------- 1 | 161.92 292.04 2 | 254.21 283.81 3 | 212.75 342.06 4 | 170.78 387.28 5 | 254.6 379.82 6 | -------------------------------------------------------------------------------- /input/vd070.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/vd070.png -------------------------------------------------------------------------------- /input/vd070.txt: -------------------------------------------------------------------------------- 1 | 276.53 290.35 2 | 383.38 294.75 3 | 314.48 354.66 4 | 275.08 407.72 5 | 364.94 411.48 6 | -------------------------------------------------------------------------------- /input/vd092.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/vd092.png -------------------------------------------------------------------------------- /input/vd092.txt: -------------------------------------------------------------------------------- 1 | 108.59 149.07 2 | 157.35 143.85 3 | 134.4 173.2 4 | 117.88 200.79 5 | 159.56 196.36 6 | -------------------------------------------------------------------------------- /input/vd102.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Deep3DFaceReconstruction/1935ea92af72a72c4cf7b8a677a822afd8554b51/input/vd102.png -------------------------------------------------------------------------------- /input/vd102.txt: -------------------------------------------------------------------------------- 1 | 121.62 225.96 2 | 186.73 223.07 3 | 162.99 269.82 4 | 132.12 302.62 5 | 186.42 299.21 6 | -------------------------------------------------------------------------------- /losses.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from scipy.io import loadmat,savemat 3 | ############################################################################################### 4 | # Define losses for training 5 | 
############################################################################################### 6 | 7 | # photometric loss 8 | # input_imgs and render_imgs are [batchsize,h,w,3] BGR images 9 | # img_mask are [batchsize,h,w,1] attention masks 10 | def Photo_loss(input_imgs,render_imgs,img_mask): 11 | 12 | input_imgs = tf.cast(input_imgs,tf.float32) 13 | 14 | # img_mask = tf.squeeze(img_mask,3) 15 | img_mask = tf.stop_gradient(img_mask[:,:,:,0]) 16 | 17 | # photo loss with skin attention 18 | photo_loss = tf.sqrt(tf.reduce_sum(tf.square(input_imgs - render_imgs),axis = 3))*img_mask/255 19 | photo_loss = tf.reduce_sum(photo_loss) / tf.maximum(tf.reduce_sum(img_mask),1.0) 20 | 21 | return photo_loss 22 | 23 | # perceptual loss 24 | # id_feature and id_label are [batchsize, c] identity features for reconstruction images and input images 25 | def Perceptual_loss(id_feature,id_label): 26 | id_feature = tf.nn.l2_normalize(id_feature, dim = 1) 27 | id_label = tf.nn.l2_normalize(id_label, dim = 1) 28 | # cosine similarity 29 | sim = tf.reduce_sum(id_feature*id_label,1) 30 | loss = tf.reduce_sum(tf.maximum(0.0,1.0 - sim))/tf.cast(tf.shape(id_feature)[0],tf.float32) 31 | 32 | return loss 33 | 34 | # landmark loss 35 | # landmark_p and landmark_label are [batchsize, 68, 2] landmark projections for reconstruction images and input images 36 | def Landmark_loss(landmark_p,landmark_label): 37 | 38 | # we set higher weights for landmarks around the mouth and nose regions 39 | landmark_weight = tf.concat([tf.ones([1,28]),20*tf.ones([1,3]),tf.ones([1,29]),20*tf.ones([1,8])],axis = 1) 40 | landmark_weight = tf.tile(landmark_weight,[tf.shape(landmark_p)[0],1]) 41 | 42 | landmark_loss = tf.reduce_sum(tf.reduce_sum(tf.square(landmark_p-landmark_label),2)*landmark_weight)/(68.0*tf.cast(tf.shape(landmark_p)[0],tf.float32)) 43 | 44 | return landmark_loss 45 | 46 | # coefficient regularization to ensure plausible 3d faces 47 | def Regulation_loss(id_coeff,ex_coeff,tex_coeff,opt): 48 | w_ex = opt.w_ex 49 | w_tex = opt.w_tex 50 | 51 | regulation_loss = tf.nn.l2_loss(id_coeff) + w_ex * tf.nn.l2_loss(ex_coeff) + w_tex * tf.nn.l2_loss(tex_coeff) 52 | regulation_loss = 2*regulation_loss/ tf.cast(tf.shape(id_coeff)[0],tf.float32) 53 | 54 | return regulation_loss 55 | 56 | # albedo regularization to ensure an uniform skin albedo 57 | def Reflectance_loss(face_texture,facemodel): 58 | skin_mask = facemodel.skin_mask 59 | skin_mask = tf.reshape(skin_mask,[1,tf.shape(skin_mask)[0],1]) 60 | 61 | texture_mean = tf.reduce_sum(face_texture*skin_mask,1)/tf.reduce_sum(skin_mask) 62 | texture_mean = tf.expand_dims(texture_mean,1) 63 | 64 | # minimize texture variance for pre-defined skin region 65 | reflectance_loss = tf.reduce_sum(tf.square((face_texture - texture_mean)*skin_mask/255.0))/(tf.cast(tf.shape(face_texture)[0],tf.float32)*tf.reduce_sum(skin_mask)) 66 | 67 | return reflectance_loss 68 | 69 | # gamma regularization to ensure a nearly-monochromatic light 70 | def Gamma_loss(gamma): 71 | gamma = tf.reshape(gamma,[-1,3,9]) 72 | gamma_mean = tf.reduce_mean(gamma,1, keep_dims = True) 73 | 74 | gamma_loss = tf.reduce_mean(tf.square(gamma - gamma_mean)) 75 | 76 | return gamma_loss -------------------------------------------------------------------------------- /networks.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib.slim.nets import resnet_v1 3 | slim = tf.contrib.slim 4 | from inception_resnet_v1 import inception_resnet_v1 5 | 
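# Note (descriptive comment, not in the original file): R_Net below regresses a single
# 257-dimensional coefficient vector per image. Following the FC heads and the final
# concatenation defined in this file, the layout is 80 (identity) + 64 (expression)
# + 80 (texture) + 3 (pose angles) + 27 (SH lighting gamma, 9 per color channel)
# + 2 (x-y translation) + 1 (z translation) = 257.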
############################################################################################### 6 | #Define R-Net and Perceptual-Net for 3D face reconstruction 7 | ############################################################################################### 8 | 9 | def R_Net(inputs,is_training=True): 10 | #input: [Batchsize,H,W,C], 0-255, BGR image 11 | inputs = tf.cast(inputs,tf.float32) 12 | # standard ResNet50 backbone (without the last classfication FC layer) 13 | with slim.arg_scope(resnet_v1.resnet_arg_scope()): 14 | net,end_points = resnet_v1.resnet_v1_50(inputs,is_training = is_training ,reuse = tf.AUTO_REUSE) 15 | 16 | # Modified FC layer with 257 channels for reconstruction coefficients 17 | net_id = slim.conv2d(net, 80, [1, 1], 18 | activation_fn=None, 19 | normalizer_fn=None, 20 | weights_initializer = tf.zeros_initializer(), 21 | scope='fc-id') 22 | net_ex = slim.conv2d(net, 64, [1, 1], 23 | activation_fn=None, 24 | normalizer_fn=None, 25 | weights_initializer = tf.zeros_initializer(), 26 | scope='fc-ex') 27 | net_tex = slim.conv2d(net, 80, [1, 1], 28 | activation_fn=None, 29 | normalizer_fn=None, 30 | weights_initializer = tf.zeros_initializer(), 31 | scope='fc-tex') 32 | net_angles = slim.conv2d(net, 3, [1, 1], 33 | activation_fn=None, 34 | normalizer_fn=None, 35 | weights_initializer = tf.zeros_initializer(), 36 | scope='fc-angles') 37 | net_gamma = slim.conv2d(net, 27, [1, 1], 38 | activation_fn=None, 39 | normalizer_fn=None, 40 | weights_initializer = tf.zeros_initializer(), 41 | scope='fc-gamma') 42 | net_t_xy = slim.conv2d(net, 2, [1, 1], 43 | activation_fn=None, 44 | normalizer_fn=None, 45 | weights_initializer = tf.zeros_initializer(), 46 | scope='fc-XY') 47 | net_t_z = slim.conv2d(net, 1, [1, 1], 48 | activation_fn=None, 49 | normalizer_fn=None, 50 | weights_initializer = tf.zeros_initializer(), 51 | scope='fc-Z') 52 | 53 | net_id = tf.squeeze(net_id, [1,2], name='fc-id/squeezed') 54 | net_ex = tf.squeeze(net_ex, [1,2], name='fc-ex/squeezed') 55 | net_tex = tf.squeeze(net_tex, [1,2],name='fc-tex/squeezed') 56 | net_angles = tf.squeeze(net_angles,[1,2], name='fc-angles/squeezed') 57 | net_gamma = tf.squeeze(net_gamma,[1,2], name='fc-gamma/squeezed') 58 | net_t_xy = tf.squeeze(net_t_xy,[1,2], name='fc-XY/squeezed') 59 | net_t_z = tf.squeeze(net_t_z,[1,2], name='fc-Z/squeezed') 60 | 61 | net_ = tf.concat([net_id,net_ex,net_tex,net_angles,net_gamma,net_t_xy,net_t_z], axis = 1) 62 | 63 | return net_ 64 | 65 | 66 | def Perceptual_Net(input_imgs): 67 | #input_imgs: [Batchsize,H,W,C], 0-255, BGR image 68 | 69 | input_imgs = tf.reshape(input_imgs,[-1,224,224,3]) 70 | input_imgs = tf.cast(input_imgs,tf.float32) 71 | input_imgs = tf.clip_by_value(input_imgs,0,255) 72 | input_imgs = (input_imgs - 127.5)/128.0 73 | 74 | #standard face-net backbone 75 | batch_norm_params = { 76 | 'decay': 0.995, 77 | 'epsilon': 0.001, 78 | 'updates_collections': None} 79 | 80 | with slim.arg_scope([slim.conv2d, slim.fully_connected],weights_initializer=slim.initializers.xavier_initializer(), 81 | weights_regularizer=slim.l2_regularizer(0.0), 82 | normalizer_fn=slim.batch_norm, 83 | normalizer_params=batch_norm_params): 84 | feature_128,_ = inception_resnet_v1(input_imgs, bottleneck_layer_size=128, is_training=False, reuse=tf.AUTO_REUSE) 85 | 86 | # output the last FC layer feature(before classification) as identity feature 87 | return feature_128 -------------------------------------------------------------------------------- /options.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import os 4 | 5 | # training options 6 | 7 | class Option(): 8 | def __init__(self,model_name=None,is_train=True): 9 | #-------------------------------------------------------------------------------------- 10 | self.is_train = is_train 11 | self.model_dir = 'result' 12 | if model_name is None: 13 | self.model_name = 'model_test' 14 | else: 15 | self.model_name = model_name 16 | self.data_path = ['./processed_data'] 17 | self.val_data_path = ['./processed_data'] 18 | 19 | self.model_save_path = os.path.join(self.model_dir,self.model_name) 20 | if self.is_train: 21 | if not os.path.exists(self.model_save_path): 22 | os.makedirs(self.model_save_path) 23 | 24 | self.summary_dir = os.path.join(self.model_save_path,'summary') 25 | 26 | self.train_summary_path = os.path.join(self.summary_dir, 'train') 27 | self.val_summary_path = os.path.join(self.summary_dir, 'val') 28 | #--------------------------------------------------------------------------------------- 29 | # visible gpu settings 30 | self.config = tf.ConfigProto() 31 | self.config.gpu_options.visible_device_list = '0' 32 | self.use_pb = True 33 | #--------------------------------------------------------------------------------------- 34 | # training parameters 35 | 36 | self.w_photo = 1.92 37 | self.w_lm = 1.6e-3 38 | self.w_id = 0.2 39 | 40 | self.w_reg = 3.0e-4 41 | self.w_ref = 5.0 42 | 43 | self.w_gamma = 10.0 44 | 45 | self.w_ex = 0.8 46 | self.w_tex = 1.7e-2 47 | 48 | self.batch_size = 16 49 | self.boundaries = [100000] 50 | lr = [1e-4,2e-5] 51 | self.global_step = tf.Variable(0,name='global_step',trainable = False) 52 | self.lr = tf.train.piecewise_constant(self.global_step,self.boundaries,lr) 53 | self.augment = True 54 | self.train_maxiter = 200000 55 | self.train_summary_iter = 50 56 | self.image_summary_iter = 200 57 | self.val_summary_iter = 1000 58 | self.save_iter = 10000 59 | #--------------------------------------------------------------------------------------- 60 | # initial weights for resnet and facenet 61 | self.R_net_weights = os.path.join('./weights/resnet','resnet_v1_50.ckpt') 62 | self.Perceptual_net_weights = './weights/id_net/model-20170512-110547.ckpt-250000' 63 | self.pretrain_weights = os.path.join('train/model_test','iter_100000.ckpt') 64 | -------------------------------------------------------------------------------- /preprocess_img.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.io import loadmat,savemat 3 | from PIL import Image 4 | from skin import skinmask 5 | import argparse 6 | from utils import * 7 | import os 8 | import glob 9 | import tensorflow as tf 10 | 11 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 12 | 13 | #calculating least square problem 14 | def POS(xp,x): 15 | npts = xp.shape[1] 16 | 17 | A = np.zeros([2*npts,8]) 18 | 19 | A[0:2*npts-1:2,0:3] = x.transpose() 20 | A[0:2*npts-1:2,3] = 1 21 | 22 | A[1:2*npts:2,4:7] = x.transpose() 23 | A[1:2*npts:2,7] = 1; 24 | 25 | b = np.reshape(xp.transpose(),[2*npts,1]) 26 | 27 | k,_,_,_ = np.linalg.lstsq(A,b) 28 | 29 | R1 = k[0:3] 30 | R2 = k[4:7] 31 | sTx = k[3] 32 | sTy = k[7] 33 | s = (np.linalg.norm(R1) + np.linalg.norm(R2))/2 34 | t = np.stack([sTx,sTy],axis = 0) 35 | 36 | return t,s 37 | 38 | # resize and crop images 39 | def resize_n_crop_img(img,lm,t,s,target_size = 224.): 40 | w0,h0 = img.size 41 | w = (w0/s*102).astype(np.int32) 42 | 
h = (h0/s*102).astype(np.int32) 43 | img = img.resize((w,h),resample = Image.BICUBIC) 44 | 45 | left = (w/2 - target_size/2 + float((t[0] - w0/2)*102/s)).astype(np.int32) 46 | right = left + target_size 47 | up = (h/2 - target_size/2 + float((h0/2 - t[1])*102/s)).astype(np.int32) 48 | below = up + target_size 49 | 50 | img = img.crop((left,up,right,below)) 51 | img = np.array(img) 52 | img = img[:,:,::-1] #RGBtoBGR 53 | img = np.expand_dims(img,0) 54 | lm = np.stack([lm[:,0] - t[0] + w0/2,lm[:,1] - t[1] + h0/2],axis = 1)/s*102 55 | lm = lm - np.reshape(np.array([(w/2 - target_size/2),(h/2-target_size/2)]),[1,2]) 56 | 57 | return img,lm 58 | 59 | 60 | # resize and crop input images before sending to the R-Net 61 | def align_img(img,lm,lm3D): 62 | 63 | w0,h0 = img.size 64 | 65 | # change from image plane coordinates to 3D sapce coordinates(X-Y plane) 66 | lm = np.stack([lm[:,0],h0 - 1 - lm[:,1]], axis = 1) 67 | 68 | # calculate translation and scale factors using 5 facial landmarks and standard landmarks of a 3D face 69 | t,s = POS(lm.transpose(),lm3D.transpose()) 70 | 71 | # processing the image 72 | img_new,lm_new = resize_n_crop_img(img,lm,t,s) 73 | lm_new = np.stack([lm_new[:,0],223 - lm_new[:,1]], axis = 1) 74 | trans_params = np.array([w0,h0,102.0/s,t[0],t[1]]) 75 | 76 | return img_new,lm_new,trans_params 77 | 78 | # detect 68 face landmarks for aligned images 79 | def get_68landmark(img,detector,sess): 80 | 81 | input_img = detector.get_tensor_by_name('input_imgs:0') 82 | lm = detector.get_tensor_by_name('landmark:0') 83 | 84 | landmark = sess.run(lm,feed_dict={input_img:img}) 85 | landmark = np.reshape(landmark,[68,2]) 86 | landmark = np.stack([landmark[:,1],223-landmark[:,0]],axis=1) 87 | 88 | return landmark 89 | 90 | # get skin attention mask for aligned images 91 | def get_skinmask(img): 92 | 93 | img = np.squeeze(img,0) 94 | skin_img = skinmask(img) 95 | return skin_img 96 | 97 | def parse_args(): 98 | desc = "Data preprocessing for Deep3DRecon." 
99 | parser = argparse.ArgumentParser(description=desc) 100 | 101 | parser.add_argument('--img_path', type=str, default='./input', help='original images folder') 102 | parser.add_argument('--save_path', type=str, default='./processed_data', help='custom path to save proccessed images and labels') 103 | 104 | 105 | return parser.parse_args() 106 | 107 | # training data pre-processing 108 | def preprocessing(): 109 | 110 | args = parse_args() 111 | image_path = args.img_path 112 | save_path = args.save_path 113 | if not os.path.isdir(save_path): 114 | os.makedirs(save_path) 115 | if not os.path.isdir(os.path.join(save_path,'lm')): 116 | os.makedirs(os.path.join(save_path,'lm')) 117 | if not os.path.isdir(os.path.join(save_path,'lm_bin')): 118 | os.makedirs(os.path.join(save_path,'lm_bin')) 119 | if not os.path.isdir(os.path.join(save_path,'mask')): 120 | os.makedirs(os.path.join(save_path,'mask')) 121 | 122 | img_list = sorted(glob.glob(image_path + '/' + '*.png')) 123 | img_list += sorted(glob.glob(image_path + '/' + '*.jpg')) 124 | 125 | lm3D = load_lm3d() 126 | 127 | with tf.Graph().as_default() as graph, tf.device('/gpu:0'): 128 | lm_detector = load_graph(os.path.join('network','landmark68_detector.pb')) 129 | tf.import_graph_def(lm_detector,name='') 130 | sess = tf.InteractiveSession() 131 | 132 | for file in img_list: 133 | 134 | print(file) 135 | name = file.split('/')[-1].replace('.png','').replace('.jpg','') 136 | img,lm5p = load_img(file,file.replace('png','txt').replace('jpg','txt')) 137 | img_align,_,_ = align_img(img,lm5p,lm3D) # [1,224,224,3] BGR image 138 | 139 | lm68p = get_68landmark(img_align,graph,sess) 140 | lm68p = lm68p.astype(np.float64) 141 | skin_mask = get_skinmask(img_align) 142 | 143 | Image.fromarray(img_align.squeeze(0)[:,:,::-1].astype(np.uint8),'RGB').save(os.path.join(save_path,name+'.png')) 144 | Image.fromarray(skin_mask.astype(np.uint8)).save(os.path.join(save_path,'mask',name+'.png')) 145 | 146 | np.savetxt(os.path.join(save_path,'lm',name+'.txt'),lm68p) 147 | lm_bin = np.reshape(lm68p,[-1]) 148 | lm_bin.tofile(os.path.join(save_path,'lm_bin',name+'.bin')) 149 | 150 | if __name__ == '__main__': 151 | preprocessing() -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## Accurate 3D Face Reconstruction with Weakly-Supervised Learning: From Single Image to Image Set ## 2 | 3 |

4 | 5 |

6 | 7 | ### **_\*\*\*07/20/2021: A [PyTorch implementation](https://github.com/sicxu/Deep3DFaceRecon_pytorch) which has much better performance and is much easier to use is available now. This repo will not be maintained in the future. \*\*\*_** 8 | 9 | 10 | This is a tensorflow implementation of the following paper: 11 | 12 | Y. Deng, J. Yang, S. Xu, D. Chen, Y. Jia, and X. Tong, [Accurate 3D Face Reconstruction with Weakly-Supervised Learning: From Single Image to Image Set](https://arxiv.org/abs/1903.08527), IEEE Computer Vision and Pattern Recognition Workshop (CVPRW) on Analysis and Modeling of Faces and Gestures (AMFG), 2019. (**_Best Paper Award!_**) 13 | 14 | The method enforces hybrid-level weakly-supervised training for CNN-based 3D face reconstruction. It is fast, accurate, and robust to pose and occlusions. It achieves state-of-the-art performance on multiple datasets such as FaceWarehouse, MICC Florence and BU-3DFE. 15 | 16 | 17 | 18 | 19 | ## Features 20 | 21 | ### ● Accurate shapes 22 | The method reconstructs faces with high accuracy. Quantitative evaluations (shape errors in mm) on several benchmarks show its state-of-the-art performance: 23 | 24 | 25 | |Method|FaceWareHouse|Florence|BU3DFE| 26 | |:---:|:---:|:---:|:---:| 27 | |[Tewari et al. 17](https://arxiv.org/abs/1703.10580)|2.19±0.54|-|-| 28 | |[Tewari et al. 18](https://arxiv.org/abs/1712.02859)|1.84±0.38|-|-| 29 | |[Genova et al. 18](https://arxiv.org/abs/1806.06098)|-|1.77±0.53|-| 30 | |[Sela et al. 17](https://arxiv.org/abs/1703.10131)|-|-|2.91±0.60| 31 | |[PRN 18](https://arxiv.org/abs/1803.07835)|-|-|1.86±0.47| 32 | |Ours|**1.81±0.50**|**1.67±0.50**|**1.40±0.31**| 33 | 34 | (Please refer to our paper for more details about these results.) 35 | 36 | ### ● High fidelity textures 37 | The method produces high-fidelity face textures while preserving the identity information of the input images. Scene illumination is also disentangled to generate a pure albedo. 38 |

39 | 40 |

41 | 42 | ### ● Robust 43 | The method can provide reasonable results under extreme conditions such as large pose and occlusions. 44 |

45 | 46 |

47 | 48 | ### ● Aligned with images 49 | Our method aligns reconstruction faces with input images. It provides face pose estimation and 68 facial landmarks which are useful for other tasks. We conduct an experiment on AFLW_2000 dataset (NME) to evaluate the performance, as shown in the table below: 50 |

51 | 52 |

53 | 54 | |Method|[0°,30°]|[30°,60°]|[60°,90°]|Overall| 55 | |:---:|:---:|:---:|:---:|:---:| 56 | |[3DDFA 16](https://arxiv.org/abs/1511.07212)|3.78|4.54|7.93|5.42| 57 | |[3DDFA+SDM 16](https://arxiv.org/abs/1511.07212)|3.43|4.24|7.17|4.94| 58 | |[Bulat et al. 17](https://arxiv.org/abs/1703.00862)|**2.47**|**3.01**|**4.31**|**3.26**| 59 | |[PRN 18](https://arxiv.org/abs/1803.07835)|2.75|3.51|4.61|3.62| 60 | |Ours|2.56|3.11|4.45|3.37| 61 | 62 | 63 | ### ● Easy and Fast 64 | Faces are represented with Basel Face Model 2009, which is easy for further manipulations (e.g expression transfer). ResNet-50 is used as backbone network to achieve over 50 fps (on GTX 1080) for reconstructions. 65 | 66 | 67 | ## Getting Started 68 | ### Testing Requirements ### 69 | 70 | - Reconstructions can be done on both Windows and Linux. However, we suggest running on Linux because the rendering process is only supported on Linux. 71 | - Python 3.6 (numpy, scipy, pillow, argparse). 72 | - Tensorflow 1.12. 73 | - [Basel Face Model 2009 (BFM09)](https://faces.dmi.unibas.ch/bfm/main.php?nav=1-0&id=basel_face_model). 74 | - [Expression Basis (transferred from Facewarehouse by Guo et al.)](https://github.com/Juyong/3DFace). The original BFM09 model does not handle expression variations so extra expression basis are needed. 75 | - [tf mesh renderer](https://github.com/google/tf_mesh_renderer/tree/ba27ea1798f6ee8d03ddbc52f42ab4241f9328bb). We use the library to render reconstruction images. **Note that the rendering tool can only be used on Linux.** 76 | 77 | ### Installation ### 78 | #### 1. Clone the repository 79 | ``` 80 | git clone https://github.com/Microsoft/Deep3DFaceReconstruction --recursive 81 | cd Deep3DFaceReconstruction 82 | ``` 83 | 84 | #### 2. Set up the python environment 85 | If you use anaconda, run the following: 86 | ``` 87 | conda create -n deep3d python=3.6 88 | source activate deep3d 89 | conda install tensorflow-gpu==1.12.0 scipy 90 | pip install pillow argparse 91 | ``` 92 | 93 | Alternatively, you can install tensorflow via pip install (In this way, you need to link /usr/local/cuda to cuda-9.0): 94 | ``` 95 | pip install tensorflow-gpu==1.12.0 96 | ``` 97 | 98 | #### 3. Compile tf_mesh_renderer 99 | 100 | If you install tensorflow using pip, we provide a [pre-compiled binary file (rasterize_triangles_kernel.so)](https://drive.google.com/file/d/1VUtJPdg0UiJkKWxkACs8ZTf5L7Y4P9Wj/view?usp=sharing) of the library. **Note that the pre-compiled file can only be run with tensorflow 1.12.** 101 | 102 | If you install tensorflow using conda, you have to compile tf_mesh_renderer from sources. Compile tf_mesh_renderer with Bazel. **Set -D_GLIBCXX_USE_CXX11_ABI=1 in ./mesh_renderer/kernels/BUILD before the compilation**: 103 | ``` 104 | cd tf_mesh_renderer 105 | git checkout ba27ea1798 106 | git checkout master WORKSPACE 107 | bazel test ... 108 | cd .. 109 | ``` 110 | If the library is compiled correctly, there should be a file named "rasterize_triangles_kernel.so" in ./tf_mesh_renderer/bazel-bin/mesh_renderer/kernels. 111 | 112 | After compilation, copy corresponding files to ./renderer subfolder: 113 | ``` 114 | cd renderer 115 | cp ./tf_mesh_renderer/mesh_renderer/{camera_utils.py,mesh_renderer.py,rasterize_triangles.py} ./renderer/ 116 | cp ./tf_mesh_renderer/bazel-bin/mesh_renderer/kernels/rasterize_triangles_kernel.so ./renderer/ 117 | ``` 118 | If you download our pre-compiled binary file, put it into ./renderer subfolder as well. 
119 | 120 | Replace the library path in Line 26 of ./renderer/rasterize_triangles.py with "./renderer/rasterize_triangles_kernel.so". 121 | 122 | Replace the "xrange" function in Line 109 of ./renderer/rasterize_triangles.py with "range" for compatibility with Python 3. 123 | 124 | 125 | ### Testing with pre-trained network ### 126 | 127 | 1. Download the Basel Face Model. Due to the license agreement of the Basel Face Model, you have to download the BFM09 model after submitting an application on its [home page](https://faces.dmi.unibas.ch/bfm/main.php?nav=1-2&id=downloads). After getting access to the BFM data, download "01_MorphableModel.mat" and put it into the ./BFM subfolder. 128 | 129 | 2. Download the Expression Basis provided by [Guo et al.](https://github.com/Juyong/3DFace) You can find a link named "CoarseData" in the first row of the Introduction section of their repository. Download and unzip Coarse_Dataset.zip, and put "Exp_Pca.bin" into the ./BFM subfolder. The expression basis is constructed using [Facewarehouse](http://kunzhou.net/zjugaps/facewarehouse/) data and transferred to the BFM topology. 130 | 131 | 3. Download the pre-trained [reconstruction network](https://drive.google.com/file/d/176LCdUDxAj7T2awQ5knPMPawq5Q2RUWM/view?usp=sharing), unzip it, and put "FaceReconModel.pb" into the ./network subfolder. 132 | 133 | 4. Run the demo code. 134 | 135 | ``` 136 | python demo.py 137 | ``` 138 | 139 | 5. The ./input subfolder contains several test images and the ./output subfolder stores their reconstruction results. For each input test image, two output files can be obtained after running the demo code: 140 | - "xxx.mat" : 141 | - cropped_img: an RGB image after alignment, which is the input to the R-Net 142 | - recon_img: an RGBA reconstruction image aligned with the input image (only on Linux). 143 | - coeff: output coefficients of R-Net. 144 | - face_shape: vertex positions of the 3D face in the world coordinate. 145 | - face_texture: vertex texture of the 3D face, which excludes lighting effects. 146 | - face_color: vertex color of the 3D face, which takes lighting into consideration. 147 | - lm\_68p: 68 2D facial landmarks derived from the reconstructed 3D face. The landmarks are aligned with cropped_img. 148 | - lm\_5p: 5 detected landmarks aligned with cropped_img. 149 | - "xxx_mesh.obj" : 3D face mesh in the world coordinate (best viewed in MeshLab). 150 | 151 | ### Training requirements ### 152 | 153 | - Training is only supported on Linux. To train a new model from scratch, more requirements are needed on top of those listed for the testing stage. 154 | - [Facenet](https://github.com/davidsandberg/facenet) provided by 155 | Sandberg et al. In our paper, we use a network to extract perceptual face features. This network model cannot be publicly released. As an alternative, we recommend using the Facenet from Sandberg et al. This repo uses the version [20170512-110547](https://github.com/davidsandberg/facenet/blob/529c3b0b5fc8da4e0f48d2818906120f2e5687e6/README.md) trained on MS-Celeb-1M. The training process has been tested with this model to ensure similar results. 156 | - [Resnet50-v1](https://github.com/tensorflow/models/blob/master/research/slim/README.md) pre-trained on ImageNet from Tensorflow Slim. We use the version resnet_v1_50_2016_08_28.tar.gz as an initialization of the face reconstruction network. 157 | - [68-facial-landmark detector](https://drive.google.com/file/d/1KYFeTb963jg0F47sTiwqDdhBIvRlUkPa/view?usp=sharing). We use 68 facial landmarks for loss calculation during training. 
To make the training process reproducible, we provide a lightweight detector that produces comparable results to [the method of Bulat et al.](https://github.com/1adrianb/2D-and-3D-face-alignment). The detector is trained on [300WLP](http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm), [LFW](http://vis-www.cs.umass.edu/lfw/), and [LS3D-W](https://www.adrianbulat.com/face-alignment). 158 | 159 | ### Training preparation ### 160 | 161 | 1. Download the [pre-trained weights](https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk/edit) of Facenet provided by Sandberg et al., unzip them and put all files in ./weights/id_net. 162 | 2. Download the [pre-trained weights](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz) of Resnet_v1_50 provided by Tensorflow Slim, unzip them and put resnet_v1_50.ckpt in ./weights/resnet. 163 | 3. Download the [68 landmark detector](https://drive.google.com/file/d/1KYFeTb963jg0F47sTiwqDdhBIvRlUkPa/view?usp=sharing) and put the file in ./network. 164 | 165 | ### Data pre-processing ### 166 | 1. To train our model with custom images, 5 facial landmarks for each image are needed in advance for the image pre-alignment process. We recommend using [dlib](http://dlib.net/) or [MTCNN](https://github.com/ipazc/mtcnn). Use these public face detectors to get the 5 landmarks, and save all images and their corresponding landmark files in the same folder. Note that an image and its detected landmark file should have the same name. 167 | 2. Align images and generate 68 landmarks as well as skin masks for training: 168 | 169 | ``` 170 | # Run the following command for data pre-processing. By default, the code uses the example images in ./input and saves the processed data in ./processed_data 171 | python preprocess_img.py 172 | 173 | # Alternatively, you can set a custom image path and save path 174 | python preprocess_img.py --img_path <image_folder> --save_path <save_folder> 175 | 176 | ``` 177 | 178 | ### Training networks ### 179 | 1. Train the reconstruction network with the following command: 180 | ``` 181 | # By default, the code uses the data in ./processed_data as both training data and validation data 182 | python train.py 183 | 184 | # Alternatively, you can set custom data paths 185 | python train.py --data_path <training_data_folder> --val_data_path <validation_data_folder> --model_name <model_name> 186 | 187 | ``` 188 | 2. Monitor the training process via tensorboard: 189 | ``` 190 | tensorboard --logdir=result/<model_name> --port=10001 191 | ``` 192 | 3. Evaluate the trained model: 193 | ``` 194 | python demo.py --use_pb 0 --pretrain_weights <trained_weights>.ckpt 195 | ``` 196 | Training a model with a batch size of 16 for 200K iterations takes 20 hours on a single Tesla M40 GPU. 197 | 198 | ## Latest Update 199 | 200 | ### 2020.4 ### 201 | The face reconstruction process is now fully implemented in tensorflow, whereas the old version used numpy. We have also integrated the rendering process into the framework. As a result, reconstruction images aligned with the input can be obtained without extra effort. The whole process is tensorflow-based, which allows gradient back-propagation for other tasks. 202 | ### 2020.6 ### 203 | Uploaded a [pre-trained model](https://drive.google.com/file/d/1fPsvLKghlCK8rknb9GPiKwIq9HIqWWwV/view?usp=sharing) with the white-light assumption described in the paper. 204 | 205 | ### 2020.12 ### 206 | Uploaded the training code for single image face reconstruction. 207 | 208 | ## Note 209 | 210 | 1. An image pre-alignment with 5 facial landmarks is necessary before reconstruction. 
In our image pre-processing stage, we solve a least-squares problem between the 5 facial landmarks on the image and the 5 corresponding landmarks of the BFM09 average 3D face to cancel out face scale and misalignment. To get the 5 facial landmarks, you can choose any open-source face detector that returns them, such as [dlib](http://dlib.net/) or [MTCNN](https://github.com/ipazc/mtcnn). However, these traditional 2D detectors may return wrong landmarks under large poses, which could influence the alignment result. Therefore, we recommend using [the method of Bulat et al.](https://github.com/1adrianb/2D-and-3D-face-alignment) to get facial landmarks (3D definition) with semantic consistency for large-pose images. Note that our model is trained without position augmentation, so a bad alignment may lead to inaccurate reconstruction results. We put some examples in the ./input subfolder for reference. 211 | 212 | 213 | 2. We assume a [pinhole camera model](https://en.wikipedia.org/wiki/Pinhole_camera_model) for face projection. The camera is positioned at (0,0,10) (dm) in the world coordinate and points to the negative z axis. We empirically set the camera fov to 12.6 and fix it during training and inference. Faces in canonical views are at the origin of the world coordinate and face the positive z axis. Rotations and translations predicted by the R-Net are all with respect to the world coordinate. A rough sketch of this projection is given below. 214 |
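As a rough illustration of this camera setup, the following minimal sketch (our own simplification for reference, not the actual code in face_decoder.py; the helper name and sign conventions are assumptions) projects world-coordinate vertices onto the 224x224 input image:

```
import numpy as np

# Minimal pinhole-projection sketch under the conventions stated above:
# camera at (0,0,10) dm looking along -z, fov = 12.6 degrees, 224x224 image.
def project_to_image(pts_world, img_size=224, fov_deg=12.6, cam_z=10.0):
    # focal length in pixels implied by the field of view (about 1015 px for a 224x224 crop)
    focal = 0.5 * img_size / np.tan(0.5 * np.deg2rad(fov_deg))
    depth = cam_z - pts_world[:, 2]     # distance from the camera along the viewing axis
    u = focal * pts_world[:, 0] / depth + img_size / 2.0
    v = focal * pts_world[:, 1] / depth + img_size / 2.0
    v = img_size - 1.0 - v              # assumed flip so the image origin is the top-left corner
    return np.stack([u, v], axis=1)     # [N,2] pixel coordinates
```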

215 | 216 |

217 | 218 | 3. The current model is trained using 3-channel (r,g,b) scene illumination instead of white light described in the paper. As a result, the gamma coefficient that controls lighting has a dimension of 27 instead of 9. 219 | 220 | 4. We excluded ear and neck region of original BFM09 to allow the network concentrate on the face region. To see which vertices in the original model are preserved, check select_vertex_id.mat in the ./BFM subfolder. Note that index starts from 1. 221 | 222 | 5. Our model may give inferior results for images with severe perspetive distortions (e.g., some selfies). In addition, we cannot well handle faces with eyes closed due to the lack of these kind of images in the training data. 223 | 224 | 5. If you have any further questions, please contact Yu Deng (dengyu2008@hotmail.com) and Jiaolong Yang (jiaoyan@microsoft.com). 225 | 226 | 227 | ## Citation 228 | 229 | Please cite the following paper if this model helps your research: 230 | 231 | @inproceedings{deng2019accurate, 232 | title={Accurate 3D Face Reconstruction with Weakly-Supervised Learning: From Single Image to Image Set}, 233 | author={Yu Deng and Jiaolong Yang and Sicheng Xu and Dong Chen and Yunde Jia and Xin Tong}, 234 | booktitle={IEEE Computer Vision and Pattern Recognition Workshops}, 235 | year={2019} 236 | } 237 | ## 238 | The face images on this page are from the public [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) dataset released by MMLab, CUHK. 239 | -------------------------------------------------------------------------------- /reconstruction_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import face_decoder 3 | import networks 4 | import losses 5 | from utils import * 6 | ############################################################################################### 7 | # model for single image face reconstruction 8 | ############################################################################################### 9 | class Reconstruction_model(): 10 | # initialization 11 | def __init__(self,opt): 12 | self.Face3D = face_decoder.Face3D() #analytic 3D face object 13 | self.opt = opt # training options 14 | self.Optimizer = tf.train.AdamOptimizer(learning_rate = opt.lr) # optimizer 15 | 16 | # load input data from queue 17 | def set_input(self,input_iterator): 18 | self.imgs,self.lm_labels,self.attention_masks = input_iterator.get_next() 19 | 20 | # forward process of the model 21 | def forward(self,is_train = True): 22 | 23 | with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE): 24 | self.coeff = networks.R_Net(self.imgs,is_training=is_train) 25 | 26 | self.Face3D.Reconstruction_Block(self.coeff,self.opt) 27 | 28 | self.id_labels = networks.Perceptual_Net(self.imgs) 29 | self.id_features = networks.Perceptual_Net(self.Face3D.render_imgs) 30 | 31 | self.photo_loss = losses.Photo_loss(self.imgs,self.Face3D.render_imgs,self.Face3D.img_mask_crop*self.attention_masks) 32 | self.landmark_loss = losses.Landmark_loss(self.Face3D.landmark_p,self.lm_labels) 33 | self.perceptual_loss = losses.Perceptual_loss(self.id_features,self.id_labels) 34 | 35 | self.reg_loss = losses.Regulation_loss(self.Face3D.id_coeff,self.Face3D.ex_coeff,self.Face3D.tex_coeff,self.opt) 36 | self.reflect_loss = losses.Reflectance_loss(self.Face3D.face_texture,self.Face3D.facemodel) 37 | self.gamma_loss = losses.Gamma_loss(self.Face3D.gamma) 38 | 39 | 40 | self.loss = self.opt.w_photo*self.photo_loss + 
self.opt.w_lm*self.landmark_loss + self.opt.w_id*self.perceptual_loss\ 41 | + self.opt.w_reg*self.reg_loss + self.opt.w_ref*self.reflect_loss + self.opt.w_gamma*self.gamma_loss 42 | 43 | # backward process 44 | def backward(self,is_train = True): 45 | if is_train: 46 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 47 | var_list = tf.trainable_variables() 48 | update_var_list = [v for v in var_list if 'resnet_v1_50' in v.name or 'fc-' in v.name] 49 | grads = tf.gradients(self.loss,update_var_list) 50 | # get train_op with update_ops to ensure updating for bn parameters 51 | with tf.control_dependencies(update_ops): 52 | self.train_op = self.Optimizer.apply_gradients(zip(grads,update_var_list),global_step = self.opt.global_step) 53 | 54 | # if not training stage, avoid updating variables 55 | else: 56 | pass 57 | 58 | # forward and backward 59 | def step(self, is_train = True): 60 | with tf.variable_scope(tf.get_variable_scope()) as scope: 61 | self.forward(is_train = is_train) 62 | self.backward(is_train = is_train) 63 | 64 | # statistics summarization 65 | def summarize(self): 66 | 67 | # scalar and histogram stats 68 | stat = [ 69 | tf.summary.scalar('reflect_error',self.reflect_loss), 70 | tf.summary.scalar('gamma_error',self.gamma_loss), 71 | tf.summary.scalar('id_sim_error',self.perceptual_loss), 72 | tf.summary.scalar('lm_error',tf.sqrt(self.landmark_loss)), 73 | tf.summary.scalar('photo_error',self.photo_loss), 74 | tf.summary.scalar('train_error',self.loss), 75 | tf.summary.histogram('id_coeff',self.Face3D.id_coeff), 76 | tf.summary.histogram('ex_coeff',self.Face3D.ex_coeff), 77 | tf.summary.histogram('tex_coeff',self.Face3D.tex_coeff)] 78 | 79 | self.summary_stat = tf.summary.merge(stat) 80 | # combine face region of reconstruction images with input images 81 | render_imgs = self.Face3D.render_imgs[:,:,:,::-1]*self.Face3D.img_mask + tf.cast(self.imgs[:,:,:,::-1],tf.float32)*(1-self.Face3D.img_mask) 82 | render_imgs = tf.clip_by_value(render_imgs,0,255) 83 | render_imgs = tf.cast(render_imgs,tf.uint8) 84 | # image stats 85 | img_stat = [tf.summary.image('imgs',tf.concat([tf.cast(self.imgs[:,:,:,::-1],tf.uint8),render_imgs],axis = 2), max_outputs = 8)] 86 | self.summary_img = tf.summary.merge(img_stat) -------------------------------------------------------------------------------- /renderer/__init__.py: -------------------------------------------------------------------------------- 1 | import sys,os 2 | sys.path.append(os.path.join(os.path.dirname(__file__),'..','renderer')) -------------------------------------------------------------------------------- /skin.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | class GMM: 5 | def __init__(self, dim, num, w, mu, cov, cov_det, cov_inv): 6 | self.dim = dim # feature dimension 7 | self.num = num # number of Gaussian components 8 | self.w = w # weights of Gaussian components (a list of scalars) 9 | self.mu= mu # mean of Gaussian components (a list of 1xdim vectors) 10 | self.cov = cov # covariance matrix of Gaussian components (a list of dimxdim matrices) 11 | self.cov_det = cov_det # pre-computed determinet of covariance matrices (a list of scalars) 12 | self.cov_inv = cov_inv # pre-computed inverse covariance matrices (a list of dimxdim matrices) 13 | 14 | self.factor = [0]*num 15 | for i in range(self.num): 16 | self.factor[i] = (2*math.pi)**(self.dim/2) * self.cov_det[i]**0.5 17 | 18 | def likelihood(self, data): 19 | assert(data.shape[1] == 
self.dim) 20 | N = data.shape[0] 21 | lh = np.zeros(N) 22 | 23 | for i in range(self.num): 24 | data_ = data - self.mu[i] 25 | 26 | tmp = np.matmul(data_,self.cov_inv[i]) * data_ 27 | tmp = np.sum(tmp,axis=1) 28 | power = -0.5 * tmp 29 | 30 | p = np.array([math.exp(power[j]) for j in range(N)]) 31 | p = p/self.factor[i] 32 | lh += p*self.w[i] 33 | 34 | return lh 35 | 36 | 37 | def _rgb2ycbcr(rgb): 38 | m = np.array([[65.481, 128.553, 24.966], 39 | [-37.797, -74.203, 112], 40 | [112, -93.786, -18.214]]) 41 | shape = rgb.shape 42 | rgb = rgb.reshape((shape[0] * shape[1], 3)) 43 | ycbcr = np.dot(rgb, m.transpose() / 255.) 44 | ycbcr[:, 0] += 16. 45 | ycbcr[:, 1:] += 128. 46 | return ycbcr.reshape(shape) 47 | 48 | 49 | def _bgr2ycbcr(bgr): 50 | rgb = bgr[..., ::-1] 51 | return _rgb2ycbcr(rgb) 52 | 53 | 54 | gmm_skin_w = [0.24063933, 0.16365987, 0.26034665, 0.33535415] 55 | gmm_skin_mu = [np.array([113.71862, 103.39613, 164.08226]), 56 | np.array([150.19858, 105.18467, 155.51428]), 57 | np.array([183.92976, 107.62468, 152.71820]), 58 | np.array([114.90524, 113.59782, 151.38217])] 59 | gmm_skin_cov_det = [5692842.5, 5851930.5, 2329131., 1585971.] 60 | gmm_skin_cov_inv = [np.array([[0.0019472069, 0.0020450759, -0.00060243998],[0.0020450759, 0.017700525, 0.0051420014],[-0.00060243998, 0.0051420014, 0.0081308950]]), 61 | np.array([[0.0027110141, 0.0011036990, 0.0023122299],[0.0011036990, 0.010707724, 0.010742856],[0.0023122299, 0.010742856, 0.017481629]]), 62 | np.array([[0.0048026871, 0.00022935172, 0.0077668377],[0.00022935172, 0.011729696, 0.0081661865],[0.0077668377, 0.0081661865, 0.025374353]]), 63 | np.array([[0.0011989699, 0.0022453172, -0.0010748957],[0.0022453172, 0.047758564, 0.020332102],[-0.0010748957, 0.020332102, 0.024502251]])] 64 | 65 | gmm_skin = GMM(3, 4, gmm_skin_w, gmm_skin_mu, [], gmm_skin_cov_det, gmm_skin_cov_inv) 66 | 67 | gmm_nonskin_w = [0.12791070, 0.31130761, 0.34245777, 0.21832393] 68 | gmm_nonskin_mu = [np.array([99.200851, 112.07533, 140.20602]), 69 | np.array([110.91392, 125.52969, 130.19237]), 70 | np.array([129.75864, 129.96107, 126.96808]), 71 | np.array([112.29587, 128.85121, 129.05431])] 72 | gmm_nonskin_cov_det = [458703648., 6466488., 90611376., 133097.63] 73 | gmm_nonskin_cov_inv = [np.array([[0.00085371657, 0.00071197288, 0.00023958916],[0.00071197288, 0.0025935620, 0.00076557708],[0.00023958916, 0.00076557708, 0.0015042332]]), 74 | np.array([[0.00024650150, 0.00045542428, 0.00015019422],[0.00045542428, 0.026412144, 0.018419769],[0.00015019422, 0.018419769, 0.037497383]]), 75 | np.array([[0.00037054974, 0.00038146760, 0.00040408765],[0.00038146760, 0.0085505722, 0.0079136286],[0.00040408765, 0.0079136286, 0.010982352]]), 76 | np.array([[0.00013709733, 0.00051228428, 0.00012777430],[0.00051228428, 0.28237113, 0.10528370],[0.00012777430, 0.10528370, 0.23468947]])] 77 | 78 | gmm_nonskin = GMM(3, 4, gmm_nonskin_w, gmm_nonskin_mu, [], gmm_nonskin_cov_det, gmm_nonskin_cov_inv) 79 | 80 | prior_skin = 0.8 81 | prior_nonskin = 1 - prior_skin 82 | 83 | 84 | # calculate skin attention mask 85 | def skinmask(imbgr): 86 | im = _bgr2ycbcr(imbgr) 87 | 88 | data = im.reshape((-1,3)) 89 | 90 | lh_skin = gmm_skin.likelihood(data) 91 | lh_nonskin = gmm_nonskin.likelihood(data) 92 | 93 | tmp1 = prior_skin * lh_skin 94 | tmp2 = prior_nonskin * lh_nonskin 95 | post_skin = tmp1 / (tmp1+tmp2) # posterior probability 96 | 97 | post_skin = post_skin.reshape((im.shape[0],im.shape[1])) 98 | 99 | post_skin = np.round(post_skin*255) 100 | post_skin = post_skin.astype(np.uint8) 101 | 
post_skin = np.tile(np.expand_dims(post_skin,2),[1,1,3]) # reshape to H*W*3 102 | 103 | return post_skin -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | from options import Option 5 | from reconstruction_model import * 6 | from data_loader import * 7 | from utils import * 8 | import argparse 9 | ############################################################################################### 10 | # training stage 11 | ############################################################################################### 12 | 13 | 14 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 15 | 16 | # training data and validation data 17 | def parse_args(): 18 | desc = "Deep3DFaceReconstruction" 19 | parser = argparse.ArgumentParser(description=desc) 20 | 21 | parser.add_argument('--data_path', type=str, default='./processed_data', help='training data folder') 22 | parser.add_argument('--val_data_path', type=str, default='./processed_data', help='validation data folder') 23 | parser.add_argument('--model_name', type=str, default='./model_test', help='model name') 24 | 25 | 26 | return parser.parse_args() 27 | 28 | # initialize weights for resnet and facenet 29 | def restore_weights_and_initialize(opt): 30 | var_list = tf.trainable_variables() 31 | g_list = tf.global_variables() 32 | 33 | # add batch normalization params into trainable variables 34 | bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name] 35 | bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name] 36 | var_list +=bn_moving_vars 37 | 38 | # create saver to save and restore weights 39 | resnet_vars = [v for v in var_list if 'resnet_v1_50' in v.name] 40 | facenet_vars = [v for v in var_list if 'InceptionResnetV1' in v.name] 41 | saver_resnet = tf.train.Saver(var_list = resnet_vars) 42 | saver_facenet = tf.train.Saver(var_list = facenet_vars) 43 | 44 | saver = tf.train.Saver(var_list = resnet_vars + [v for v in var_list if 'fc-' in v.name],max_to_keep = 50) 45 | 46 | # create session 47 | sess = tf.InteractiveSession(config = opt.config) 48 | 49 | # create summary op 50 | train_writer = tf.summary.FileWriter(opt.train_summary_path, sess.graph) 51 | val_writer = tf.summary.FileWriter(opt.val_summary_path, sess.graph) 52 | 53 | # initialization 54 | tf.global_variables_initializer().run() 55 | tf.local_variables_initializer().run() 56 | 57 | saver_resnet.restore(sess,opt.R_net_weights) 58 | saver_facenet.restore(sess,opt.Perceptual_net_weights) 59 | 60 | return saver, train_writer,val_writer, sess 61 | 62 | 63 | # main function for training 64 | def train(): 65 | 66 | # read BFM face model 67 | # transfer original BFM model to our model 68 | if not os.path.isfile('./BFM/BFM_model_front.mat'): 69 | transferBFM09() 70 | 71 | with tf.Graph().as_default() as graph: 72 | 73 | # training options 74 | args = parse_args() 75 | opt = Option(model_name=args.model_name) 76 | opt.data_path = [args.data_path] 77 | opt.val_data_path = [args.val_data_path] 78 | 79 | # load training data into queue 80 | train_iterator = load_dataset(opt) 81 | # create reconstruction model 82 | model = Reconstruction_model(opt) 83 | # send training data to the model 84 | model.set_input(train_iterator) 85 | # update model variables with training data 86 | model.step(is_train = True) 87 | # summarize training statistics 88 | model.summarize() 89 | 90 | # several training 
stattistics to be saved 91 | train_stat = model.summary_stat 92 | train_img_stat = model.summary_img 93 | train_op = model.train_op 94 | photo_error = model.photo_loss 95 | lm_error = model.landmark_loss 96 | id_error = model.perceptual_loss 97 | 98 | # load validation data into queue 99 | val_iterator = load_dataset(opt,train=False) 100 | # send validation data to the model 101 | model.set_input(val_iterator) 102 | # only do foward pass without updating model variables 103 | model.step(is_train = False) 104 | # summarize validation statistics 105 | model.summarize() 106 | val_stat = model.summary_stat 107 | val_img_stat = model.summary_img 108 | 109 | # initialization 110 | saver, train_writer,val_writer, sess = restore_weights_and_initialize(opt) 111 | 112 | # freeze the graph to ensure no new op will be added during training 113 | sess.graph.finalize() 114 | 115 | # training loop 116 | for i in range(opt.train_maxiter): 117 | _,ph_loss,lm_loss,id_loss = sess.run([train_op,photo_error,lm_error,id_error]) 118 | print('Iter: %d; lm_loss: %f ; photo_loss: %f; id_loss: %f\n'%(i,np.sqrt(lm_loss),ph_loss,id_loss)) 119 | # summarize training stats every iterations 120 | if np.mod(i,opt.train_summary_iter) == 0: 121 | train_summary = sess.run(train_stat) 122 | train_writer.add_summary(train_summary,i) 123 | 124 | # summarize image stats every iterations 125 | if np.mod(i,opt.image_summary_iter) == 0: 126 | train_img_summary = sess.run(train_img_stat) 127 | train_writer.add_summary(train_img_summary,i) 128 | 129 | # summarize validation stats every iterations 130 | if np.mod(i,opt.val_summary_iter) == 0: 131 | val_summary,val_img_summary = sess.run([val_stat,val_img_stat]) 132 | val_writer.add_summary(val_summary,i) 133 | val_writer.add_summary(val_img_summary,i) 134 | 135 | # # save model variables every iterations 136 | if np.mod(i,opt.save_iter) == 0: 137 | saver.save(sess,os.path.join(opt.model_save_path,'iter_%d.ckpt'%i)) 138 | 139 | 140 | if __name__ == '__main__': 141 | train() -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from PIL import Image 4 | from scipy.io import loadmat,savemat 5 | from array import array 6 | 7 | # load expression basis 8 | def LoadExpBasis(): 9 | n_vertex = 53215 10 | Expbin = open('BFM/Exp_Pca.bin','rb') 11 | exp_dim = array('i') 12 | exp_dim.fromfile(Expbin,1) 13 | expMU = array('f') 14 | expPC = array('f') 15 | expMU.fromfile(Expbin,3*n_vertex) 16 | expPC.fromfile(Expbin,3*exp_dim[0]*n_vertex) 17 | 18 | expPC = np.array(expPC) 19 | expPC = np.reshape(expPC,[exp_dim[0],-1]) 20 | expPC = np.transpose(expPC) 21 | 22 | expEV = np.loadtxt('BFM/std_exp.txt') 23 | 24 | return expPC,expEV 25 | 26 | # transfer original BFM09 to our face model 27 | def transferBFM09(): 28 | original_BFM = loadmat('BFM/01_MorphableModel.mat') 29 | shapePC = original_BFM['shapePC'] # shape basis 30 | shapeEV = original_BFM['shapeEV'] # corresponding eigen value 31 | shapeMU = original_BFM['shapeMU'] # mean face 32 | texPC = original_BFM['texPC'] # texture basis 33 | texEV = original_BFM['texEV'] # eigen value 34 | texMU = original_BFM['texMU'] # mean texture 35 | 36 | expPC,expEV = LoadExpBasis() 37 | 38 | # transfer BFM09 to our face model 39 | 40 | idBase = shapePC*np.reshape(shapeEV,[-1,199]) 41 | idBase = idBase/1e5 # unify the scale to decimeter 42 | idBase = idBase[:,:80] # use only first 80 basis 43 | 44 
| exBase = expPC*np.reshape(expEV,[-1,79]) 45 | exBase = exBase/1e5 # unify the scale to decimeter 46 | exBase = exBase[:,:64] # use only first 64 basis 47 | 48 | texBase = texPC*np.reshape(texEV,[-1,199]) 49 | texBase = texBase[:,:80] # use only first 80 basis 50 | 51 | # our face model is cropped align face landmarks which contains only 35709 vertex. 52 | # original BFM09 contains 53490 vertex, and expression basis provided by JuYong contains 53215 vertex. 53 | # thus we select corresponding vertex to get our face model. 54 | 55 | index_exp = loadmat('BFM/BFM_front_idx.mat') 56 | index_exp = index_exp['idx'].astype(np.int32) - 1 #starts from 0 (to 53215) 57 | 58 | index_shape = loadmat('BFM/BFM_exp_idx.mat') 59 | index_shape = index_shape['trimIndex'].astype(np.int32) - 1 #starts from 0 (to 53490) 60 | index_shape = index_shape[index_exp] 61 | 62 | 63 | idBase = np.reshape(idBase,[-1,3,80]) 64 | idBase = idBase[index_shape,:,:] 65 | idBase = np.reshape(idBase,[-1,80]) 66 | 67 | texBase = np.reshape(texBase,[-1,3,80]) 68 | texBase = texBase[index_shape,:,:] 69 | texBase = np.reshape(texBase,[-1,80]) 70 | 71 | exBase = np.reshape(exBase,[-1,3,64]) 72 | exBase = exBase[index_exp,:,:] 73 | exBase = np.reshape(exBase,[-1,64]) 74 | 75 | meanshape = np.reshape(shapeMU,[-1,3])/1e5 76 | meanshape = meanshape[index_shape,:] 77 | meanshape = np.reshape(meanshape,[1,-1]) 78 | 79 | meantex = np.reshape(texMU,[-1,3]) 80 | meantex = meantex[index_shape,:] 81 | meantex = np.reshape(meantex,[1,-1]) 82 | 83 | # other info contains triangles, region used for computing photometric loss, 84 | # region used for skin texture regularization, and 68 landmarks index etc. 85 | other_info = loadmat('BFM/facemodel_info.mat') 86 | frontmask2_idx = other_info['frontmask2_idx'] 87 | skinmask = other_info['skinmask'] 88 | keypoints = other_info['keypoints'] 89 | point_buf = other_info['point_buf'] 90 | tri = other_info['tri'] 91 | tri_mask2 = other_info['tri_mask2'] 92 | 93 | # save our face model 94 | savemat('BFM/BFM_model_front.mat',{'meanshape':meanshape,'meantex':meantex,'idBase':idBase,'exBase':exBase,'texBase':texBase,'tri':tri,'point_buf':point_buf,'tri_mask2':tri_mask2\ 95 | ,'keypoints':keypoints,'frontmask2_idx':frontmask2_idx,'skinmask':skinmask}) 96 | 97 | # load landmarks for standard face, which is used for image preprocessing 98 | def load_lm3d(): 99 | 100 | Lm3D = loadmat('./BFM/similarity_Lm3D_all.mat') 101 | Lm3D = Lm3D['lm'] 102 | 103 | # calculate 5 facial landmarks using 68 landmarks 104 | lm_idx = np.array([31,37,40,43,46,49,55]) - 1 105 | Lm3D = np.stack([Lm3D[lm_idx[0],:],np.mean(Lm3D[lm_idx[[1,2]],:],0),np.mean(Lm3D[lm_idx[[3,4]],:],0),Lm3D[lm_idx[5],:],Lm3D[lm_idx[6],:]], axis = 0) 106 | Lm3D = Lm3D[[1,2,0,3,4],:] 107 | 108 | return Lm3D 109 | 110 | # load input images and corresponding 5 landmarks 111 | def load_img(img_path,lm_path): 112 | 113 | image = Image.open(img_path) 114 | lm = np.loadtxt(lm_path) 115 | 116 | return image,lm 117 | 118 | # save 3D face to obj file 119 | def save_obj(path,v,f,c): 120 | with open(path,'w') as file: 121 | for i in range(len(v)): 122 | file.write('v %f %f %f %f %f %f\n'%(v[i,0],v[i,1],v[i,2],c[i,0],c[i,1],c[i,2])) 123 | 124 | file.write('\n') 125 | 126 | for i in range(len(f)): 127 | file.write('f %d %d %d\n'%(f[i,0],f[i,1],f[i,2])) 128 | 129 | file.close() 130 | 131 | # load .pb file into tensorflow graph 132 | def load_graph(graph_filename): 133 | with tf.gfile.GFile(graph_filename,'rb') as f: 134 | graph_def = tf.GraphDef() 135 | 
graph_def.ParseFromString(f.read()) 136 | 137 | return graph_def --------------------------------------------------------------------------------