├── Demo
│   ├── align
│   │   ├── __init__.py
│   │   ├── .gitignore
│   │   ├── det1.npy
│   │   ├── det2.npy
│   │   ├── det3.npy
│   │   └── detect_face.py
│   ├── 1000_from_CASIA
│   │   ├── centroids.npy
│   │   └── centroids_names.txt
│   ├── README.md
│   ├── alignment.py
│   ├── dumping.py
│   └── demo.py
├── Utils
│   ├── 002.jpg
│   └── README.md
├── Attack
│   ├── example.png
│   ├── README.md
│   ├── cos_mx.py
│   ├── cos_tf.py
│   ├── face_preparation.py
│   ├── utils.py
│   ├── stn.py
│   └── attack.py
├── LICENSE
└── README.md

/Demo/align/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Demo/align/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 |
--------------------------------------------------------------------------------
/Utils/002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/papermsucode/advhat/HEAD/Utils/002.jpg
--------------------------------------------------------------------------------
/Attack/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/papermsucode/advhat/HEAD/Attack/example.png
--------------------------------------------------------------------------------
/Demo/align/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/papermsucode/advhat/HEAD/Demo/align/det1.npy
--------------------------------------------------------------------------------
/Demo/align/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/papermsucode/advhat/HEAD/Demo/align/det2.npy
--------------------------------------------------------------------------------
/Demo/align/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/papermsucode/advhat/HEAD/Demo/align/det3.npy
--------------------------------------------------------------------------------
/Demo/1000_from_CASIA/centroids.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/papermsucode/advhat/HEAD/Demo/1000_from_CASIA/centroids.npy
--------------------------------------------------------------------------------
/Utils/README.md:
--------------------------------------------------------------------------------
1 | An example notebook for the MX-to-TF transformation of the ArcFace models. It is directly applicable to *LResNet100E-IR,ArcFace@ms1m-refine-v2*, *LResNet50E-IR,ArcFace@ms1m-refine-v1*, and *LResNet34E-IR,ArcFace@ms1m-refine-v1*. With slight modifications, *MobileFaceNet,ArcFace@ms1m-refine-v1* can be transformed by analogy.
2 |
3 | In response to [this](https://github.com/papermsucode/advhat/issues/11) issue.
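If you only need the raw weights as a starting point rather than the full notebook, a minimal sketch of the first step, assuming the MXNet checkpoint is saved as `model-symbol.json`/`model-0000.params` (the prefix and output filename are placeholders):

```python
# Dump all MXNet checkpoint arrays to .npz so they can later be mapped
# onto the corresponding TensorFlow variables by name.
import mxnet as mx
import numpy as np

sym, arg_params, aux_params = mx.model.load_checkpoint('model', 0)
weights = {name: arr.asnumpy() for name, arr in arg_params.items()}
weights.update({name: arr.asnumpy() for name, arr in aux_params.items()})
np.savez('arcface_weights.npz', **weights)
```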
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Stepan Komkov, Aleksandr Petiushko
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Demo/README.md:
--------------------------------------------------------------------------------
1 | ## Demo launch
2 |
3 | ArcFace@ms1m-refine-v2 transformed to TensorFlow is available [here](https://drive.google.com/file/d/1fb70KgMRSmaEUF5cJ67BCD_DmTPCR5uJ/view?usp=sharing).
4 |
5 | The command for the demo launch:
6 |
7 | `python3 demo.py PATH_TO_THE_DOWNLOADED_MODEL PATH_TO_THE_DIRECTORY_WITH_CLASS_CENTROIDS`
8 |
9 | Centroids for the first 1000 classes of CASIA are in the "1000_from_CASIA" directory.
10 |
11 | ## Preparation of your own centroids
12 |
13 | ### Alignment
14 |
15 | The dataset of your images has to be arranged in the following way:
16 |
17 |     ├── Person 1
18 |     │   ├── Person_1_image_1.png
19 |     │   ├── Person_1_image_2.png
20 |     │   ├── Person_1_image_3.png
21 |     │   └── Person_1_image_4.png
22 |     ├── Person 2
23 |     │   ├── Person_2_image_1.png
24 |     │   ├── Person_2_image_2.png
25 |     │   ├── Person_2_image_3.png
26 |     │   ├── Person_2_image_4.png
27 |     │   └── Person_2_image_5.png
28 |     ├── Person 3
29 |     │   ├── Person_3_image_1.png
30 |     │   ├── Person_3_image_2.png
31 |     ...
32 |
33 | The command for image alignment:
34 |
35 | `python3 alignment.py PATH_TO_DIRECTORY_WITH_IMAGES PATH_FOR_THE_ALIGNED_IMAGES`
36 |
37 | ### Centroids calculation
38 |
39 | Using the directory with aligned images from the previous step, you can obtain centroids with the following command:
40 |
41 | `python3 dumping.py PATH_TO_DIRECTORY_WITH_ALIGNED_IMAGES PATH_FOR_THE_CENTROIDS PATH_TO_THE_DOWNLOADED_MODEL`
42 |
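For reference, `dumping.py` builds each centroid as the renormalized sum of the class embeddings. A minimal NumPy sketch of that step (with random stand-ins for real embeddings):

```python
import numpy as np

def class_centroids(embs, labels):
    # embs: (N, 512) L2-normalized embeddings; labels: (N,) integer class ids
    centroids = []
    for c in np.unique(labels):
        s = embs[labels == c].sum(axis=0)        # sum the embeddings of one class
        centroids.append(s / np.linalg.norm(s))  # renormalize to unit length
    return np.stack(centroids)

embs = np.random.randn(10, 512)
embs /= np.linalg.norm(embs, axis=1, keepdims=True)
print(class_centroids(embs, np.repeat([0, 1], 5)).shape)  # (2, 512)
```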
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AdvHat: Real-world adversarial attack on ArcFace Face ID system
2 |
3 | By Stepan Komkov and Aleksandr Petiushko
4 |
5 | This is the code repository for the AdvHat research article. The article is available [here](https://arxiv.org/abs/1908.08705). The video demo is available [here](https://youtu.be/a4iNg0wWBsQ). The code used for the article is available right here.
6 |
7 | ## Abstract
8 |
9 | We propose a novel, easily reproducible technique to attack the best public Face ID system, ArcFace, in different shooting conditions. To create an attack, we print a rectangular paper sticker on a common color printer and put it on a hat. The adversarial sticker is prepared with a novel algorithm for off-plane transformations of the image which imitate the sticker's location on the hat. Such an approach confuses the state-of-the-art public Face ID model LResNet100E-IR, ArcFace@ms1m-refine-v2, and is transferable to other Face ID models.
10 |
11 | ## The repository
12 |
13 | The repository is organized as follows:
14 |
15 | * In the Attack directory, you can find code and instructions on how to reproduce the attack for your own images.
16 | * In the Demo directory, you can find a demo script which helps you verify the robustness of the prepared attack under real-world shooting conditions.
17 |
18 | ## Built With
19 |
20 | * [InsightFace's ArcFace](https://github.com/deepinsight/insightface) - The SOTA public Face ID model
21 | * [Kevin Zakka's STN](https://github.com/kevinzakka/spatial-transformer-network) - Spatial Transformer implementation
22 |
23 | ## Citation
24 |
25 | ```
26 | @article{komkov2019advhat,
27 |   title={AdvHat: Real-world adversarial attack on ArcFace Face ID system},
28 |   author={Komkov, Stepan and Petiushko, Aleksandr},
29 |   journal={arXiv preprint arXiv:1908.08705},
30 |   year={2019}
31 | }
32 | ```
33 |
34 | ## License
35 |
36 | This project is licensed under the MIT License - see the [LICENSE](https://github.com/papermsucode/advhat/blob/master/LICENSE) file for details.
37 |
--------------------------------------------------------------------------------
/Attack/README.md:
--------------------------------------------------------------------------------
1 | ## Preparing an attack
2 |
3 | 1. First, take a full-face photo of the attacked person, a full-face photo in a hat, and a
4 | full-face photo in a hat with an example sticker on the hat. To be sure that you use a
5 | sticker of the correct size, follow these instructions:
6 |     1. Download example.png.
7 |     2. Open the downloaded image with the standard Windows print utility.
8 |     3. Choose the mode with 4 photos per page (9 x 13 cm).
9 |     4. Uncheck the box "Fit picture to frame".
10 |     5. Print the page with the example sticker.
11 |     6. Cut out the sticker and put it on the hat.
12 |
13 | 2. Use the following command to prepare the photos:
14 |
15 | `python3 face_preparation.py PATH_TO_THE_IMAGE`
16 |
17 | 3. You need to find parameters for the sticker position initialization. Use the following
18 | command to find these parameters:
19 |
20 | `python3 face_preparation.py PATH_TO_THE_IMAGE_WITH_HAT_ONLY --mask`
21 |
22 | It will show the sticker placement with the default parameters. Change the parameters until the
23 | image looks like the prepared image with the sticker. You can list the parameters using the `--help`
24 | flag.
25 |
26 | 4. Download the TensorFlow ArcFace model
27 | [here](https://drive.google.com/file/d/1fb70KgMRSmaEUF5cJ67BCD_DmTPCR5uJ/view?usp=sharing).
28 |
29 | 5. Launch the attack preparation:
30 |
31 | `python3 attack.py PATH_TO_THE_PREPARED_IMAGE_WITH_HAT PATH_TO_THE_TF_MODEL --anchor_face
32 | PATH_TO_THE_PREPARED_IMAGE_WITHOUT_HAT (sticker position parameters in the same format as in
33 | the third step)`
34 |
35 | 6. Print the obtained sticker, put it on the hat as before, and take a new photo with the sticker.
36 |
37 | 7. Use "face_preparation.py" again to prepare the new photo and "cos_tf.py" to calculate the new similarity:
38 |
39 | `python3 cos_tf.py PATH_TO_THE_PREPARED_IMAGE_WITHOUT_HAT PATH_TO_THE_PREPARED_IMAGE_WITH_HAT_ONLY PATH_TO_THE_TF_MODEL` - baseline similarity
40 |
41 | `python3 cos_tf.py PATH_TO_THE_PREPARED_IMAGE_WITHOUT_HAT PATH_TO_THE_PREPARED_IMAGE_WITH_THE_NEW_STICKER PATH_TO_THE_TF_MODEL` - final similarity
42 |
43 | ### Notes
44 |
45 | Note that our printer has good color rendering, which is why the NPS loss had no influence in our experiments.
46 | You may need to add an NPS loss for your printer.
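If you do need it, one common formulation (in the spirit of the non-printability score of Sharif et al., 2016) penalizes every sticker pixel by its squared distance to the nearest reproducible printer color. A minimal TF1-style sketch, assuming `printable_colors` is an (M, 3) array of RGB triplets in [0, 1] measured from a printed calibration palette; the function name and the min-distance variant are illustrative, not part of this repo:

```python
import numpy as np
import tensorflow as tf

def nps_loss(logo, printable_colors, w_nps):
    # logo: NHWC float tensor in [0, 1]; printable_colors: (M, 3) numpy array
    cols = tf.constant(printable_colors.reshape(1, 1, 1, -1, 3), tf.float32)
    pix = tf.expand_dims(logo, 3)                    # shape N,H,W,1,3
    dists = tf.reduce_sum(tf.square(pix - cols), 4)  # squared distance to each color
    return w_nps * tf.reduce_sum(tf.reduce_min(dists, 3))
```

Such a term could be added to the total loss in attack.py next to the TV loss.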
--------------------------------------------------------------------------------
/Attack/cos_mx.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import mxnet as mx
4 | import mxnet.ndarray as nd
5 | import numpy as np
6 | import skimage.io as io
7 | from skimage.transform import rescale
8 | from numpy import linalg as LA
9 |
10 | # Prepare image to network input format
11 | def prep(im):
12 |     if len(im.shape)==3:
13 |         return np.transpose(im,[2,0,1]).reshape((1,3,112,112))
14 |     elif len(im.shape)==4:
15 |         return np.transpose(im,[0,3,1,2]).reshape((im.shape[0],3,112,112))
16 |
17 | def main(args):
18 |     print(args)
19 |
20 |     # Embedding model
21 |     sym, arg_params, aux_params = mx.model.load_checkpoint(args.model, 0)
22 |     sym = sym.get_internals()['fc1_output']
23 |     model = mx.mod.Module(symbol=sym, context=mx.gpu(0), label_names = None)
24 |     model.bind(data_shapes=[('data', (1, 3, 112, 112))])
25 |     model.set_params(arg_params, aux_params)
26 |
27 |     # Embedding calculation
28 |     im1 = (prep(rescale(io.imread(args.face1)/255.,112./600.,order=5))*255.).astype(np.uint8)
29 |     im2 = (prep(rescale(io.imread(args.face2)/255.,112./600.,order=5))*255.).astype(np.uint8)
30 |
31 |     batch = mx.io.DataBatch(data=[nd.array(im1)])
32 |     model.forward(batch, is_train=False)
33 |     emb1 = model.get_outputs()[0].asnumpy()[0]
34 |     batch = mx.io.DataBatch(data=[nd.array(im2)])
35 |     model.forward(batch, is_train=False)
36 |     emb2 = model.get_outputs()[0].asnumpy()[0]
37 |
38 |     # Normalization
39 |     emb1 /= LA.norm(emb1)
40 |     emb2 /= LA.norm(emb2)
41 |     cos_sim = np.sum(emb1 * emb2)
42 |
43 |     # Result
44 |     print('Cos_sim(face1, face2) =', cos_sim)
45 |
46 | def parse_arguments(argv):
47 |     parser = argparse.ArgumentParser()
48 |
49 |     parser.add_argument('face1', type=str, help='Path to the preprocessed face1.')
50 |     parser.add_argument('face2', type=str, help='Path to the preprocessed face2.')
51 |     parser.add_argument('model', type=str, help='Path to the model.')
52 |
53 |     return parser.parse_args(argv)
54 |
55 | if __name__ == '__main__':
56 |     main(parse_arguments(sys.argv[1:]))
57 |
--------------------------------------------------------------------------------
/Attack/cos_tf.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import tensorflow as tf
4 | import numpy as np
5 | import skimage.io as io
6 | from skimage.transform import rescale
7 |
8 | # Prepare image to network input format
9 | def prep(im):
10 |     if len(im.shape)==3:
11 |         return np.transpose(im,[2,0,1]).reshape((1,3,112,112))*2-1
12 |     elif len(im.shape)==4:
13 |         return np.transpose(im,[0,3,1,2]).reshape((im.shape[0],3,112,112))*2-1
14 |
15 | def main(args):
16 |     print(args)
17 |
18 |     sess = tf.Session()
19 |
20 |     # Embedding model
21 |     with tf.gfile.GFile(args.model, "rb") as f:
22 |         graph_def = tf.GraphDef()
23 |         graph_def.ParseFromString(f.read())
24 |     tf.import_graph_def(graph_def,
25 |                         input_map=None,
26 |                         return_elements=None,
27 |                         name="")
28
| image_input = tf.get_default_graph().get_tensor_by_name('image_input:0') 29 | keep_prob = tf.get_default_graph().get_tensor_by_name('keep_prob:0') 30 | is_train = tf.get_default_graph().get_tensor_by_name('training_mode:0') 31 | embedding = tf.get_default_graph().get_tensor_by_name('embedding:0') 32 | 33 | tfdict = {keep_prob:1.0, is_train:False} 34 | 35 | # Embedding calculation 36 | im1 = prep(rescale(io.imread(args.face1)/255.,112./600.,order=5)) 37 | im2 = prep(rescale(io.imread(args.face2)/255.,112./600.,order=5)) 38 | tfdict[image_input] = im1 39 | emb1 = sess.run(embedding,feed_dict=tfdict) 40 | tfdict[image_input] = im2 41 | emb2 = sess.run(embedding,feed_dict=tfdict) 42 | 43 | # Result 44 | cos_sim = np.sum(emb1 * emb2) 45 | print('Cos_sim(face1, face2) =', cos_sim) 46 | 47 | def parse_arguments(argv): 48 | parser = argparse.ArgumentParser() 49 | 50 | parser.add_argument('face1', type=str, help='Path to the preprocessed face1.') 51 | parser.add_argument('face2', type=str, help='Path to the preprocessed face2.') 52 | parser.add_argument('model', type=str, help='Path to the model.') 53 | 54 | return parser.parse_args(argv) 55 | 56 | if __name__ == '__main__': 57 | main(parse_arguments(sys.argv[1:])) 58 | -------------------------------------------------------------------------------- /Demo/alignment.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import cv2 4 | from skimage import transform as trans 5 | import tensorflow as tf 6 | import os 7 | import skimage.io as io 8 | import sys 9 | from tqdm import tqdm 10 | 11 | import align.detect_face as detect_face 12 | 13 | # Transform grey image to RGB image 14 | def to_rgb(img): 15 | w, h = img.shape 16 | ret = np.empty((w, h, 3), dtype=np.uint8) 17 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 18 | return ret 19 | 20 | # Align face as ArcFace template 21 | def preprocess(img, landmark): 22 | image_size = [112,112] 23 | src = np.array([ 24 | [38.2946, 51.6963], 25 | [73.5318, 51.5014], 26 | [56.0252, 71.7366], 27 | [41.5493, 92.3655], 28 | [70.7299, 92.2041] ], dtype=np.float32) 29 | dst = landmark.astype(np.float32) 30 | tform = trans.SimilarityTransform() 31 | tform.estimate(dst, src) 32 | M = tform.params[0:2,:] 33 | 34 | warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0) 35 | return warped 36 | 37 | def main(args): 38 | 39 | # MTCNN 40 | with tf.Graph().as_default(): 41 | sess = tf.Session() 42 | with sess.as_default(): 43 | pnet, rnet, onet = detect_face.create_mtcnn(sess, None) 44 | threshold = [ 0.6, 0.7, 0.7 ] 45 | factor = 0.709 46 | 47 | # Output dirs creation 48 | if not os.path.exists(args.output_dir): 49 | os.makedirs(args.output_dir) 50 | images = [] 51 | for path in sorted(os.listdir(args.input_dir)): 52 | if not os.path.exists(os.path.join(args.output_dir,path)): 53 | os.mkdir(os.path.join(args.output_dir,path)) 54 | for name in sorted(os.listdir(os.path.join(args.input_dir,path))): 55 | images.append(os.path.join(path,name)) 56 | 57 | # Alignment procedure 58 | for path in tqdm(images): 59 | img = io.imread(os.path.join(args.input_dir,path)) 60 | if img.ndim == 2: 61 | img = to_rgb(img) 62 | img = img[:,:,0:3] 63 | _minsize = min(min(img.shape[0]//5, img.shape[1]//5),80) 64 | bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor) 65 | if bounding_boxes.size>0: 66 | bindex = -1 67 | nrof_faces = bounding_boxes.shape[0] 68 | if nrof_faces>0: 69 | det = 
bounding_boxes[:,0:4] 70 | img_size = np.asarray(img.shape)[0:2] 71 | bindex = 0 72 | if nrof_faces>1: 73 | bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) 74 | img_center = img_size / 2 75 | offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) 76 | offset_dist_squared = np.sum(np.power(offsets,2.0),0) 77 | bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) 78 | points = points[:, bindex] 79 | landmark = points.reshape((2,5)).T 80 | warped = preprocess(img, landmark) 81 | io.imsave(os.path.join(args.output_dir,path), warped) 82 | else: 83 | print(path+' was skipped') 84 | 85 | 86 | def parse_arguments(argv): 87 | parser = argparse.ArgumentParser() 88 | 89 | parser.add_argument('input_dir', type=str, help='Directory with unaligned images.') 90 | parser.add_argument('output_dir', type=str, help='Directory for aligned face thumbnails.') 91 | return parser.parse_args(argv) 92 | 93 | if __name__ == '__main__': 94 | main(parse_arguments(sys.argv[1:])) 95 | -------------------------------------------------------------------------------- /Attack/face_preparation.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | sys.path.append(os.path.join(os.path.dirname(__file__), '../Demo/')) 5 | import tensorflow as tf 6 | import numpy as np 7 | import cv2 8 | import skimage.io as io 9 | from skimage import transform as trans 10 | from align import detect_face 11 | from stn import spatial_transformer_network as stn 12 | from utils import projector 13 | 14 | # Align face as ArcFace template 15 | def preprocess(img, landmark): 16 | image_size = [600,600] 17 | src = 600./112.*np.array([ 18 | [38.2946, 51.6963], 19 | [73.5318, 51.5014], 20 | [56.0252, 71.7366], 21 | [41.5493, 92.3655], 22 | [70.7299, 92.2041] ], dtype=np.float32) 23 | dst = landmark.astype(np.float32) 24 | tform = trans.SimilarityTransform() 25 | tform.estimate(dst, src) 26 | M = tform.params[0:2,:] 27 | 28 | warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0) 29 | return warped 30 | 31 | def main(args): 32 | sess = tf.Session() 33 | pnet, rnet, onet = detect_face.create_mtcnn(sess, None) 34 | threshold = [ 0.6, 0.7, 0.7 ] 35 | factor = 0.709 36 | 37 | img = io.imread(args.image) 38 | _minsize = min(min(img.shape[0]//5, img.shape[1]//5),80) 39 | bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor) 40 | assert bounding_boxes.size>0 41 | points = points[:, 0] 42 | landmark = points.reshape((2,5)).T 43 | warped = preprocess(img, landmark) 44 | 45 | io.imsave(args.image[:-4]+'_aligned.png',warped) 46 | 47 | if args.mask: 48 | logo_mask = np.ones((1,400,900,3),dtype=np.float32) 49 | 50 | logo = tf.placeholder(tf.float32,shape=[1,400,900,3]) 51 | param = tf.placeholder(tf.float32,shape=[1,1]) 52 | ph = tf.placeholder(tf.float32,shape=[1,1]) 53 | result = projector(param,ph,logo) 54 | 55 | face_input = tf.placeholder(tf.float32,shape=[1,600,600,3]) 56 | theta = tf.placeholder(tf.float32,shape=[1,6]) 57 | prepared = stn(result,theta) 58 | 59 | united = prepared[:,300:,150:750]+face_input*(1-prepared[:,300:,150:750]) 60 | 61 | img_with_mask = sess.run(united,feed_dict={ph:[[args.ph]],logo:logo_mask,param:[[args.param]],\ 62 | face_input:np.expand_dims(warped/255.,0),\ 63 | theta:1./args.scale*np.array([[1.,0.,-args.x/450.,0.,1.,-args.y/450.]])})[0] 64 | 65 | io.imsave(args.image[:-4]+'_mask.png',img_with_mask) 66 | 67 | def 
parse_arguments(argv): 68 | parser = argparse.ArgumentParser() 69 | 70 | parser.add_argument('image', type=str, help='Path to the image.') 71 | parser.add_argument('--mask', action='store_true', help='Use when search the sticker parameters') 72 | parser.add_argument('--ph', type=float, default=17., help='Angle of the off-plane rotation') 73 | parser.add_argument('--param', type=float, default=0.0013, help='Parabola rate for the off-plane parabolic transformation') 74 | parser.add_argument('--scale', type=float, default=0.465, help='Scaling parameter for the sticker') 75 | parser.add_argument('--x', type=float, default=0., help='Translation of the sticker along x-axis') 76 | parser.add_argument('--y', type=float, default=-15., help='Translation of the sticker along y-axis') 77 | return parser.parse_args(argv) 78 | 79 | if __name__ == '__main__': 80 | main(parse_arguments(sys.argv[1:])) 81 | 82 | -------------------------------------------------------------------------------- /Attack/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def tf_integral(x,a): 6 | return 0.5*(x*tf.sqrt(x**2+a)+a*tf.log(tf.abs(x+tf.sqrt(x**2+a)))) 7 | def tf_pre_parabol(x,par): 8 | x = x-450. 9 | prev = 2.*par*(tf_integral(tf.abs(x),0.25/(par**2))-tf_integral(0,0.25/(par**2))) 10 | return prev+450. 11 | 12 | def projector(param,ph,logo): 13 | '''Apply off-plane transformations to the sticker images 14 | param: parabola rate of the off-plane parabolic tranformation, rank 2 tensor with shape [N, 1] 15 | ph:angle of the off-plane rotation, rank 2 tensor with shape [N, 1] 16 | logo: rank 4 tensor with format NHWC and shape [N, 400, 900, 3] 17 | 18 | return: rank 4 tensor with format NHWC and shape [N, 900, 900, 3] 19 | ''' 20 | right_cumsum = tf.transpose(tf.pad(tf.cumsum(logo[:,:,450:],axis=2),tf.constant([[0,0],[0,0],[1,0],[0,0]])),[0,2,1,3]) 21 | left_cumsum = tf.transpose(tf.pad(tf.cumsum(logo[:,:,:450][:,:,::-1],axis=2),tf.constant([[0,0],[0,0],[1,0],[0,0]])),[0,2,1,3]) 22 | 23 | anchors = tf.expand_dims(tf.cast(tf.round(tf.clip_by_value(\ 24 | tf_pre_parabol(tf.expand_dims(tf.constant(np.arange(450,901,dtype=np.float32)),0),\ 25 | param)-450.,0,450.)),tf.int32),2) 26 | anch_inds = tf.tile(tf.expand_dims(tf.expand_dims(tf.range(tf.shape(param)[0]),1),2),[1,451,1]) 27 | new_anchors = tf.concat([anch_inds,anchors],2) 28 | 29 | anchors_div = tf.expand_dims(tf.cast(tf.clip_by_value(anchors[:,1:]-anchors[:,:-1],1,900),tf.float32),3) 30 | right_anchors_cumsum = tf.gather_nd(right_cumsum,new_anchors) 31 | right_anchors_diffs = right_anchors_cumsum[:,1:]-right_anchors_cumsum[:,:-1] 32 | right = right_anchors_diffs/anchors_div 33 | left_anchors_cumsum = tf.gather_nd(left_cumsum,new_anchors) 34 | left_anchors_diffs = left_anchors_cumsum[:,1:]-left_anchors_cumsum[:,:-1] 35 | left = left_anchors_diffs/anchors_div 36 | 37 | tmp_result = tf.transpose(tf.concat([left[:,::-1],right],axis=1),[0,2,1,3]) 38 | 39 | cumsum = tf.pad(tf.cumsum(tmp_result,axis=1),tf.constant([[0,0],[1,0],[0,0],[0,0]])) 40 | 41 | angle = tf.expand_dims(np.pi/180.*ph,2) 42 | 43 | z = param*tf.constant((np.arange(900,dtype=np.float32)-449.5)**2) 44 | z_tile = tf.tile(tf.expand_dims(z,1),tf.constant([1,901,1])) 45 | 46 | y_coord = tf.constant(np.arange(-250,651,dtype=np.float32)) 47 | y_tile = tf.tile(tf.expand_dims(tf.expand_dims(y_coord,1),0),[tf.shape(param)[0],1,900]) 48 | 49 | y_prev = (y_tile+z_tile*tf.sin(-angle))/tf.cos(angle) 50 | y_round = 
tf.cast(tf.round(tf.clip_by_value(y_prev,0,400.)),tf.int32) 51 | y_div = tf.clip_by_value(y_round[:,1:]-y_round[:,:-1],1,900) 52 | 53 | x_coord = tf.constant(np.arange(900,dtype=np.int32)) 54 | x_tile = tf.tile(tf.expand_dims(tf.expand_dims(x_coord,0),0),[tf.shape(param)[0],901,1]) 55 | 56 | b_coord = tf.tile(tf.expand_dims(tf.expand_dims(tf.range(tf.shape(param)[0]),1),2),[1,901,900]) 57 | 58 | indices = tf.stack([b_coord,y_round,x_tile],axis=3) 59 | 60 | chosen_cumsum = tf.gather_nd(cumsum,indices) 61 | chosen_cumsum_diffs = chosen_cumsum[:,1:]-chosen_cumsum[:,:-1] 62 | final_results = tf.clip_by_value(chosen_cumsum_diffs/tf.expand_dims(tf.cast(y_div,tf.float32),3),0.,1.) 63 | 64 | return final_results 65 | 66 | def TVloss(logo,w_tv): 67 | '''Calculate TV loss of the sticker image with predefined weight. 68 | logo: rank 4 tensor with format NHWC 69 | w_tv: weight of the TV loss 70 | 71 | return: scalar value of the TV loss 72 | ''' 73 | vert_diff = logo[:,1:]-logo[:,:-1] 74 | hor_diff = logo[:,:,1:]-logo[:,:,:-1] 75 | vert_diff_sq = tf.square(vert_diff) 76 | hor_diff_sq = tf.square(hor_diff) 77 | vert_pad = tf.pad(vert_diff_sq,tf.constant([[0,0],[1,0],[0,0],[0,0]])) 78 | hor_pad = tf.pad(hor_diff_sq,tf.constant([[0,0],[0,0],[1,0],[0,0]])) 79 | tv_sum = vert_pad+hor_pad 80 | tv = tf.sqrt(tv_sum+1e-5) 81 | tv_final_sum = tf.reduce_sum(tv) 82 | tv_loss = w_tv*tv_final_sum 83 | return tv_loss 84 | -------------------------------------------------------------------------------- /Demo/dumping.py: -------------------------------------------------------------------------------- 1 | import skimage.io as io 2 | import os 3 | import numpy as np 4 | from tqdm import tqdm 5 | import sys 6 | import argparse 7 | 8 | def main(args): 9 | 10 | # Output dirs creation 11 | if not os.path.exists(args.output_dir): 12 | os.makedirs(args.output_dir) 13 | images = [] 14 | labels = [] 15 | label = 0 16 | for path in sorted(os.listdir(args.input_dir)): 17 | for name in sorted(os.listdir(os.path.join(args.input_dir,path))): 18 | if args.mx: 19 | images.append([[label],os.path.join(args.input_dir,path,name)]) 20 | else: 21 | images.append(os.path.join(args.input_dir,path,name)) 22 | labels.append(label) 23 | label += 1 24 | 25 | 26 | if args.mx: 27 | # MXnet model 28 | import mxnet as mx 29 | sym, arg_params, aux_params = mx.model.load_checkpoint(args.model, 0) 30 | sym = sym.get_internals()['fc1_output'] 31 | model = mx.mod.Module(symbol=sym, context=mx.gpu(0), label_names = None) 32 | model.bind(data_shapes=[('data', (1, 3, 112, 112))]) 33 | model.set_params(arg_params, aux_params) 34 | iterator = mx.image.ImageIter(batch_size=args.batch,data_shape=(3,112,112),imglist=images,path_root='') 35 | else: 36 | # TensorFlow model 37 | import tensorflow as tf 38 | frozen_graph = args.model 39 | with tf.gfile.GFile(frozen_graph, "rb") as f: 40 | graph_def = tf.GraphDef() 41 | graph_def.ParseFromString(f.read()) 42 | with tf.Graph().as_default() as graph: 43 | tf.import_graph_def(graph_def, 44 | input_map=None, 45 | return_elements=None, 46 | name="") 47 | image_input = graph.get_tensor_by_name('image_input:0') 48 | keep_prob = graph.get_tensor_by_name('keep_prob:0') 49 | is_train = graph.get_tensor_by_name('training_mode:0') 50 | embedding = graph.get_tensor_by_name('embedding:0') 51 | sess = tf.Session(graph=graph) 52 | inp_place = tf.placeholder(np.array(['1','2'],dtype='str').dtype) 53 | pipeline = tf.data.Dataset.from_tensor_slices(inp_place) 54 | def parse(filename): 55 | image_string = tf.read_file(filename) 56 | 
|             image = tf.image.decode_jpeg(image_string,dct_method="INTEGER_ACCURATE")
57 |             image = tf.cast(image,tf.float32)
58 |             image = (image - 127.5)*0.0078125
59 |             image = tf.transpose(image,perm=[2,0,1])
60 |             return image
61 |         pipeline = pipeline.map(parse,num_parallel_calls=4)
62 |         pipeline = pipeline.batch(args.batch)
63 |         pipeline = pipeline.prefetch(8)
64 |         iterator = pipeline.make_initializable_iterator()
65 |         next_element = iterator.get_next()
66 |         sess.run(iterator.initializer,feed_dict={inp_place:images})
67 |
68 |     # Embeddings evaluation
69 |     embs = np.zeros((len(images),512),dtype=np.float32)
70 |     for i in tqdm(range(int(np.ceil(len(images)/args.batch)))):
71 |         if args.mx:
72 |             db = mx.io.DataBatch(data=iterator.next().data)
73 |             model.forward(db, is_train=False)
74 |             emb = model.get_outputs()[0].asnumpy()
75 |             length = min(args.batch,len(images)-i*args.batch)
76 |             embs[i*args.batch:i*args.batch+length] = emb[:length]/np.expand_dims(np.sqrt(np.sum(emb[:length]**2,1)),1)
77 |         else:
78 |             db = sess.run(next_element)
79 |             embs[i*args.batch:min((i+1)*args.batch,len(images))] = sess.run(embedding,feed_dict=\
80 |                 {image_input:db,keep_prob:1.0,is_train:False})
81 |
82 |     # Centroids preparation
83 |     anchor = np.zeros((label,512),dtype=np.float32)
84 |     labels = np.array(labels)
85 |     for i in range(label):
86 |         tmp = np.sum(embs[labels==i],axis=0)
87 |         anchor[i] = tmp/np.sqrt(np.sum(tmp**2))
88 |     np.save(os.path.join(args.output_dir,'centroids'),anchor)
89 |     names = open(os.path.join(args.output_dir,'centroids_names.txt'),'w')
90 |     for i in sorted(os.listdir(args.input_dir)):
91 |         names.write(i+'\n')
92 |     names.close()
93 |
94 |
95 | def parse_arguments(argv):
96 |     parser = argparse.ArgumentParser()
97 |
98 |     parser.add_argument('input_dir', type=str, help='Directory with aligned images.')
99 |     parser.add_argument('output_dir', type=str, help='Directory to save embeddings.')
100 |     parser.add_argument('model',type=str, help='Path to the model.')
101 |     parser.add_argument('--mx',action='store_true', help='Flag to use the original mxnet model.')
102 |     parser.add_argument('--batch',type=int, help='Batch size.',default=30)
103 |     return parser.parse_args(argv)
104 |
105 | if __name__ == '__main__':
106 |     main(parse_arguments(sys.argv[1:]))
107 |
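A minimal standalone sketch of how the dumped files are then consumed for classification (the random vector below stands in for a real, L2-normalized model embedding):

```python
import numpy as np

anchor = np.load('centroids.npy')                   # (num_classes, 512), unit-length rows
names = open('centroids_names.txt').read().split('\n')[:-1]
emb = np.random.randn(512)                          # stand-in for a model embedding
emb /= np.linalg.norm(emb)
sims = np.dot(anchor, emb)                          # cosine similarity to every centroid
print('Top-1 class:', names[int(np.argmax(sims))], 'sim:', float(np.max(sims)))
```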
--------------------------------------------------------------------------------
/Demo/demo.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import argparse
3 | import numpy as np
4 | import cv2
5 | import tensorflow as tf
6 | from align import detect_face
7 | from skimage import transform as trans
8 | from skimage.io import imsave
9 | import os
10 | import datetime
11 |
12 | # Align face as ArcFace template
13 | def preprocess(img, landmark):
14 |     image_size = [112,112]
15 |     src = np.array([
16 |         [38.2946, 51.6963],
17 |         [73.5318, 51.5014],
18 |         [56.0252, 71.7366],
19 |         [41.5493, 92.3655],
20 |         [70.7299, 92.2041] ], dtype=np.float32)
21 |     dst = landmark.astype(np.float32)
22 |     tform = trans.SimilarityTransform()
23 |     tform.estimate(dst, src)
24 |     M = tform.params[0:2,:]
25 |
26 |     warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0)
27 |     return warped
28 |
29 | def main(args):
30 |
31 |     # Model loading
32 |     frozen_graph = args.model
33 |     with tf.gfile.GFile(frozen_graph, "rb") as f:
34 |         graph_def = tf.GraphDef()
35 |         graph_def.ParseFromString(f.read())
36 |     with tf.Graph().as_default() as graph:
37 |         tf.import_graph_def(graph_def,
38 |                             input_map=None,
39 |                             return_elements=None,
40 |                             name="")
41 |     image_input = graph.get_tensor_by_name('image_input:0')
42 |     keep_prob = graph.get_tensor_by_name('keep_prob:0')
43 |     is_train = graph.get_tensor_by_name('training_mode:0')
44 |     embedding = graph.get_tensor_by_name('embedding:0')
45 |
46 |     minsize = 100
47 |     threshold = [ 0.6, 0.7, 0.7 ]
48 |     factor = 0.709
49 |     sess = tf.Session(graph=graph)
50 |     pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
51 |
52 |     # Centroids loading
53 |     anchor = np.load(os.path.join(args.centroids,'centroids.npy'))
54 |     names = open(os.path.join(args.centroids,'centroids_names.txt')).read().split('\n')[:-1]
55 |
56 |     IDcolor = [255., 255., 255.]
57 |     IDcolor2 = [255., 0., 0.]
58 |
59 |     video_capture = cv2.VideoCapture(0)
60 |     video_capture.set(3, 1280)
61 |     video_capture.set(4, 1024)
62 |
63 |     while(True):
64 |
65 |         # Start of video sequence processing
66 |         ret, frame = video_capture.read()
67 |         if not ret:
68 |             print('Cannot access the webcam')
69 |             break
70 |         frame = cv2.flip(frame[:,:,::-1], 1)
71 |
72 |         key = cv2.waitKey(1)
73 |         if key == ord('q'):
74 |             break
75 |         if key == ord('s'):
76 |             imsave('Demo-'+str(datetime.datetime.now())+'.jpg',frame)
77 |
78 |         # Search and preparation of all faces on the frame
79 |         bounding_boxes, points = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
80 |
81 |         batch = np.zeros((bounding_boxes.shape[0],3,112,112),dtype=np.float32)
82 |         for i in range(bounding_boxes.shape[0]):
83 |             landmark = points[:,i].reshape((2,5)).T
84 |             warped = preprocess(frame, landmark = landmark)
85 |             warped = np.transpose(warped,[2,0,1]).reshape((1,3,112,112))
86 |             batch[i] = (warped-127.5)*0.0078125
87 |
88 |         # Recognition of all faces
89 |         if batch.shape[0]!=0:
90 |             embs = sess.run(embedding,feed_dict={image_input:batch,keep_prob:1.0,is_train:False})
91 |             for i in range(bounding_boxes.shape[0]):
92 |                 probabilities = np.dot(anchor,embs[i])
93 |                 val = np.max(probabilities)
94 |                 pos = np.argmax(probabilities)
95 |
96 |                 pt1 = (int(bounding_boxes[i][0]), int(bounding_boxes[i][1]))
97 |                 pt2 = (int(bounding_boxes[i][2]), int(bounding_boxes[i][3]))
98 |
99 |                 cv2.rectangle(frame, pt1, pt2, IDcolor)
100 |
101 |                 cv2.putText(frame, 'Top-1 class: '+names[pos],
102 |                     (int(bounding_boxes[i][0]), int(bounding_boxes[i][1])-5),
103 |                     cv2.FONT_HERSHEY_SIMPLEX, 1., IDcolor, 3)
104 |                 cv2.putText(frame, 'Sim. to top-1 class: '+str(round(val,4)),
105 |                     (int(bounding_boxes[i][0]), int(bounding_boxes[i][3])+30),
106 |                     cv2.FONT_HERSHEY_SIMPLEX, 1., IDcolor, 3)
107 |
108 |         cv2.imshow('Camera ("q" to quit, "s" to save frame)', frame[:,:,::-1])
109 |
110 |
111 |     video_capture.release()
112 |     cv2.destroyAllWindows()
113 |
114 |
115 |
116 | def parse_arguments(argv):
117 |     parser = argparse.ArgumentParser()
118 |
119 |     parser.add_argument('model',type=str, help='Path to the model.')
120 |     parser.add_argument('centroids',type=str, help='Dir with centroids of classes for the classifier.')
121 |     return parser.parse_args(argv)
122 |
123 | if __name__ == '__main__':
124 |     main(parse_arguments(sys.argv[1:]))
125 |
--------------------------------------------------------------------------------
/Attack/stn.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | def spatial_transformer_network(input_fmap, theta, out_dims=None, **kwargs):
5 |     """
6 |     Spatial Transformer Network layer implementation as described in [1].
7 | 8 | The layer is composed of 3 elements: 9 | 10 | - localization_net: takes the original image as input and outputs 11 | the parameters of the affine transformation that should be applied 12 | to the input image. 13 | 14 | - affine_grid_generator: generates a grid of (x,y) coordinates that 15 | correspond to a set of points where the input should be sampled 16 | to produce the transformed output. 17 | 18 | - bilinear_sampler: takes as input the original image and the grid 19 | and produces the output image using bilinear interpolation. 20 | 21 | Input 22 | ----- 23 | - input_fmap: output of the previous layer. Can be input if spatial 24 | transformer layer is at the beginning of architecture. Should be 25 | a tensor of shape (B, H, W, C). 26 | 27 | - theta: affine transform tensor of shape (B, 6). Permits cropping, 28 | translation and isotropic scaling. Initialize to identity matrix. 29 | It is the output of the localization network. 30 | 31 | Returns 32 | ------- 33 | - out_fmap: transformed input feature map. Tensor of size (B, H, W, C). 34 | 35 | Notes 36 | ----- 37 | [1]: 'Spatial Transformer Networks', Jaderberg et. al, 38 | (https://arxiv.org/abs/1506.02025) 39 | 40 | """ 41 | # grab input dimensions 42 | B = tf.shape(input_fmap)[0] 43 | H = tf.shape(input_fmap)[1] 44 | W = tf.shape(input_fmap)[2] 45 | 46 | # reshape theta to (B, 2, 3) 47 | theta = tf.reshape(theta, [B, 2, 3]) 48 | 49 | # generate grids of same size or upsample/downsample if specified 50 | if out_dims: 51 | out_H = out_dims[0] 52 | out_W = out_dims[1] 53 | batch_grids = affine_grid_generator(out_H, out_W, theta) 54 | else: 55 | batch_grids = affine_grid_generator(H, W, theta) 56 | 57 | x_s = batch_grids[:, 0, :, :] 58 | y_s = batch_grids[:, 1, :, :] 59 | 60 | # sample input with grid to get output 61 | out_fmap = bilinear_sampler(input_fmap, x_s, y_s) 62 | 63 | return out_fmap 64 | 65 | 66 | def get_pixel_value(img, x, y): 67 | """ 68 | Utility function to get pixel value for coordinate 69 | vectors x and y from a 4D tensor image. 70 | 71 | Input 72 | ----- 73 | - img: tensor of shape (B, H, W, C) 74 | - x: flattened tensor of shape (B*H*W,) 75 | - y: flattened tensor of shape (B*H*W,) 76 | 77 | Returns 78 | ------- 79 | - output: tensor of shape (B, H, W, C) 80 | """ 81 | shape = tf.shape(x) 82 | batch_size = shape[0] 83 | height = shape[1] 84 | width = shape[2] 85 | 86 | batch_idx = tf.range(0, batch_size) 87 | batch_idx = tf.reshape(batch_idx, (batch_size, 1, 1)) 88 | b = tf.tile(batch_idx, (1, height, width)) 89 | 90 | indices = tf.stack([b, y, x], 3) 91 | 92 | return tf.gather_nd(img, indices) 93 | 94 | 95 | def affine_grid_generator(height, width, theta): 96 | """ 97 | This function returns a sampling grid, which when 98 | used with the bilinear sampler on the input feature 99 | map, will create an output feature map that is an 100 | affine transformation [1] of the input feature map. 101 | 102 | Input 103 | ----- 104 | - height: desired height of grid/output. Used 105 | to downsample or upsample. 106 | 107 | - width: desired width of grid/output. Used 108 | to downsample or upsample. 109 | 110 | - theta: affine transform matrices of shape (num_batch, 2, 3). 111 | For each image in the batch, we have 6 theta parameters of 112 | the form (2x3) that define the affine transformation T. 113 | 114 | Returns 115 | ------- 116 | - normalized grid (-1, 1) of shape (num_batch, 2, H, W). 
117 | The 2nd dimension has 2 components: (x, y) which are the 118 | sampling points of the original image for each point in the 119 | target image. 120 | 121 | Note 122 | ---- 123 | [1]: the affine transformation allows cropping, translation, 124 | and isotropic scaling. 125 | """ 126 | num_batch = tf.shape(theta)[0] 127 | 128 | # create normalized 2D grid 129 | x = tf.linspace(-1.0, 1.0, width) 130 | y = tf.linspace(-1.0, 1.0, height) 131 | x_t, y_t = tf.meshgrid(x, y) 132 | 133 | # flatten 134 | x_t_flat = tf.reshape(x_t, [-1]) 135 | y_t_flat = tf.reshape(y_t, [-1]) 136 | 137 | # reshape to [x_t, y_t , 1] - (homogeneous form) 138 | ones = tf.ones_like(x_t_flat) 139 | sampling_grid = tf.stack([x_t_flat, y_t_flat, ones]) 140 | 141 | # repeat grid num_batch times 142 | sampling_grid = tf.expand_dims(sampling_grid, axis=0) 143 | sampling_grid = tf.tile(sampling_grid, tf.stack([num_batch, 1, 1])) 144 | 145 | # cast to float32 (required for matmul) 146 | theta = tf.cast(theta, 'float32') 147 | sampling_grid = tf.cast(sampling_grid, 'float32') 148 | 149 | # transform the sampling grid - batch multiply 150 | batch_grids = tf.matmul(theta, sampling_grid) 151 | # batch grid has shape (num_batch, 2, H*W) 152 | 153 | # reshape to (num_batch, H, W, 2) 154 | batch_grids = tf.reshape(batch_grids, [num_batch, 2, height, width]) 155 | 156 | return batch_grids 157 | 158 | 159 | def bilinear_sampler(img, x, y): 160 | """ 161 | Performs bilinear sampling of the input images according to the 162 | normalized coordinates provided by the sampling grid. Note that 163 | the sampling is done identically for each channel of the input. 164 | 165 | To test if the function works properly, output image should be 166 | identical to input image when theta is initialized to identity 167 | transform. 168 | 169 | Input 170 | ----- 171 | - img: batch of images in (B, H, W, C) layout. 172 | - grid: x, y which is the output of affine_grid_generator. 173 | 174 | Returns 175 | ------- 176 | - out: interpolated images according to grids. Same size as grid. 
177 | """ 178 | H = tf.shape(img)[1] 179 | W = tf.shape(img)[2] 180 | max_y = tf.cast(H - 1, 'int32') 181 | max_x = tf.cast(W - 1, 'int32') 182 | zero = tf.zeros([], dtype='int32') 183 | 184 | # rescale x and y to [0, W-1/H-1] 185 | x = tf.cast(x, 'float32') 186 | y = tf.cast(y, 'float32') 187 | x = 0.5 * ((x + 1.0) * tf.cast(max_x-1, 'float32')) 188 | y = 0.5 * ((y + 1.0) * tf.cast(max_y-1, 'float32')) 189 | 190 | # grab 4 nearest corner points for each (x_i, y_i) 191 | x0 = tf.cast(tf.floor(x), 'int32') 192 | x1 = x0 + 1 193 | y0 = tf.cast(tf.floor(y), 'int32') 194 | y1 = y0 + 1 195 | 196 | # clip to range [0, H-1/W-1] to not violate img boundaries 197 | x0 = tf.clip_by_value(x0, zero, max_x) 198 | x1 = tf.clip_by_value(x1, zero, max_x) 199 | y0 = tf.clip_by_value(y0, zero, max_y) 200 | y1 = tf.clip_by_value(y1, zero, max_y) 201 | 202 | # get pixel value at corner coords 203 | Ia = get_pixel_value(img, x0, y0) 204 | Ib = get_pixel_value(img, x0, y1) 205 | Ic = get_pixel_value(img, x1, y0) 206 | Id = get_pixel_value(img, x1, y1) 207 | 208 | # recast as float for delta calculation 209 | x0 = tf.cast(x0, 'float32') 210 | x1 = tf.cast(x1, 'float32') 211 | y0 = tf.cast(y0, 'float32') 212 | y1 = tf.cast(y1, 'float32') 213 | 214 | # calculate deltas 215 | wa = (x1-x) * (y1-y) 216 | wb = (x1-x) * (y-y0) 217 | wc = (x-x0) * (y1-y) 218 | wd = (x-x0) * (y-y0) 219 | 220 | # add dimension for addition 221 | wa = tf.expand_dims(wa, axis=3) 222 | wb = tf.expand_dims(wb, axis=3) 223 | wc = tf.expand_dims(wc, axis=3) 224 | wd = tf.expand_dims(wd, axis=3) 225 | 226 | # compute output 227 | out = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id]) 228 | 229 | return out 230 | -------------------------------------------------------------------------------- /Attack/attack.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import os 4 | import tensorflow as tf 5 | import numpy as np 6 | import skimage.io as io 7 | from skimage.transform import rescale 8 | from tqdm import tqdm 9 | from stn import spatial_transformer_network as stn 10 | from utils import TVloss, projector 11 | from sklearn.linear_model import LinearRegression as LR 12 | from time import time 13 | import datetime 14 | import matplotlib.pyplot as plt 15 | 16 | # Prepare image to network input format 17 | def prep(im): 18 | if len(im.shape)==3: 19 | return np.transpose(im,[2,0,1]).reshape((1,3,112,112))*2-1 20 | elif len(im.shape)==4: 21 | return np.transpose(im,[0,3,1,2]).reshape((im.shape[0],3,112,112))*2-1 22 | 23 | def main(args): 24 | print(args) 25 | now = str(datetime.datetime.now()) 26 | 27 | sess = tf.Session() 28 | 29 | # Off-plane sticker projection 30 | logo = tf.placeholder(tf.float32,shape=[None,400,900,3],name='logo_input') 31 | param = tf.placeholder(tf.float32,shape=[None,1],name='param_input') 32 | ph = tf.placeholder(tf.float32,shape=[None,1],name='ph_input') 33 | result = projector(param,ph,logo) 34 | 35 | # Union of the sticker and face image 36 | mask_input = tf.placeholder(tf.float32,shape=[None,900,900,3],name='mask_input') 37 | face_input = tf.placeholder(tf.float32,shape=[None,600,600,3],name='face_input') 38 | theta = tf.placeholder(tf.float32,shape=[None,6],name='theta_input') 39 | prepared = stn(result,theta) 40 | 41 | # Transformation to ArcFace template 42 | theta2 = tf.placeholder(tf.float32,shape=[None,6],name='theta2_input') 43 | united = prepared[:,300:,150:750]*mask_input[:,300:,150:750]+\ 44 | face_input*(1-mask_input[:,300:,150:750]) 45 | final_crop 
= tf.clip_by_value(stn(united,theta2,(112,112)),0.,1.) 46 | 47 | # TV loss and gradients 48 | w_tv = tf.placeholder(tf.float32,name='w_tv_input') 49 | tv_loss = TVloss(logo,w_tv) 50 | 51 | grads_tv = tf.gradients(tv_loss,logo) 52 | grads_input = tf.placeholder(tf.float32,shape=[None,112,112,3],name='grads_input') 53 | grads1 = tf.gradients(final_crop,logo,grad_ys=grads_input) 54 | 55 | # Varios images generator 56 | class Imgen(object): 57 | def __init__(self): 58 | self.fdict = {ph:[[args.ph]],\ 59 | logo:np.ones((1,400,900,3)),\ 60 | param:[[args.param]],\ 61 | theta:1./args.scale*np.array([[1.,0.,-args.x/450.,0.,1.,-args.y/450.]]),\ 62 | theta2:[[1.,0.,0.,0.,1.,0.]],\ 63 | w_tv:args.w_tv} 64 | mask = sess.run(prepared,feed_dict=self.fdict) 65 | self.fdict[mask_input] = mask 66 | 67 | def gen_fixed(self,im,advhat): 68 | self.fdict[face_input] = np.expand_dims(im,0) 69 | self.fdict[logo] = np.expand_dims(advhat,0) 70 | return self.fdict, sess.run(final_crop,feed_dict=self.fdict) 71 | 72 | def gen_random(self,im,advhat,batch=args.batch_size): 73 | alpha1 = np.random.uniform(-1.,1.,size=(batch,1))/180.*np.pi 74 | scale1 = np.random.uniform(args.scale-0.02,args.scale+0.02,size=(batch,1)) 75 | y1 = np.random.uniform(args.y-600./112.,args.y+600./112.,size=(batch,1)) 76 | x1 = np.random.uniform(args.x-600./112.,args.x+600./112.,size=(batch,1)) 77 | alpha2 = np.random.uniform(-1.,1.,size=(batch,1))/180.*np.pi 78 | scale2 = np.random.uniform(1./1.04,1.04,size=(batch,1)) 79 | y2 = np.random.uniform(-1.,1.,size=(batch,1))/66. 80 | angle = np.random.uniform(args.ph-2.,args.ph+2.,size=(batch,1)) 81 | parab = np.random.uniform(args.param-0.0002,args.param+0.0002,size=(batch,1)) 82 | fdict = {ph:angle,param:parab,w_tv:args.w_tv,\ 83 | theta:1./scale1*np.hstack([np.cos(alpha1),np.sin(alpha1),-x1/450.,\ 84 | -np.sin(alpha1),np.cos(alpha1),-y1/450.]),\ 85 | theta2:scale2*np.hstack([np.cos(alpha2),np.sin(alpha2),np.zeros((batch,1)),\ 86 | -np.sin(alpha2),np.cos(alpha2),y2]),\ 87 | logo:np.ones((batch,400,900,3)),\ 88 | face_input:np.tile(np.expand_dims(im,0),[batch,1,1,1])} 89 | mask = sess.run(prepared,feed_dict=fdict) 90 | fdict[mask_input] = mask 91 | fdict[logo] = np.tile(np.expand_dims(advhat,0),[batch,1,1,1]) 92 | return fdict, sess.run(final_crop,feed_dict=fdict) 93 | 94 | gener = Imgen() 95 | 96 | # Initialization of the sticker 97 | init_logo = np.ones((400,900,3))*127./255. 98 | if args.init_face!=None: 99 | init_face = io.imread(args.init_face)/255. 100 | init_loss = tv_loss+tf.reduce_sum(tf.abs(init_face-united[0])) 101 | init_grads = tf.gradients(init_loss,logo) 102 | init_logo = np.ones((400,900,3))*127./255. 103 | fdict, _ = gener.gen_fixed(init_face,init_logo) 104 | moments = np.zeros((400,900,3)) 105 | print('Initialization from face, step 1/2') 106 | for i in tqdm(range(500)): 107 | fdict[logo] = np.expand_dims(init_logo,0) 108 | grads = moments*0.9+sess.run(init_grads,feed_dict=fdict)[0][0] 109 | moments = moments*0.9 + grads*0.1 110 | init_logo = np.clip(init_logo-1./51.*np.sign(grads),0.,1.) 111 | print('Initialization from face, step 2/2') 112 | for i in tqdm(range(500)): 113 | fdict[logo] = np.expand_dims(init_logo,0) 114 | grads = moments*0.9+sess.run(init_grads,feed_dict=fdict)[0][0] 115 | moments = moments*0.9 + grads*0.1 116 | init_logo = np.clip(init_logo-1./255.*np.sign(grads),0.,1.) 117 | io.imsave(now+'_init_logo.png',init_logo) 118 | elif args.init_logo!=None: 119 | init_logo[:] = io.imread(args.init_logo)/255. 
120 | 121 | 122 | # Embedding model 123 | with tf.gfile.GFile(args.model, "rb") as f: 124 | graph_def = tf.GraphDef() 125 | graph_def.ParseFromString(f.read()) 126 | tf.import_graph_def(graph_def, 127 | input_map=None, 128 | return_elements=None, 129 | name="") 130 | image_input = tf.get_default_graph().get_tensor_by_name('image_input:0') 131 | keep_prob = tf.get_default_graph().get_tensor_by_name('keep_prob:0') 132 | is_train = tf.get_default_graph().get_tensor_by_name('training_mode:0') 133 | embedding = tf.get_default_graph().get_tensor_by_name('embedding:0') 134 | 135 | orig_emb = tf.placeholder(tf.float32,shape=[None,512],name='orig_emb_input') 136 | cos_loss = tf.reduce_sum(tf.multiply(embedding,orig_emb),axis=1) 137 | grads2 = tf.gradients(cos_loss,image_input) 138 | 139 | fdict2 = {keep_prob:1.0,is_train:False} 140 | 141 | # Anchor embedding calculation 142 | if args.anchor_face!=None: 143 | anch_im = rescale(io.imread(args.anchor_face)/255.,112./600.,order=5) 144 | fdict2[image_input] = prep(anch_im) 145 | fdict2[orig_emb] = sess.run(embedding,feed_dict=fdict2) 146 | elif args.anchor_emb!=None: 147 | fdict2[orig_emb] = np.load(args.anchor_emb)[-1:] 148 | else: 149 | anch_im = rescale(io.imread(args.image)/255.,112./600.,order=5) 150 | fdict2[image_input] = prep(anch_im) 151 | fdict2[orig_emb] = sess.run(embedding,feed_dict=fdict2) 152 | 153 | # Attack constants 154 | im0 = io.imread(args.image)/255. 155 | regr = LR(n_jobs=4) 156 | regr_len = 100 157 | regr_coef = -1. 158 | moments = np.zeros((400,900,3)) 159 | moment_val = 0.9 160 | step_val = 1./51. 161 | stage = 1 162 | step = 0 163 | lr_thresh = 100 164 | ls = [] 165 | t = time() 166 | while True: 167 | # Projecting sticker to the face and feeding it to the embedding model 168 | fdict,ims = gener.gen_random(im0,init_logo) 169 | fdict2[image_input] = prep(ims) 170 | grad_tmp = sess.run(grads2,feed_dict=fdict2) 171 | 172 | fdict_val, im_val = gener.gen_fixed(im0,init_logo) 173 | fdict2[image_input] = prep(im_val) 174 | ls.append(sess.run(cos_loss,feed_dict=fdict2)[0]) 175 | 176 | # Gradients to the original sticker image 177 | fdict[grads_input] = np.transpose(grad_tmp[0],[0,2,3,1]) 178 | grads_on_logo = np.mean(sess.run(grads1,feed_dict=fdict)[0],0) 179 | grads_on_logo += sess.run(grads_tv,feed_dict=fdict)[0][0] 180 | moments = moments*moment_val + grads_on_logo*(1.-moment_val) 181 | init_logo -= step_val*np.sign(moments) 182 | init_logo = np.clip(init_logo,0.,1.) 183 | 184 | # Logging 185 | step += 1 186 | if step%20==0: 187 | print('Stage:',stage,'Step:',step,'Av. time:',round((time()-t)/step,2),'Loss:',round(ls[-1],2),'Coef:',regr_coef) 188 | 189 | # Switching to the second stage 190 | if step>lr_thresh: 191 | regr.fit(np.expand_dims(np.arange(100),1),np.hstack(ls[-100:])) 192 | regr_coef = regr.coef_[0] 193 | if regr_coef>=0: 194 | if stage==1: 195 | stage = 2 196 | moment_val = 0.995 197 | step_val = 1./255. 198 | step = 0 199 | regr_coef = -1. 
200 | lr_thresh = 200 201 | t = time() 202 | else: 203 | break 204 | 205 | plt.plot(range(len(ls)),ls) 206 | plt.savefig(now+'_cosine.png') 207 | io.imsave(now+'_advhat.png',init_logo) 208 | 209 | def parse_arguments(argv): 210 | parser = argparse.ArgumentParser() 211 | 212 | parser.add_argument('image', type=str, help='Path to the image for attack.') 213 | parser.add_argument('model', type=str, help='Path to the model for attack.') 214 | parser.add_argument('--init_face', type=str, default=None, help='Path to the face for sticker inititalization.') 215 | parser.add_argument('--init_logo', type=str, default=None, help='Path to the image for inititalization.') 216 | parser.add_argument('--anchor_face', type=str, default=None, help='Path to the anchor face.') 217 | parser.add_argument('--anchor_emb', type=str, default=None, help='Path to the anchor emb (the last will be used)') 218 | parser.add_argument('--w_tv', type=float, default=1e-4, help='Weight of the TV loss') 219 | parser.add_argument('--ph', type=float, default=17., help='Angle of the off-plane rotation') 220 | parser.add_argument('--param', type=float, default=0.0013, help='Parabola rate for the off-plane parabolic transformation') 221 | parser.add_argument('--scale', type=float, default=0.465, help='Scaling parameter for the sticker') 222 | parser.add_argument('--x', type=float, default=0., help='Translation of the sticker along x-axis') 223 | parser.add_argument('--y', type=float, default=-15., help='Translation of the sticker along y-axis') 224 | parser.add_argument('--batch_size', type=int, default=20, help='Batch size for attack') 225 | 226 | return parser.parse_args(argv) 227 | 228 | if __name__ == '__main__': 229 | main(parse_arguments(sys.argv[1:])) 230 | -------------------------------------------------------------------------------- /Demo/1000_from_CASIA/centroids_names.txt: -------------------------------------------------------------------------------- 1 | 0000045 2 | 0000099 3 | 0000100 4 | 0000102 5 | 0000103 6 | 0000105 7 | 0000107 8 | 0000108 9 | 0000114 10 | 0000117 11 | 0000119 12 | 0000121 13 | 0000133 14 | 0000137 15 | 0000141 16 | 0000143 17 | 0000144 18 | 0000145 19 | 0000147 20 | 0000156 21 | 0000157 22 | 0000159 23 | 0000166 24 | 0000168 25 | 0000169 26 | 0000170 27 | 0000174 28 | 0000177 29 | 0000183 30 | 0000185 31 | 0000186 32 | 0000188 33 | 0000189 34 | 0000192 35 | 0000195 36 | 0000198 37 | 0000202 38 | 0000204 39 | 0000205 40 | 0000207 41 | 0000208 42 | 0000210 43 | 0000211 44 | 0000212 45 | 0000214 46 | 0000220 47 | 0000225 48 | 0000233 49 | 0000238 50 | 0000240 51 | 0000247 52 | 0000249 53 | 0000254 54 | 0000256 55 | 0000260 56 | 0000262 57 | 0000263 58 | 0000268 59 | 0000270 60 | 0000271 61 | 0000272 62 | 0000275 63 | 0000280 64 | 0000281 65 | 0000282 66 | 0000284 67 | 0000286 68 | 0000287 69 | 0000293 70 | 0000295 71 | 0000296 72 | 0000297 73 | 0000299 74 | 0000301 75 | 0000302 76 | 0000304 77 | 0000307 78 | 0000310 79 | 0000317 80 | 0000318 81 | 0000319 82 | 0000321 83 | 0000324 84 | 0000327 85 | 0000331 86 | 0000332 87 | 0000333 88 | 0000334 89 | 0000335 90 | 0000342 91 | 0000343 92 | 0000344 93 | 0000346 94 | 0000349 95 | 0000350 96 | 0000351 97 | 0000352 98 | 0000353 99 | 0000356 100 | 0000357 101 | 0000360 102 | 0000362 103 | 0000363 104 | 0000364 105 | 0000365 106 | 0000368 107 | 0000373 108 | 0000374 109 | 0000381 110 | 0000383 111 | 0000385 112 | 0000386 113 | 0000387 114 | 0000388 115 | 0000389 116 | 0000391 117 | 0000394 118 | 0000396 119 | 0000397 120 | 0000399 121 | 0000402 122 | 
0000405 123 | 0000408 124 | 0000410 125 | 0000411 126 | 0000413 127 | 0000415 128 | 0000420 129 | 0000422 130 | 0000426 131 | 0000427 132 | 0000430 133 | 0000431 134 | 0000433 135 | 0000434 136 | 0000436 137 | 0000437 138 | 0000438 139 | 0000439 140 | 0000442 141 | 0000444 142 | 0000446 143 | 0000447 144 | 0000448 145 | 0000451 146 | 0000452 147 | 0000455 148 | 0000457 149 | 0000459 150 | 0000460 151 | 0000461 152 | 0000462 153 | 0000463 154 | 0000464 155 | 0000465 156 | 0000467 157 | 0000471 158 | 0000473 159 | 0000477 160 | 0000480 161 | 0000481 162 | 0000482 163 | 0000483 164 | 0000484 165 | 0000486 166 | 0000487 167 | 0000492 168 | 0000494 169 | 0000495 170 | 0000498 171 | 0000499 172 | 0000500 173 | 0000505 174 | 0000506 175 | 0000510 176 | 0000512 177 | 0000513 178 | 0000514 179 | 0000515 180 | 0000520 181 | 0000521 182 | 0000524 183 | 0000525 184 | 0000526 185 | 0000529 186 | 0000530 187 | 0000531 188 | 0000532 189 | 0000533 190 | 0000534 191 | 0000535 192 | 0000538 193 | 0000539 194 | 0000541 195 | 0000545 196 | 0000546 197 | 0000547 198 | 0000550 199 | 0000551 200 | 0000552 201 | 0000554 202 | 0000555 203 | 0000562 204 | 0000563 205 | 0000568 206 | 0000570 207 | 0000571 208 | 0000574 209 | 0000575 210 | 0000579 211 | 0000580 212 | 0000582 213 | 0000583 214 | 0000585 215 | 0000588 216 | 0000589 217 | 0000592 218 | 0000593 219 | 0000595 220 | 0000596 221 | 0000597 222 | 0000598 223 | 0000599 224 | 0000600 225 | 0000601 226 | 0000605 227 | 0000606 228 | 0000607 229 | 0000609 230 | 0000610 231 | 0000611 232 | 0000612 233 | 0000613 234 | 0000614 235 | 0000615 236 | 0000616 237 | 0000617 238 | 0000619 239 | 0000620 240 | 0000622 241 | 0000623 242 | 0000624 243 | 0000625 244 | 0000628 245 | 0000630 246 | 0000633 247 | 0000637 248 | 0000642 249 | 0000646 250 | 0000648 251 | 0000651 252 | 0000652 253 | 0000653 254 | 0000655 255 | 0000656 256 | 0000657 257 | 0000662 258 | 0000663 259 | 0000664 260 | 0000665 261 | 0000667 262 | 0000670 263 | 0000672 264 | 0000675 265 | 0000678 266 | 0000679 267 | 0000680 268 | 0000688 269 | 0000689 270 | 0000690 271 | 0000691 272 | 0000693 273 | 0000694 274 | 0000695 275 | 0000696 276 | 0000700 277 | 0000703 278 | 0000705 279 | 0000707 280 | 0000708 281 | 0000709 282 | 0000711 283 | 0000717 284 | 0000725 285 | 0000729 286 | 0000745 287 | 0000749 288 | 0000751 289 | 0000756 290 | 0000759 291 | 0000760 292 | 0000775 293 | 0000776 294 | 0000777 295 | 0000782 296 | 0000792 297 | 0000793 298 | 0000796 299 | 0000800 300 | 0000801 301 | 0000803 302 | 0000809 303 | 0000815 304 | 0000816 305 | 0000830 306 | 0000836 307 | 0000837 308 | 0000838 309 | 0000867 310 | 0000868 311 | 0000871 312 | 0000874 313 | 0000876 314 | 0000880 315 | 0000881 316 | 0000884 317 | 0000889 318 | 0000892 319 | 0000893 320 | 0000902 321 | 0000903 322 | 0000915 323 | 0000916 324 | 0000928 325 | 0000929 326 | 0000933 327 | 0000934 328 | 0000943 329 | 0000944 330 | 0000948 331 | 0000950 332 | 0000952 333 | 0000954 334 | 0000956 335 | 0000959 336 | 0000960 337 | 0000961 338 | 0000962 339 | 0000965 340 | 0000968 341 | 0000977 342 | 0000980 343 | 0000981 344 | 0000982 345 | 0000985 346 | 0000986 347 | 0000991 348 | 0000996 349 | 0000997 350 | 0000998 351 | 0001002 352 | 0001004 353 | 0001005 354 | 0001006 355 | 0001015 356 | 0001018 357 | 0001019 358 | 0001022 359 | 0001026 360 | 0001029 361 | 0001035 362 | 0001037 363 | 0001038 364 | 0001039 365 | 0001040 366 | 0001043 367 | 0001044 368 | 0001046 369 | 0001048 370 | 0001049 371 | 0001053 372 | 0001054 373 | 0001057 374 | 0001061 375 | 0001062 376 
| 0001063 377 | 0001064 378 | 0001065 379 | 0001068 380 | 0001069 381 | 0001071 382 | 0001075 383 | 0001081 384 | 0001082 385 | 0001083 386 | 0001084 387 | 0001086 388 | 0001089 389 | 0001092 390 | 0001096 391 | 0001097 392 | 0001099 393 | 0001101 394 | 0001103 395 | 0001104 396 | 0001107 397 | 0001108 398 | 0001110 399 | 0001111 400 | 0001114 401 | 0001116 402 | 0001117 403 | 0001118 404 | 0001126 405 | 0001127 406 | 0001129 407 | 0001131 408 | 0001138 409 | 0001139 410 | 0001143 411 | 0001146 412 | 0001147 413 | 0001151 414 | 0001152 415 | 0001153 416 | 0001154 417 | 0001155 418 | 0001162 419 | 0001165 420 | 0001166 421 | 0001168 422 | 0001170 423 | 0001172 424 | 0001176 425 | 0001180 426 | 0001183 427 | 0001187 428 | 0001194 429 | 0001199 430 | 0001200 431 | 0001201 432 | 0001208 433 | 0001209 434 | 0001210 435 | 0001217 436 | 0001218 437 | 0001223 438 | 0001231 439 | 0001233 440 | 0001235 441 | 0001240 442 | 0001242 443 | 0001250 444 | 0001251 445 | 0001261 446 | 0001263 447 | 0001265 448 | 0001266 449 | 0001267 450 | 0001272 451 | 0001274 452 | 0001277 453 | 0001281 454 | 0001282 455 | 0001286 456 | 0001287 457 | 0001290 458 | 0001292 459 | 0001293 460 | 0001295 461 | 0001298 462 | 0001302 463 | 0001305 464 | 0001307 465 | 0001309 466 | 0001312 467 | 0001314 468 | 0001315 469 | 0001317 470 | 0001319 471 | 0001323 472 | 0001324 473 | 0001325 474 | 0001326 475 | 0001332 476 | 0001334 477 | 0001337 478 | 0001339 479 | 0001344 480 | 0001346 481 | 0001347 482 | 0001348 483 | 0001353 484 | 0001356 485 | 0001364 486 | 0001365 487 | 0001367 488 | 0001368 489 | 0001370 490 | 0001373 491 | 0001377 492 | 0001378 493 | 0001384 494 | 0001387 495 | 0001388 496 | 0001389 497 | 0001390 498 | 0001392 499 | 0001393 500 | 0001398 501 | 0001399 502 | 0001400 503 | 0001403 504 | 0001404 505 | 0001406 506 | 0001407 507 | 0001409 508 | 0001410 509 | 0001412 510 | 0001416 511 | 0001418 512 | 0001424 513 | 0001431 514 | 0001435 515 | 0001436 516 | 0001438 517 | 0001440 518 | 0001441 519 | 0001442 520 | 0001444 521 | 0001457 522 | 0001459 523 | 0001461 524 | 0001462 525 | 0001467 526 | 0001468 527 | 0001469 528 | 0001472 529 | 0001480 530 | 0001484 531 | 0001488 532 | 0001491 533 | 0001492 534 | 0001493 535 | 0001494 536 | 0001496 537 | 0001497 538 | 0001498 539 | 0001499 540 | 0001504 541 | 0001507 542 | 0001508 543 | 0001515 544 | 0001517 545 | 0001518 546 | 0001519 547 | 0001520 548 | 0001521 549 | 0001524 550 | 0001525 551 | 0001527 552 | 0001529 553 | 0001530 554 | 0001533 555 | 0001538 556 | 0001540 557 | 0001541 558 | 0001542 559 | 0001548 560 | 0001550 561 | 0001554 562 | 0001556 563 | 0001557 564 | 0001558 565 | 0001562 566 | 0001564 567 | 0001565 568 | 0001567 569 | 0001568 570 | 0001569 571 | 0001572 572 | 0001573 573 | 0001574 574 | 0001575 575 | 0001578 576 | 0001582 577 | 0001589 578 | 0001590 579 | 0001593 580 | 0001594 581 | 0001595 582 | 0001597 583 | 0001598 584 | 0001599 585 | 0001601 586 | 0001602 587 | 0001604 588 | 0001605 589 | 0001607 590 | 0001608 591 | 0001610 592 | 0001614 593 | 0001617 594 | 0001621 595 | 0001622 596 | 0001624 597 | 0001629 598 | 0001630 599 | 0001631 600 | 0001632 601 | 0001633 602 | 0001634 603 | 0001638 604 | 0001640 605 | 0001641 606 | 0001642 607 | 0001650 608 | 0001652 609 | 0001653 610 | 0001661 611 | 0001662 612 | 0001664 613 | 0001667 614 | 0001668 615 | 0001669 616 | 0001670 617 | 0001675 618 | 0001684 619 | 0001688 620 | 0001694 621 | 0001696 622 | 0001697 623 | 0001698 624 | 0001701 625 | 0001704 626 | 0001709 627 | 0001710 628 | 0001711 629 | 0001713 
630 | 0001714 631 | 0001716 632 | 0001720 633 | 0001722 634 | 0001723 635 | 0001728 636 | 0001729 637 | 0001731 638 | 0001732 639 | 0001733 640 | 0001735 641 | 0001736 642 | 0001738 643 | 0001741 644 | 0001742 645 | 0001743 646 | 0001746 647 | 0001748 648 | 0001752 649 | 0001756 650 | 0001758 651 | 0001759 652 | 0001760 653 | 0001762 654 | 0001763 655 | 0001764 656 | 0001767 657 | 0001770 658 | 0001773 659 | 0001774 660 | 0001776 661 | 0001778 662 | 0001780 663 | 0001783 664 | 0001785 665 | 0001787 666 | 0001790 667 | 0001793 668 | 0001794 669 | 0001795 670 | 0001796 671 | 0001797 672 | 0001802 673 | 0001803 674 | 0001804 675 | 0001806 676 | 0001808 677 | 0001810 678 | 0001817 679 | 0001821 680 | 0001823 681 | 0001824 682 | 0001828 683 | 0001832 684 | 0001833 685 | 0001834 686 | 0001835 687 | 0001836 688 | 0001837 689 | 0001838 690 | 0001839 691 | 0001840 692 | 0001841 693 | 0001844 694 | 0001845 695 | 0001848 696 | 0001851 697 | 0001852 698 | 0001857 699 | 0001858 700 | 0001863 701 | 0001868 702 | 0001869 703 | 0001873 704 | 0001877 705 | 0001879 706 | 0001880 707 | 0001882 708 | 0001912 709 | 0001938 710 | 0001942 711 | 0001943 712 | 0001951 713 | 0001952 714 | 0001953 715 | 0001971 716 | 0001973 717 | 0001974 718 | 0001978 719 | 0001993 720 | 0002003 721 | 0002004 722 | 0002006 723 | 0002015 724 | 0002023 725 | 0002026 726 | 0002027 727 | 0002029 728 | 0002033 729 | 0002041 730 | 0002043 731 | 0002055 732 | 0002059 733 | 0002064 734 | 0002065 735 | 0002067 736 | 0002073 737 | 0002076 738 | 0002077 739 | 0002088 740 | 0002090 741 | 0002091 742 | 0002100 743 | 0002102 744 | 0002105 745 | 0002117 746 | 0002119 747 | 0002120 748 | 0002122 749 | 0002124 750 | 0002127 751 | 0002140 752 | 0002142 753 | 0002150 754 | 0002191 755 | 0002217 756 | 0002239 757 | 0002243 758 | 0002253 759 | 0002262 760 | 0002325 761 | 0002332 762 | 0002365 763 | 0002396 764 | 0002436 765 | 0002546 766 | 0002653 767 | 0002657 768 | 0002700 769 | 0002743 770 | 0002773 771 | 0002800 772 | 0002801 773 | 0002871 774 | 0002901 775 | 0002907 776 | 0002913 777 | 0002916 778 | 0002928 779 | 0002936 780 | 0002944 781 | 0002956 782 | 0003067 783 | 0003069 784 | 0003071 785 | 0003072 786 | 0003078 787 | 0003082 788 | 0003115 789 | 0003210 790 | 0003244 791 | 0003265 792 | 0003289 793 | 0003353 794 | 0003354 795 | 0003457 796 | 0003494 797 | 0003506 798 | 0003563 799 | 0003577 800 | 0003620 801 | 0003633 802 | 0003697 803 | 0003777 804 | 0003779 805 | 0003807 806 | 0003888 807 | 0003928 808 | 0003931 809 | 0003941 810 | 0003981 811 | 0004051 812 | 0004056 813 | 0004081 814 | 0004095 815 | 0004109 816 | 0004133 817 | 0004137 818 | 0004147 819 | 0004248 820 | 0004266 821 | 0004284 822 | 0004286 823 | 0004294 824 | 0004303 825 | 0004306 826 | 0004328 827 | 0004349 828 | 0004371 829 | 0004376 830 | 0004426 831 | 0004456 832 | 0004518 833 | 0004539 834 | 0004540 835 | 0004623 836 | 0004645 837 | 0004657 838 | 0004691 839 | 0004692 840 | 0004700 841 | 0004705 842 | 0004710 843 | 0004712 844 | 0004715 845 | 0004716 846 | 0004719 847 | 0004721 848 | 0004724 849 | 0004725 850 | 0004727 851 | 0004728 852 | 0004729 853 | 0004731 854 | 0004734 855 | 0004735 856 | 0004736 857 | 0004739 858 | 0004740 859 | 0004741 860 | 0004743 861 | 0004744 862 | 0004745 863 | 0004747 864 | 0004748 865 | 0004749 866 | 0004751 867 | 0004752 868 | 0004753 869 | 0004757 870 | 0004760 871 | 0004761 872 | 0004763 873 | 0004770 874 | 0004771 875 | 0004774 876 | 0004775 877 | 0004779 878 | 0004782 879 | 0004785 880 | 0004786 881 | 0004787 882 | 0004790 883 | 
0004792 884 | 0004793 885 | 0004797 886 | 0004801 887 | 0004802 888 | 0004804 889 | 0004805 890 | 0004806 891 | 0004808 892 | 0004809 893 | 0004810 894 | 0004812 895 | 0004814 896 | 0004818 897 | 0004819 898 | 0004820 899 | 0004822 900 | 0004825 901 | 0004826 902 | 0004827 903 | 0004830 904 | 0004834 905 | 0004838 906 | 0004839 907 | 0004840 908 | 0004841 909 | 0004844 910 | 0004846 911 | 0004849 912 | 0004850 913 | 0004852 914 | 0004853 915 | 0004854 916 | 0004857 917 | 0004859 918 | 0004861 919 | 0004865 920 | 0004866 921 | 0004867 922 | 0004868 923 | 0004871 924 | 0004875 925 | 0004879 926 | 0004880 927 | 0004883 928 | 0004884 929 | 0004886 930 | 0004887 931 | 0004889 932 | 0004892 933 | 0004893 934 | 0004894 935 | 0004895 936 | 0004897 937 | 0004898 938 | 0004899 939 | 0004900 940 | 0004904 941 | 0004906 942 | 0004909 943 | 0004911 944 | 0004912 945 | 0004914 946 | 0004917 947 | 0004918 948 | 0004921 949 | 0004922 950 | 0004923 951 | 0004925 952 | 0004928 953 | 0004929 954 | 0004930 955 | 0004933 956 | 0004936 957 | 0004937 958 | 0004939 959 | 0004940 960 | 0004941 961 | 0004943 962 | 0004947 963 | 0004954 964 | 0004955 965 | 0004956 966 | 0004957 967 | 0004959 968 | 0004960 969 | 0004965 970 | 0004966 971 | 0004967 972 | 0004968 973 | 0004969 974 | 0004971 975 | 0004975 976 | 0004977 977 | 0004978 978 | 0004979 979 | 0004980 980 | 0004981 981 | 0004982 982 | 0004984 983 | 0004985 984 | 0004986 985 | 0004987 986 | 0004988 987 | 0004990 988 | 0004991 989 | 0004993 990 | 0004994 991 | 0004996 992 | 0004999 993 | 0005000 994 | 0005002 995 | 0005006 996 | 0005007 997 | 0005009 998 | 0005010 999 | 0005011 1000 | 0005012 1001 | -------------------------------------------------------------------------------- /Demo/align/detect_face.py: -------------------------------------------------------------------------------- 1 | """ Tensorflow implementation of the face detection / alignment algorithm found at 2 | https://github.com/kpzhang93/MTCNN_face_detection_alignment 3 | """ 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | from six import string_types, iteritems 30 | 31 | import numpy as np 32 | import tensorflow as tf 33 | #from math import floor 34 | import cv2 35 | import os 36 | 37 | def layer(op): 38 | '''Decorator for composable network layers.''' 39 | 40 | def layer_decorated(self, *args, **kwargs): 41 | # Automatically set a name if not provided. 42 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 43 | # Figure out the layer inputs. 44 | if len(self.terminals) == 0: 45 | raise RuntimeError('No input variables found for layer %s.' % name) 46 | elif len(self.terminals) == 1: 47 | layer_input = self.terminals[0] 48 | else: 49 | layer_input = list(self.terminals) 50 | # Perform the operation and get the output. 51 | layer_output = op(self, layer_input, *args, **kwargs) 52 | # Add to layer LUT. 53 | self.layers[name] = layer_output 54 | # This output is now the input for the next layer. 55 | self.feed(layer_output) 56 | # Return self for chained calls. 57 | return self 58 | 59 | return layer_decorated 60 | 61 | class Network(object): 62 | 63 | def __init__(self, inputs, trainable=True): 64 | # The input nodes for this network 65 | self.inputs = inputs 66 | # The current list of terminal nodes 67 | self.terminals = [] 68 | # Mapping from layer names to layers 69 | self.layers = dict(inputs) 70 | # If true, the resulting variables are set as trainable 71 | self.trainable = trainable 72 | 73 | self.setup() 74 | 75 | def setup(self): 76 | '''Construct the network. ''' 77 | raise NotImplementedError('Must be implemented by the subclass.') 78 | 79 | def load(self, data_path, session, ignore_missing=False): 80 | '''Load network weights. 81 | data_path: The path to the numpy-serialized network weights 82 | session: The current TensorFlow session 83 | ignore_missing: If true, serialized weights for missing layers are ignored. 84 | ''' 85 | data_dict = np.load(data_path, encoding='latin1', allow_pickle=True).item() #pylint: disable=no-member 86 | 87 | for op_name in data_dict: 88 | with tf.variable_scope(op_name, reuse=True): 89 | for param_name, data in iteritems(data_dict[op_name]): 90 | try: 91 | var = tf.get_variable(param_name) 92 | session.run(var.assign(data)) 93 | except ValueError: 94 | if not ignore_missing: 95 | raise 96 | 97 | def feed(self, *args): 98 | '''Set the input(s) for the next operation by replacing the terminal nodes. 99 | The arguments can be either layer names or the actual layers. 100 | ''' 101 | assert len(args) != 0 102 | self.terminals = [] 103 | for fed_layer in args: 104 | if isinstance(fed_layer, string_types): 105 | try: 106 | fed_layer = self.layers[fed_layer] 107 | except KeyError: 108 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 109 | self.terminals.append(fed_layer) 110 | return self 111 | 112 | def get_output(self): 113 | '''Returns the current network output.''' 114 | return self.terminals[-1] 115 | 116 | def get_unique_name(self, prefix): 117 | '''Returns an index-suffixed unique name for the given prefix. 118 | This is used for auto-generating layer names based on the type-prefix. 
119 | ''' 120 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 121 | return '%s_%d' % (prefix, ident) 122 | 123 | def make_var(self, name, shape): 124 | '''Creates a new TensorFlow variable.''' 125 | return tf.get_variable(name, shape, trainable=self.trainable) 126 | 127 | def validate_padding(self, padding): 128 | '''Verifies that the padding is one of the supported ones.''' 129 | assert padding in ('SAME', 'VALID') 130 | 131 | @layer 132 | def conv(self, 133 | inp, 134 | k_h, 135 | k_w, 136 | c_o, 137 | s_h, 138 | s_w, 139 | name, 140 | relu=True, 141 | padding='SAME', 142 | group=1, 143 | biased=True): 144 | # Verify that the padding is acceptable 145 | self.validate_padding(padding) 146 | # Get the number of channels in the input 147 | c_i = int(inp.get_shape()[-1]) 148 | # Verify that the grouping parameter is valid 149 | assert c_i % group == 0 150 | assert c_o % group == 0 151 | # Convolution for a given input and kernel 152 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 153 | with tf.variable_scope(name) as scope: 154 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 155 | # This is the common-case. Convolve the input without any further complications. 156 | output = convolve(inp, kernel) 157 | # Add the biases 158 | if biased: 159 | biases = self.make_var('biases', [c_o]) 160 | output = tf.nn.bias_add(output, biases) 161 | if relu: 162 | # ReLU non-linearity 163 | output = tf.nn.relu(output, name=scope.name) 164 | return output 165 | 166 | @layer 167 | def prelu(self, inp, name): 168 | with tf.variable_scope(name): 169 | i = int(inp.get_shape()[-1]) 170 | alpha = self.make_var('alpha', shape=(i,)) 171 | output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) 172 | return output 173 | 174 | @layer 175 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 176 | self.validate_padding(padding) 177 | return tf.nn.max_pool(inp, 178 | ksize=[1, k_h, k_w, 1], 179 | strides=[1, s_h, s_w, 1], 180 | padding=padding, 181 | name=name) 182 | 183 | @layer 184 | def fc(self, inp, num_out, name, relu=True): 185 | with tf.variable_scope(name): 186 | input_shape = inp.get_shape() 187 | if input_shape.ndims == 4: 188 | # The input is spatial. Vectorize it first. 
189 | dim = 1 190 | for d in input_shape[1:].as_list(): 191 | dim *= int(d) 192 | feed_in = tf.reshape(inp, [-1, dim]) 193 | else: 194 | feed_in, dim = (inp, input_shape[-1].value) 195 | weights = self.make_var('weights', shape=[dim, num_out]) 196 | biases = self.make_var('biases', [num_out]) 197 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 198 | fc = op(feed_in, weights, biases, name=name) 199 | return fc 200 | 201 | 202 | """ 203 | Multi dimensional softmax, 204 | refer to https://github.com/tensorflow/tensorflow/issues/210 205 | compute softmax along the dimension of target 206 | the native softmax only supports batch_size x dimension 207 | """ 208 | @layer 209 | def softmax(self, target, axis, name=None): 210 | max_axis = tf.reduce_max(target, axis, keep_dims=True) 211 | target_exp = tf.exp(target-max_axis) 212 | normalize = tf.reduce_sum(target_exp, axis, keep_dims=True) 213 | softmax = tf.div(target_exp, normalize, name) 214 | return softmax 215 | 216 | class PNet(Network): 217 | def setup(self): 218 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 219 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 220 | .prelu(name='PReLU1') 221 | .max_pool(2, 2, 2, 2, name='pool1') 222 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 223 | .prelu(name='PReLU2') 224 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 225 | .prelu(name='PReLU3') 226 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 227 | .softmax(3,name='prob1')) 228 | 229 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 230 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 231 | 232 | class RNet(Network): 233 | def setup(self): 234 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 235 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 236 | .prelu(name='prelu1') 237 | .max_pool(3, 3, 2, 2, name='pool1') 238 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 239 | .prelu(name='prelu2') 240 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 241 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 242 | .prelu(name='prelu3') 243 | .fc(128, relu=False, name='conv4') 244 | .prelu(name='prelu4') 245 | .fc(2, relu=False, name='conv5-1') 246 | .softmax(1,name='prob1')) 247 | 248 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 249 | .fc(4, relu=False, name='conv5-2')) 250 | 251 | class ONet(Network): 252 | def setup(self): 253 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 254 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 255 | .prelu(name='prelu1') 256 | .max_pool(3, 3, 2, 2, name='pool1') 257 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 258 | .prelu(name='prelu2') 259 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 260 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 261 | .prelu(name='prelu3') 262 | .max_pool(2, 2, 2, 2, name='pool3') 263 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 264 | .prelu(name='prelu4') 265 | .fc(256, relu=False, name='conv5') 266 | .prelu(name='prelu5') 267 | .fc(2, relu=False, name='conv6-1') 268 | .softmax(1, name='prob1')) 269 | 270 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 271 | .fc(4, relu=False, name='conv6-2')) 272 | 273 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 274 | .fc(10, relu=False, name='conv6-3')) 275 | 276 | def create_mtcnn(sess, model_path): 277 | if not 
model_path: 278 | model_path,_ = os.path.split(os.path.realpath(__file__)) 279 | 280 | with tf.variable_scope('pnet'): 281 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 282 | pnet = PNet({'data':data}) 283 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 284 | with tf.variable_scope('rnet'): 285 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 286 | rnet = RNet({'data':data}) 287 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 288 | with tf.variable_scope('onet'): 289 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 290 | onet = ONet({'data':data}) 291 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 292 | 293 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 294 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 295 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 296 | return pnet_fun, rnet_fun, onet_fun 297 | 298 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 299 | # img: input image 300 | # minsize: minimum face size to search for 301 | # pnet, rnet, onet: the three stage networks returned by create_mtcnn 302 | # threshold: threshold=[th1 th2 th3], the score thresholds of the three stages 303 | # factor: scale step used to build the image pyramid 304 | factor_count=0 305 | total_boxes=np.empty((0,9)) 306 | points=[] 307 | h=img.shape[0] 308 | w=img.shape[1] 309 | minl=np.amin([h, w]) 310 | m=12.0/minsize 311 | minl=minl*m 312 | # create scale pyramid 313 | scales=[] 314 | while minl>=12: 315 | scales += [m*np.power(factor, factor_count)] 316 | minl = minl*factor 317 | factor_count += 1 318 | 319 | # first stage 320 | for j in range(len(scales)): 321 | scale=scales[j] 322 | hs=int(np.ceil(h*scale)) 323 | ws=int(np.ceil(w*scale)) 324 | im_data = imresample(img, (hs, ws)) 325 | im_data = (im_data-127.5)*0.0078125 326 | img_x = np.expand_dims(im_data, 0) 327 | img_y = np.transpose(img_x, (0,2,1,3)) 328 | out = pnet(img_y) 329 | out0 = np.transpose(out[0], (0,2,1,3)) 330 | out1 = np.transpose(out[1], (0,2,1,3)) 331 | 332 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 333 | 334 | # inter-scale nms 335 | pick = nms(boxes.copy(), 0.5, 'Union') 336 | if boxes.size>0 and pick.size>0: 337 | boxes = boxes[pick,:] 338 | total_boxes = np.append(total_boxes, boxes, axis=0) 339 | 340 | numbox = total_boxes.shape[0] 341 | if numbox>0: 342 | pick = nms(total_boxes.copy(), 0.7, 'Union') 343 | total_boxes = total_boxes[pick,:] 344 | regw = total_boxes[:,2]-total_boxes[:,0] 345 | regh = total_boxes[:,3]-total_boxes[:,1] 346 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 347 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 348 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 349 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 350 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 351 | total_boxes = rerec(total_boxes.copy()) 352 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 353 | dy,edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 354 | 355 | numbox = total_boxes.shape[0] 356 | if numbox>0: 357 | # second stage 358 | tempimg = np.zeros((24,24,3,numbox)) 359 | for k in range(0,numbox): 360 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 361 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
362 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 363 | tempimg[:,:,:,k] = imresample(tmp, (24, 24)) 364 | else: 365 | return np.empty() 366 | tempimg = (tempimg-127.5)*0.0078125 367 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 368 | out = rnet(tempimg1) 369 | out0 = np.transpose(out[0]) 370 | out1 = np.transpose(out[1]) 371 | score = out1[1,:] 372 | ipass = np.where(score>threshold[1]) 373 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 374 | mv = out0[:,ipass[0]] 375 | if total_boxes.shape[0]>0: 376 | pick = nms(total_boxes, 0.7, 'Union') 377 | total_boxes = total_boxes[pick,:] 378 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) 379 | total_boxes = rerec(total_boxes.copy()) 380 | 381 | numbox = total_boxes.shape[0] 382 | if numbox>0: 383 | # third stage 384 | total_boxes = np.fix(total_boxes).astype(np.int32) 385 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 386 | tempimg = np.zeros((48,48,3,numbox)) 387 | for k in range(0,numbox): 388 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 389 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 390 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 391 | tempimg[:,:,:,k] = imresample(tmp, (48, 48)) 392 | else: 393 | return np.empty() 394 | tempimg = (tempimg-127.5)*0.0078125 395 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 396 | out = onet(tempimg1) 397 | out0 = np.transpose(out[0]) 398 | out1 = np.transpose(out[1]) 399 | out2 = np.transpose(out[2]) 400 | score = out2[1,:] 401 | points = out1 402 | ipass = np.where(score>threshold[2]) 403 | points = points[:,ipass[0]] 404 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 405 | mv = out0[:,ipass[0]] 406 | 407 | w = total_boxes[:,2]-total_boxes[:,0]+1 408 | h = total_boxes[:,3]-total_boxes[:,1]+1 409 | points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 410 | points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 411 | if total_boxes.shape[0]>0: 412 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) 413 | pick = nms(total_boxes.copy(), 0.7, 'Min') 414 | total_boxes = total_boxes[pick,:] 415 | points = points[:,pick] 416 | 417 | return total_boxes, points 418 | 419 | def detect_face_force(img, bbox, pnet, rnet, onet): 420 | total_boxes = np.zeros( (1,5), dtype=np.float32) 421 | total_boxes[0,0:4] = bbox 422 | threshold = [0.0,0.0,0.0] 423 | h=img.shape[0] 424 | w=img.shape[1] 425 | numbox = total_boxes.shape[0] 426 | if numbox>0: 427 | dy,edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 428 | # second stage 429 | tempimg = np.zeros((24,24,3,numbox)) 430 | for k in range(0,numbox): 431 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 432 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 433 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 434 | tempimg[:,:,:,k] = imresample(tmp, (24, 24)) 435 | else: 436 | return np.empty() 437 | tempimg = (tempimg-127.5)*0.0078125 438 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 439 | out = rnet(tempimg1) 440 | out0 = np.transpose(out[0]) 441 | out1 = np.transpose(out[1]) 442 | score = out1[1,:] 443 | ipass = np.where(score>threshold[1]) 444 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 445 | mv = out0[:,ipass[0]] 446 | if 
total_boxes.shape[0]>0: 447 | pick = nms(total_boxes, 0.7, 'Union') 448 | total_boxes = total_boxes[pick,:] 449 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) 450 | total_boxes = rerec(total_boxes.copy()) 451 | 452 | numbox = total_boxes.shape[0] 453 | if numbox>0: 454 | # third stage 455 | total_boxes = np.fix(total_boxes).astype(np.int32) 456 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 457 | tempimg = np.zeros((48,48,3,numbox)) 458 | for k in range(0,numbox): 459 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 460 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 461 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 462 | tempimg[:,:,:,k] = imresample(tmp, (48, 48)) 463 | else: 464 | return np.empty() 465 | tempimg = (tempimg-127.5)*0.0078125 466 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 467 | out = onet(tempimg1) 468 | out0 = np.transpose(out[0]) 469 | out1 = np.transpose(out[1]) 470 | out2 = np.transpose(out[2]) 471 | score = out2[1,:] 472 | points = out1 473 | ipass = np.where(score>threshold[2]) 474 | points = points[:,ipass[0]] 475 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 476 | mv = out0[:,ipass[0]] 477 | 478 | w = total_boxes[:,2]-total_boxes[:,0]+1 479 | h = total_boxes[:,3]-total_boxes[:,1]+1 480 | points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 481 | points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 482 | if total_boxes.shape[0]>0: 483 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) 484 | pick = nms(total_boxes.copy(), 0.7, 'Min') 485 | total_boxes = total_boxes[pick,:] 486 | points = points[:,pick] 487 | 488 | return total_boxes, points 489 | 490 | def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor): 491 | # images: list of input images 492 | # detection_window_size_ratio: minimum face size, as a fraction of the shorter image side 493 | # pnet, rnet, onet: the three stage networks returned by create_mtcnn 494 | # threshold: threshold=[th1 th2 th3], the score thresholds of the three stages, each in [0, 1] 495 | 496 | all_scales = [None] * len(images) 497 | images_with_boxes = [None] * len(images) 498 | 499 | for i in range(len(images)): 500 | images_with_boxes[i] = {'total_boxes': np.empty((0, 9))} 501 | 502 | # create scale pyramid 503 | for index, img in enumerate(images): 504 | all_scales[index] = [] 505 | h = img.shape[0] 506 | w = img.shape[1] 507 | minsize = int(detection_window_size_ratio * np.minimum(w, h)) 508 | factor_count = 0 509 | minl = np.amin([h, w]) 510 | if minsize <= 12: 511 | minsize = 12 512 | 513 | m = 12.0 / minsize 514 | minl = minl * m 515 | while minl >= 12: 516 | all_scales[index].append(m * np.power(factor, factor_count)) 517 | minl = minl * factor 518 | factor_count += 1 519 | 520 | # # # # # # # # # # # # # 521 | # first stage - fast proposal network (pnet) to obtain face candidates 522 | # # # # # # # # # # # # # 523 | 524 | images_obj_per_resolution = {} 525 | 526 | # TODO: round pyramid sizes to a multiple of 8 to increase the probability that pyramid images will have the same resolution across input images 527 | 528 | for index, scales in enumerate(all_scales): 529 | h = images[index].shape[0] 530 | w = images[index].shape[1] 531 | 532 | for scale in scales: 533 | hs = int(np.ceil(h * scale)) 534 | ws = int(np.ceil(w * scale)) 535 | 536 | if (ws, hs) not in images_obj_per_resolution: 537 | images_obj_per_resolution[(ws, hs)] = [] 538 | 539 | im_data =
imresample(images[index], (hs, ws)) 540 | im_data = (im_data - 127.5) * 0.0078125 541 | img_y = np.transpose(im_data, (1, 0, 2)) # caffe uses different dimensions ordering 542 | images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index}) 543 | 544 | for resolution in images_obj_per_resolution: 545 | images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]] 546 | outs = pnet(images_per_resolution) 547 | 548 | for index in range(len(outs[0])): 549 | scale = images_obj_per_resolution[resolution][index]['scale'] 550 | image_index = images_obj_per_resolution[resolution][index]['index'] 551 | out0 = np.transpose(outs[0][index], (1, 0, 2)) 552 | out1 = np.transpose(outs[1][index], (1, 0, 2)) 553 | 554 | boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0]) 555 | 556 | # inter-scale nms 557 | pick = nms(boxes.copy(), 0.5, 'Union') 558 | if boxes.size > 0 and pick.size > 0: 559 | boxes = boxes[pick, :] 560 | images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'], 561 | boxes, 562 | axis=0) 563 | 564 | for index, image_obj in enumerate(images_with_boxes): 565 | numbox = image_obj['total_boxes'].shape[0] 566 | if numbox > 0: 567 | h = images[index].shape[0] 568 | w = images[index].shape[1] 569 | pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union') 570 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 571 | regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] 572 | regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] 573 | qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw 574 | qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh 575 | qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw 576 | qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh 577 | image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]])) 578 | image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) 579 | image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32) 580 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) 581 | 582 | numbox = image_obj['total_boxes'].shape[0] 583 | tempimg = np.zeros((24, 24, 3, numbox)) 584 | 585 | if numbox > 0: 586 | for k in range(0, numbox): 587 | tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) 588 | tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] 589 | if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: 590 | tempimg[:, :, :, k] = imresample(tmp, (24, 24)) 591 | else: 592 | return np.empty() 593 | 594 | tempimg = (tempimg - 127.5) * 0.0078125 595 | image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) 596 | 597 | # # # # # # # # # # # # # 598 | # second stage - refinement of face candidates with rnet 599 | # # # # # # # # # # # # # 600 | 601 | bulk_rnet_input = np.empty((0, 24, 24, 3)) 602 | for index, image_obj in enumerate(images_with_boxes): 603 | if 'rnet_input' in image_obj: 604 | bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0) 605 | 606 | out = rnet(bulk_rnet_input) 607 | out0 = np.transpose(out[0]) 608 | out1 = np.transpose(out[1]) 609 | score = out1[1, :] 610 | 611 | i = 0 612 | for index, image_obj in enumerate(images_with_boxes): 613 | if 
'rnet_input' not in image_obj: 614 | continue 615 | 616 | rnet_input_count = image_obj['rnet_input'].shape[0] 617 | score_per_image = score[i:i + rnet_input_count] 618 | out0_per_image = out0[:, i:i + rnet_input_count] 619 | 620 | ipass = np.where(score_per_image > threshold[1]) 621 | image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), 622 | np.expand_dims(score_per_image[ipass].copy(), 1)]) 623 | 624 | mv = out0_per_image[:, ipass[0]] 625 | 626 | if image_obj['total_boxes'].shape[0] > 0: 627 | h = images[index].shape[0] 628 | w = images[index].shape[1] 629 | pick = nms(image_obj['total_boxes'], 0.7, 'Union') 630 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 631 | image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick])) 632 | image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) 633 | 634 | numbox = image_obj['total_boxes'].shape[0] 635 | 636 | if numbox > 0: 637 | tempimg = np.zeros((48, 48, 3, numbox)) 638 | image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32) 639 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) 640 | 641 | for k in range(0, numbox): 642 | tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) 643 | tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] 644 | if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: 645 | tempimg[:, :, :, k] = imresample(tmp, (48, 48)) 646 | else: 647 | return np.empty() 648 | tempimg = (tempimg - 127.5) * 0.0078125 649 | image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) 650 | 651 | i += rnet_input_count 652 | 653 | # # # # # # # # # # # # # 654 | # third stage - further refinement and facial landmarks positions with onet 655 | # # # # # # # # # # # # # 656 | 657 | bulk_onet_input = np.empty((0, 48, 48, 3)) 658 | for index, image_obj in enumerate(images_with_boxes): 659 | if 'onet_input' in image_obj: 660 | bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0) 661 | 662 | out = onet(bulk_onet_input) 663 | 664 | out0 = np.transpose(out[0]) 665 | out1 = np.transpose(out[1]) 666 | out2 = np.transpose(out[2]) 667 | score = out2[1, :] 668 | points = out1 669 | 670 | i = 0 671 | ret = [] 672 | for index, image_obj in enumerate(images_with_boxes): 673 | if 'onet_input' not in image_obj: 674 | ret.append(None) 675 | continue 676 | 677 | onet_input_count = image_obj['onet_input'].shape[0] 678 | 679 | out0_per_image = out0[:, i:i + onet_input_count] 680 | score_per_image = score[i:i + onet_input_count] 681 | points_per_image = points[:, i:i + onet_input_count] 682 | 683 | ipass = np.where(score_per_image > threshold[2]) 684 | points_per_image = points_per_image[:, ipass[0]] 685 | 686 | image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), 687 | np.expand_dims(score_per_image[ipass].copy(), 1)]) 688 | mv = out0_per_image[:, ipass[0]] 689 | 690 | w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1 691 | h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1 692 | points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile( 693 | image_obj['total_boxes'][:, 0], (5, 1)) - 1 694 | points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile( 695 | image_obj['total_boxes'][:, 1], (5, 1)) - 1 696 | 697 | if image_obj['total_boxes'].shape[0] > 0: 698 | image_obj['total_boxes'] = 
bbreg(image_obj['total_boxes'].copy(), np.transpose(mv)) 699 | pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min') 700 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 701 | points_per_image = points_per_image[:, pick] 702 | 703 | ret.append((image_obj['total_boxes'], points_per_image)) 704 | else: 705 | ret.append(None) 706 | 707 | i += onet_input_count 708 | 709 | return ret 710 | 711 | 712 | # function [boundingbox] = bbreg(boundingbox,reg) 713 | def bbreg(boundingbox,reg): 714 | # calibrate bounding boxes 715 | if reg.shape[1]==1: 716 | reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) 717 | 718 | w = boundingbox[:,2]-boundingbox[:,0]+1 719 | h = boundingbox[:,3]-boundingbox[:,1]+1 720 | b1 = boundingbox[:,0]+reg[:,0]*w 721 | b2 = boundingbox[:,1]+reg[:,1]*h 722 | b3 = boundingbox[:,2]+reg[:,2]*w 723 | b4 = boundingbox[:,3]+reg[:,3]*h 724 | boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) 725 | return boundingbox 726 | 727 | def generateBoundingBox(imap, reg, scale, t): 728 | # use heatmap to generate bounding boxes 729 | stride=2 730 | cellsize=12 731 | 732 | imap = np.transpose(imap) 733 | dx1 = np.transpose(reg[:,:,0]) 734 | dy1 = np.transpose(reg[:,:,1]) 735 | dx2 = np.transpose(reg[:,:,2]) 736 | dy2 = np.transpose(reg[:,:,3]) 737 | y, x = np.where(imap >= t) 738 | if y.shape[0]==1: 739 | dx1 = np.flipud(dx1) 740 | dy1 = np.flipud(dy1) 741 | dx2 = np.flipud(dx2) 742 | dy2 = np.flipud(dy2) 743 | score = imap[(y,x)] 744 | reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) 745 | if reg.size==0: 746 | reg = np.empty((0,3)) 747 | bb = np.transpose(np.vstack([y,x])) 748 | q1 = np.fix((stride*bb+1)/scale) 749 | q2 = np.fix((stride*bb+cellsize-1+1)/scale) 750 | boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) 751 | return boundingbox, reg 752 | 753 | # function pick = nms(boxes,threshold,type) 754 | def nms(boxes, threshold, method): 755 | if boxes.size==0: 756 | return np.empty((0,3)) 757 | x1 = boxes[:,0] 758 | y1 = boxes[:,1] 759 | x2 = boxes[:,2] 760 | y2 = boxes[:,3] 761 | s = boxes[:,4] 762 | area = (x2-x1+1) * (y2-y1+1) 763 | I = np.argsort(s) 764 | pick = np.zeros_like(s, dtype=np.int16) 765 | counter = 0 766 | while I.size>0: 767 | i = I[-1] 768 | pick[counter] = i 769 | counter += 1 770 | idx = I[0:-1] 771 | xx1 = np.maximum(x1[i], x1[idx]) 772 | yy1 = np.maximum(y1[i], y1[idx]) 773 | xx2 = np.minimum(x2[i], x2[idx]) 774 | yy2 = np.minimum(y2[i], y2[idx]) 775 | w = np.maximum(0.0, xx2-xx1+1) 776 | h = np.maximum(0.0, yy2-yy1+1) 777 | inter = w * h 778 | if method == 'Min': 779 | o = inter / np.minimum(area[i], area[idx]) 780 | else: 781 | o = inter / (area[i] + area[idx] - inter) 782 | I = I[np.where(o<=threshold)] 783 | pick = pick[0:counter] 784 | return pick 785 | 786 | # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) 787 | def pad(total_boxes, w, h): 788 | # compute the padding coordinates (pad the bounding boxes to square) 789 | tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) 790 | tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) 791 | numbox = total_boxes.shape[0] 792 | 793 | dx = np.ones((numbox), dtype=np.int32) 794 | dy = np.ones((numbox), dtype=np.int32) 795 | edx = tmpw.copy().astype(np.int32) 796 | edy = tmph.copy().astype(np.int32) 797 | 798 | x = total_boxes[:,0].copy().astype(np.int32) 799 | y = total_boxes[:,1].copy().astype(np.int32) 800 | ex = total_boxes[:,2].copy().astype(np.int32) 801 | ey = total_boxes[:,3].copy().astype(np.int32) 
802 | 803 | tmp = np.where(ex>w) 804 | edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) 805 | ex[tmp] = w 806 | 807 | tmp = np.where(ey>h) 808 | edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) 809 | ey[tmp] = h 810 | 811 | tmp = np.where(x<1) 812 | dx.flat[tmp] = np.expand_dims(2-x[tmp],1) 813 | x[tmp] = 1 814 | 815 | tmp = np.where(y<1) 816 | dy.flat[tmp] = np.expand_dims(2-y[tmp],1) 817 | y[tmp] = 1 818 | 819 | return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph 820 | 821 | # function [bboxA] = rerec(bboxA) 822 | def rerec(bboxA): 823 | # convert bboxA to square 824 | h = bboxA[:,3]-bboxA[:,1] 825 | w = bboxA[:,2]-bboxA[:,0] 826 | l = np.maximum(w, h) 827 | bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 828 | bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 829 | bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) 830 | return bboxA 831 | 832 | def imresample(img, sz): 833 | im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable 834 | return im_data 835 | 836 | # This method is kept for debugging purposes 837 | # h=img.shape[0] 838 | # w=img.shape[1] 839 | # hs, ws = sz 840 | # dx = float(w) / ws 841 | # dy = float(h) / hs 842 | # im_data = np.zeros((hs,ws,3)) 843 | # for a1 in range(0,hs): 844 | # for a2 in range(0,ws): 845 | # for a3 in range(0,3): 846 | # im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3] 847 | # return im_data 848 | 849 | --------------------------------------------------------------------------------
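For readers who want to call the detector in `Demo/align/detect_face.py` directly, here is a minimal usage sketch. It assumes TensorFlow 1.x semantics (`tf.Session`, `tf.placeholder`), OpenCV, and that `det1.npy`/`det2.npy`/`det3.npy` sit next to `detect_face.py` as in `Demo/align`; the image path is a placeholder.

```python
# Minimal sketch: run the three-stage MTCNN detector from detect_face.py.
import cv2
import tensorflow as tf
import detect_face

with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        # Passing None makes create_mtcnn load det1/det2/det3.npy from the
        # directory containing detect_face.py itself.
        pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

img = cv2.cvtColor(cv2.imread('face.jpg'), cv2.COLOR_BGR2RGB)  # hypothetical input

minsize = 20                  # smallest face side (pixels) to search for
threshold = [0.6, 0.7, 0.7]   # per-stage score thresholds [PNet, RNet, ONet]
factor = 0.709                # scale step of the image pyramid

# boxes is an Nx5 array of [x1, y1, x2, y2, score]; points is a 10xN array of
# landmark coordinates (five x rows followed by five y rows per face).
boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet,
                                        threshold, factor)
print('found %d face(s)' % boxes.shape[0])
```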
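`bulk_detect_face` pushes the pyramid levels of several images through each stage together. A hedged sketch of calling it, reusing the networks from the snippet above (image paths are placeholders):

```python
# Sketch: batched detection over several images with bulk_detect_face.
# Here 0.05 means "look for faces at least 5% of the shorter image side";
# it is converted internally to an absolute minsize per image.
paths = ['a.jpg', 'b.jpg']  # hypothetical inputs
imgs = [cv2.cvtColor(cv2.imread(p), cv2.COLOR_BGR2RGB) for p in paths]

results = detect_face.bulk_detect_face(imgs, 0.05, pnet, rnet, onet,
                                       [0.6, 0.7, 0.7], 0.709)
for path, res in zip(paths, results):
    if res is None:  # no candidate survived all three stages for this image
        print('%s: no faces' % path)
        continue
    boxes, landmarks = res
    print('%s: %d face(s)' % (path, boxes.shape[0]))
```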
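The pyramid construction at the top of `detect_face` is compact, so here is the same arithmetic written out on its own. No assumptions beyond the code above: 12 px is PNet's input size, and since `0.0078125 = 1/128`, the `(x - 127.5) * 0.0078125` preprocessing maps pixel values from [0, 255] into roughly [-1, 1).

```python
# The scale pyramid detect_face builds: start at m = 12/minsize and shrink by
# `factor` until the shorter image side would fall below PNet's 12-px input.
def pyramid_scales(h, w, minsize=20, factor=0.709):
    m = 12.0 / minsize
    minl = min(h, w) * m
    scales = []
    while minl >= 12:
        scales.append(m * factor ** len(scales))
        minl *= factor
    return scales

# For a 480x640 frame and minsize=20 this yields 10 levels: 0.6, 0.425, 0.302, ...
print(pyramid_scales(480, 640))
```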