├── README.md
├── camera_tester.py
├── datasets.py
├── main.py
├── models.py
└── prepare_data_cofw.py
/README.md:
--------------------------------------------------------------------------------
1 | # Face-Occlusion-Detect
2 | A simple CNN-based face occlusion detector implemented with TensorFlow Keras.
3 | For the detailed design doc, please refer to:
4 | 
5 | ![image](https://github.com/Oreobird/effect3d/blob/master/wechat.jpg)
6 | 
7 | ### Dependencies
8 | ```
9 | dlib >= 19.17.0
10 | tensorflow >= 1.12.0
11 | keras >= 2.2.4
12 | numpy >= 1.11.1
13 | scipy >= 0.14
14 | opencv-python >= 3.4.3
15 | ```
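They can be installed with pip, e.g. (a minimal sketch; exact wheels depend on your platform, `tensorflow<2.0` because the code targets the TF 1.x API, and `h5py`/`imutils` are added because the scripts import them although they are not listed above):
```
pip install "dlib>=19.17.0" "tensorflow>=1.12.0,<2.0" "keras>=2.2.4" "numpy>=1.11.1" "scipy>=0.14" "opencv-python>=3.4.3" h5py imutils
```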
16 | ### Usage
17 | ##### 1. Data and trained models download
18 | download link: [https://pan.baidu.com/s/10LvoXEUGMTZjufd7R8jh4A](https://pan.baidu.com/s/10LvoXEUGMTZjufd7R8jh4A)
19 | extraction code: 0p5j
20 | Download the COFW dataset (`COFW_train.mat` / `COFW_test.mat`) and the pretrained models into the source code directory.
21 | 
22 | ##### 2. Train
23 | (1) prepare data
24 | ```
25 | python prepare_data_cofw.py --data_dir "./data/cofw/"
26 | ```
27 | (2) train
28 | ```
29 | python main.py \
30 | --proj_dir "./" \ #Project directory
31 | --input_size 96 \ #Input image size to train
32 | --batch_size 100 \ #Train batch size
33 | --fine_tune False \ #Finetune VGG16 or not
34 | --epochs 100 \ #Train epochs
35 | --train True #Train or test
36 | ```
37 | ##### 3. Test
38 | (1) test on test_data
39 | ```
40 | python main.py \
41 | --proj_dir "./" \ #Project directory
42 | --input_size 96 \ #Input image size to test
43 | --fine_tune False \ #Finetune VGG16 or not
44 | --train False #Train or test
45 | ```
46 | (2) test on camera video stream data (a camera device is required)
47 | ```
48 | python main.py \
49 | --proj_dir "./" \ # Project directory
50 | --input_size 96 \ # Input image size
51 | --fine_tune False \ # Finetune VGG16 or not
52 | --camera_test True
53 | ```
54 | 
55 | 
56 | 
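`prepare_data_cofw.py` writes `train.txt`, `val.txt` and `test.txt` under the data directory; each line is an image path, a comma, and six space-separated 0/1 flags in the order `normal, right_eye, left_eye, nose, mouth, chin`, e.g. (illustrative paths):
```
./data/cofw/face_train/1.jpg,1 0 0 0 0 0
./data/cofw/face_train_block/2.jpg,0 0 0 1 0 0
```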
--------------------------------------------------------------------------------
/camera_tester.py:
--------------------------------------------------------------------------------
1 | import os
2 | import dlib
3 | from imutils import face_utils
4 | import cv2
5 | import numpy as np
6 | 
7 | class CameraTester():
8 |     def __init__(self, net=None, input_size=96, fine_tune=False, face_landmark_path='./model/shape_predictor_68_face_landmarks.dat'):
9 |         self.cap = cv2.VideoCapture(0)
10 |         if not self.cap.isOpened():
11 |             raise Exception("Unable to connect to camera.")
12 |         self.detector = dlib.get_frontal_face_detector()
13 |         self.predictor = dlib.shape_predictor(face_landmark_path)
14 |         self.net = net
15 |         self.input_size = input_size
16 |         self.fine_tune = fine_tune
17 | 
18 |     def crop_face(self, shape, img, input_size):
19 |         x = []
20 |         y = []
21 |         for (_x, _y) in shape:
22 |             x.append(_x)
23 |             y.append(_y)
24 | 
25 |         max_x = min(max(x), img.shape[1])
26 |         min_x = max(min(x), 0)
27 |         max_y = min(max(y), img.shape[0])
28 |         min_y = max(min(y), 0)
29 | 
30 |         Lx = max_x - min_x
31 |         Ly = max_y - min_y
32 |         Lmax = int(max(Lx, Ly)) * 1.15  # pad the landmark box by 15%
33 |         delta = Lmax // 2
34 | 
35 |         center_x = (max(x) + min(x)) // 2
36 |         center_y = (max(y) + min(y)) // 2
37 |         start_x = int(center_x - delta)
38 |         start_y = int(center_y - 0.98 * delta)
39 |         end_x = int(center_x + delta)
40 |         end_y = int(center_y + 1.02 * delta)
41 | 
42 |         start_y = 0 if start_y < 0 else start_y
43 |         start_x = 0 if start_x < 0 else start_x
44 |         end_x = img.shape[1] if end_x > img.shape[1] else end_x
45 |         end_y = img.shape[0] if end_y > img.shape[0] else end_y
46 | 
47 |         crop_face = img[start_y:end_y, start_x:end_x]
48 |         if not self.fine_tune:
49 |             # camera frames are BGR, so convert with COLOR_BGR2GRAY (not RGB2GRAY)
50 |             crop_face = cv2.cvtColor(crop_face, cv2.COLOR_BGR2GRAY)
51 |         crop_face = cv2.resize(crop_face, (input_size, input_size)) / 255
52 |         channel = 3 if self.fine_tune else 1
53 |         # reshape (np.resize would tile pixels) into the network's input layout
54 |         crop_face = np.reshape(crop_face, (input_size, input_size, channel))
55 |         return crop_face, start_y, end_y, start_x, end_x
56 | 
57 |     def get_area(self, shape, idx):
58 |         # each area is [[center_x, center_y], radius]
59 |         left_eye = [(shape[42] + shape[45]) // 2, abs(shape[45][0] - shape[42][0])]
60 |         right_eye = [(shape[36] + shape[39]) // 2, abs(shape[39][0] - shape[36][0])]
61 |         nose = [shape[30], int(abs(shape[31][0] - shape[35][0]) / 1.5)]
62 |         mouth = [(shape[48] + shape[54]) // 2, abs(shape[48][0] - shape[54][0]) // 2]
63 |         chin = [shape[8], nose[1]]
64 |         area = [None, right_eye, left_eye, nose, mouth, chin]
65 |         block_area = [x for i, x in enumerate(area) if i in idx]
66 |         return block_area
67 | 
68 |     def draw_occlusion_area(self, img, shape, idx):
69 |         area = self.get_area(shape, idx)
70 |         for k, v in enumerate(area):
71 |             if v:
72 |                 cv2.circle(img, tuple(v[0]), v[1], (0, 255, 0))
73 | 
74 |     def run(self):
75 |         frames = []
76 | 
77 |         while self.cap.isOpened():
78 |             ret, frame = self.cap.read()
79 |             if ret:
80 |                 face_rects = self.detector(frame, 0)
81 | 
82 |                 if len(face_rects) > 0:
83 |                     shape = self.predictor(frame, face_rects[0])
84 |                     shape = face_utils.shape_to_np(shape)
85 | 
86 |                     input_img, start_y, end_y, start_x, end_x = self.crop_face(shape, frame, self.input_size)
87 | 
88 |                     cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), thickness=2)
89 | 
90 |                     frames.append({'fod_input': input_img})
91 |                     if len(frames) == 1:
92 |                         pred = self.net.test_online(frames)
93 |                         # keep only areas whose sigmoid score exceeds 0.8
94 |                         idx = [i for i, x in enumerate(pred[0]) if x > 0.8]
95 |                         frames = []
96 |                         if len(idx):
97 |                             self.draw_occlusion_area(frame, shape, idx)
98 |                 else:
99 |                     print("No face detected")
100 | 
101 |                 cv2.imshow("frame", frame)
102 |                 if cv2.waitKey(1) & 0xFF == ord('q'):
103 |                     break
104 | 
105 | if __name__ == '__main__':
106 |     camera_tester = CameraTester()
107 |     camera_tester.run()
108 | 
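For reference, the prediction vector follows the class order defined in `main.py` (`['normal', 'right_eye', 'left_eye', 'nose', 'mouth', 'chin']`), so the `x > 0.8` threshold above maps sigmoid scores to occluded areas as in this standalone sketch (the score vector is made up):
```
FOD_CLASS_NAMES = ['normal', 'right_eye', 'left_eye', 'nose', 'mouth', 'chin']

pred = [0.03, 0.91, 0.12, 0.88, 0.07, 0.02]  # hypothetical sigmoid outputs for one frame
idx = [i for i, x in enumerate(pred) if x > 0.8]
print([FOD_CLASS_NAMES[i] for i in idx])     # ['right_eye', 'nose']
```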
--------------------------------------------------------------------------------
/datasets.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import cv2
4 | import random
5 | import tensorflow as tf
6 | 
7 | 
8 | class DataSet:
9 |     def __init__(self, proj_dir, data_dir, batch_size=64, input_size=64, fine_tune=False):
10 |         self.proj_dir = proj_dir
11 |         self.data_dir = os.path.join(proj_dir, data_dir)
12 |         self.batch_size = batch_size
13 |         self.input_size = input_size
14 |         self.fine_tune = fine_tune
15 |         self.__train_num, self.__val_num, self.__test_num = self.__get_samples_num(os.path.join(self.data_dir, 'train.txt'),
16 |                                                                                    os.path.join(self.data_dir, 'val.txt'),
17 |                                                                                    os.path.join(self.data_dir, 'test.txt'))
18 | 
19 |     def __get_samples_num(self, train_label_file, val_label_file, test_label_file):
20 |         train_num = 0
21 |         val_num = 0
22 |         test_num = 0
23 |         if not os.path.exists(train_label_file) or \
24 |            not os.path.exists(val_label_file) or \
25 |            not os.path.exists(test_label_file):
26 |             return train_num, val_num, test_num
27 | 
28 |         with open(train_label_file) as f:
29 |             train_num = len(f.readlines())
30 |         with open(val_label_file) as f:
31 |             val_num = len(f.readlines())
32 |         with open(test_label_file) as f:
33 |             test_num = len(f.readlines())
34 |         return train_num, val_num, test_num
35 | 
36 | 
37 |     def __load_input_img(self, proj_dir, file_name, fine_tune=False):
38 |         img_path = os.path.join(proj_dir, file_name)
39 | 
40 |         if fine_tune:
41 |             img = cv2.imread(img_path)  # 3-channel BGR input for the VGG16 branch
42 |         else:
43 |             img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
44 | 
45 |         # scale pixel values to [0, 1]
46 |         img = cv2.resize(img, (self.input_size, self.input_size)) / 255
47 | 
48 |         return img
49 | 
50 |     def load_input_imgpath_label(self, file_name, labels_num=1, shuffle=True):
51 |         imgpath = []
52 |         labels = []
53 | 
54 |         with open(os.path.join(self.data_dir, file_name)) as f:
55 |             lines_list = f.readlines()
56 |             if shuffle:
57 |                 random.shuffle(lines_list)
58 | 
59 |             for lines in lines_list:
60 |                 line = lines.rstrip().split(',')
61 |                 label = []
62 |                 if labels_num == 1:
63 |                     label = int(line[1])
64 |                 else:
65 |                     lab = line[1].split(' ')
66 |                     for i in range(labels_num):
67 |                         label.append(int(lab[i]))
68 |                 imgpath.append(line[0])
69 |                 labels.append(label)
70 |         return np.array(imgpath), np.array(labels)
71 | 
72 |     def train_num(self):
73 |         return self.__train_num
74 | 
75 |     def val_num(self):
76 |         return self.__val_num
77 | 
78 |     def test_num(self):
79 |         return self.__test_num
80 | 
81 |     def load_batch_data_label(self, filename_list, label_list, label_num=1, shuffle=True):
82 |         file_num = len(filename_list)
83 |         if shuffle:
84 |             idx = np.random.permutation(range(file_num))
85 |             filename_list = filename_list[idx]
86 |             label_list = label_list[idx]
87 |         max_num = file_num - (file_num % self.batch_size)
88 |         for i in range(0, max_num, self.batch_size):
89 |             batch_x = []
90 |             batch_y = []
91 |             for j in range(self.batch_size):
92 |                 img = self.__load_input_img(self.proj_dir, filename_list[i + j], self.fine_tune)
93 |                 if not self.fine_tune:
94 |                     img = np.reshape(img, (self.input_size, self.input_size, 1))
95 |                 label = label_list[i + j]
96 |                 batch_x.append(img)
97 |                 batch_y.append(label)
98 |             batch_x = np.array(batch_x, dtype=np.float32)
99 |             if label_num == 1:
100 |                 batch_y = tf.keras.utils.to_categorical(batch_y, 7)  # single-label path; unused for COFW's 6-flag multi-label targets
101 |             else:
102 |                 batch_y = np.array(batch_y)
103 |             if shuffle:
104 |                 idx = np.random.permutation(range(self.batch_size))
105 |                 batch_x = batch_x[idx]
106 |                 batch_y = batch_y[idx]
107 |             yield batch_x, batch_y
108 | 
109 | 
110 | class Cofw(DataSet):
111 |     def __init__(self, proj_dir, data_dir, batch_size=64, input_size=64, class_num=2, fine_tune=False):
112 |         DataSet.__init__(self, proj_dir, data_dir, batch_size, input_size, fine_tune)
113 |         self.class_num = class_num
114 | 
115 |         print("fod train_num:%d" % self.train_num())
116 |         print("fod val_num:%d" % self.val_num())
117 |         print("fod test_num:%d" % self.test_num())
118 | 
119 |     def data_generator(self, input_name_list, output_name_list, label_file_name='train.txt', shuffle=True):
120 |         fod_filenames, fod_labels = self.load_input_imgpath_label(label_file_name, labels_num=self.class_num, shuffle=shuffle)
121 |         while True:
122 |             # run through a full epoch, then start a fresh pass; recreating the
123 |             # generator on every iteration would re-yield only the first batch forever
124 |             for fod_batch_x, fod_batch_y in self.load_batch_data_label(fod_filenames, fod_labels, label_num=self.class_num, shuffle=shuffle):
125 |                 yield ({input_name_list[0]: fod_batch_x},
126 |                        {output_name_list[0]: fod_batch_y})
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.platform import app
3 | import os
4 | import numpy as np
5 | import cv2
6 | import argparse
7 | import sys
8 | import datasets
9 | import models
10 | 
11 | def parse_args():
12 |     parser = argparse.ArgumentParser()
13 |     parser.register("type", "bool", lambda v: v.lower() == "true")  # parses "true"/"false"; plain type=bool would treat "False" as True
14 | 
15 |     parser.add_argument("--proj_dir", type=str, default="./", help="Project directory")
16 |     parser.add_argument("--input_size", type=int, default=96, help="Input image size")
17 |     parser.add_argument("--batch_size", type=int, default=100, help="Batch size.")
18 |     parser.add_argument("--fine_tune", type="bool", default=False, help="Fine tune based on VGG16.")
19 |     parser.add_argument("--train", type="bool", default=False, help="Train or test.")
20 |     parser.add_argument("--epochs", type=int, default=100, help="Train epochs")
21 |     parser.add_argument("--camera_test", type="bool", default=False, help="Camera video stream test. 
Need a camera device") 22 | 23 | return parser.parse_known_args() 24 | 25 | def main(unused_args): 26 | if not len(FLAGS.proj_dir): 27 | raise Exception("Please set project directory") 28 | 29 | MODEL_DIR = os.path.join(FLAGS.proj_dir, 'model/') 30 | LOG_DIR = os.path.join(FLAGS.proj_dir, 'log/') 31 | 32 | FOD_CLASS_NAMES = ['normal', 'right_eye', 'left_eye', 'nose', 'mouth', 'chin'] 33 | CLASS_NUM = len(FOD_CLASS_NAMES) 34 | 35 | 36 | dataset = datasets.Cofw(proj_dir=FLAGS.proj_dir, data_dir='data/cofw/', batch_size=FLAGS.batch_size, 37 | input_size=FLAGS.input_size, class_num=CLASS_NUM, 38 | fine_tune=FLAGS.fine_tune) 39 | 40 | net = models.FodNet(dataset, CLASS_NUM, batch_size=FLAGS.batch_size, 41 | input_size=FLAGS.input_size, fine_tune=FLAGS.fine_tune, 42 | fine_tune_model_file=os.path.join(MODEL_DIR, 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5')) 43 | 44 | net.fit(MODEL_DIR + 'fod_model.h5', MODEL_DIR, LOG_DIR, 45 | max_epoches=FLAGS.epochs, 46 | train=FLAGS.train) 47 | 48 | if not FLAGS.camera_test: 49 | net.predict() 50 | else: 51 | import camera_tester 52 | tester = camera_tester.CameraTester(net, FLAGS.input_size, FLAGS.fine_tune, 53 | os.path.join(MODEL_DIR, 'shape_predictor_68_face_landmarks.dat')) 54 | tester.run() 55 | 56 | if __name__ == '__main__': 57 | FLAGS, unparsed = parse_args() 58 | app.run(main=main, argv=[sys.argv[0]] + unparsed) 59 | 60 | 61 | -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | import numpy as np 4 | import cv2 5 | import heapq 6 | 7 | # import tensorflow.contrib.eager as tfe 8 | # tfe.enable_eager_execution() 9 | 10 | # np.set_printoptions(threshold=np.nan) 11 | 12 | EPOCHS = 25 13 | 14 | class FodNet: 15 | def __init__(self, dataset, class_num, batch_size, input_size, fine_tune=True, fine_tune_model_file='imagenet'): 16 | self.class_num = class_num 17 | self.batch_size = batch_size 18 | self.input_size = input_size 19 | self.dataset = dataset 20 | self.fine_tune_model_file = fine_tune_model_file 21 | if fine_tune: 22 | self.model = self.fine_tune_model() 23 | else: 24 | self.model = self.__create_model() 25 | 26 | def __base_model(self, inputs): 27 | 28 | feature = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(inputs) 29 | feature = tf.keras.layers.BatchNormalization()(feature) 30 | feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature) 31 | feature = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(feature) 32 | feature = tf.keras.layers.BatchNormalization()(feature) 33 | feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature) 34 | feature = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2))(feature) 35 | 36 | feature = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(feature) 37 | feature = tf.keras.layers.BatchNormalization()(feature) 38 | feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature) 39 | feature = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(feature) 40 | feature = tf.keras.layers.BatchNormalization()(feature) 41 | feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature) 42 | feature = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2))(feature) 43 | 44 | return feature 45 | 46 | def __dense(self, feature): 47 | feature = 
tf.keras.layers.Flatten()(feature) 48 | feature = tf.keras.layers.Dense(units=128)(feature) 49 | feature = tf.keras.layers.BatchNormalization()(feature) 50 | feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature) 51 | feature = tf.keras.layers.Dropout(0.5)(feature) 52 | feature = tf.keras.layers.Dense(units=256)(feature) 53 | feature = tf.keras.layers.BatchNormalization()(feature) 54 | feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature) 55 | feature = tf.keras.layers.Dropout(0.5)(feature) 56 | return feature 57 | 58 | def __create_model(self): 59 | input_fod = tf.keras.layers.Input(name='fod_input', shape=(self.input_size, self.input_size, 1)) 60 | 61 | feature_fod = self.__base_model(input_fod) 62 | feature_fod = self.__dense(feature_fod) 63 | 64 | output_fod = tf.keras.layers.Dense(name='fod_output', units=self.class_num, activation=tf.nn.sigmoid)(feature_fod) 65 | 66 | model = tf.keras.Model(inputs=[input_fod], outputs=[output_fod]) 67 | 68 | losses = { 69 | 'fod_output': 'binary_crossentropy', 70 | } 71 | 72 | model.compile(optimizer=tf.train.AdamOptimizer(), 73 | loss=losses, 74 | metrics=['accuracy']) 75 | 76 | return model 77 | 78 | def __extract_output(self, model, name, input): 79 | model._name = name 80 | for layer in model.layers: 81 | layer.trainable = True 82 | return model(input) 83 | 84 | def fine_tune_model(self): 85 | input_fod = tf.keras.layers.Input(name='fod_input', shape=(self.input_size, self.input_size, 3)) 86 | 87 | # resnet_fod = tf.keras.applications.ResNet50(weights='imagenet', include_top=False) 88 | # feature_fod = self.__extract_output(resnet_fod, 'resnet_fod', input_fod) 89 | 90 | vgg16_fod = tf.keras.applications.VGG16(weights=self.fine_tune_model_file, include_top=False) 91 | feature_fod = self.__extract_output(vgg16_fod, 'vgg16_fod', input_fod) 92 | 93 | feature_fod = self.__dense(feature_fod) 94 | output_fod = tf.keras.layers.Dense(name='fod_output', units=self.class_num, activation=tf.nn.sigmoid)(feature_fod) 95 | 96 | model = tf.keras.Model(inputs=[input_fod], outputs=[output_fod]) 97 | 98 | losses = { 99 | 'fod_output': 'binary_crossentropy', 100 | } 101 | 102 | model.compile(optimizer=tf.train.AdamOptimizer(), 103 | loss=losses, 104 | metrics=['accuracy']) 105 | return model 106 | 107 | def fit(self, model_file, checkpoint_dir, log_dir, max_epoches=EPOCHS, train=True): 108 | self.model.summary() 109 | 110 | if not train: 111 | self.model.load_weights(model_file) 112 | else: 113 | cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_dir, 114 | save_weights_only=True, 115 | save_best_only=True, 116 | period=1, 117 | verbose=1) 118 | earlystop_cb = tf.keras.callbacks.EarlyStopping(monitor='val_loss', 119 | mode='min', 120 | min_delta=0.001, 121 | patience=1, 122 | verbose=1) 123 | 124 | tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir) 125 | 126 | input_name_list = ['fod_input'] 127 | output_name_list = ['fod_output'] 128 | self.model.fit_generator(generator=self.dataset.data_generator(input_name_list, output_name_list, 'train.txt'), 129 | epochs=max_epoches, 130 | steps_per_epoch=self.dataset.train_num() // self.batch_size, 131 | validation_data=self.dataset.data_generator(input_name_list, output_name_list, 'val.txt'), 132 | validation_steps=self.dataset.val_num() // self.batch_size, 133 | callbacks=[cp_callback, earlystop_cb, tb_callback], 134 | max_queue_size=10, 135 | workers=1, 136 | verbose=1) 137 | 138 | self.model.save(model_file) 139 | 140 | def predict(self): 141 | input_name_list = 
['fod_input'] 142 | output_name_list = ['fod_output'] 143 | predictions = self.model.predict_generator(generator=self.dataset.data_generator(input_name_list, output_name_list, 'test.txt', shuffle=False), 144 | steps=self.dataset.test_num() // self.batch_size, 145 | verbose=1) 146 | if len(predictions) > 0: 147 | fod_preds = predictions 148 | # print(fod_preds) 149 | test_data = self.dataset.data_generator(input_name_list, output_name_list, 'test.txt', shuffle=False) 150 | correct = 0 151 | steps = self.dataset.test_num() // self.batch_size 152 | total = steps * self.batch_size 153 | 154 | for step in range(steps): 155 | _, test_batch_y = next(test_data) 156 | fod_real_batch = test_batch_y['fod_output'] 157 | for i, fod_real in enumerate(fod_real_batch): 158 | fod_real = fod_real.tolist() 159 | one_num = fod_real.count(1) 160 | fod_pred_idxs = sorted(list(map(fod_preds[self.batch_size * step + i].tolist().index, 161 | heapq.nlargest(one_num, fod_preds[self.batch_size * step + i])))) 162 | fod_real_idxs = [i for i,x in enumerate(fod_real) if x == 1] 163 | # print(fod_pred_idxs) 164 | # print(fod_real_idxs) 165 | if fod_real_idxs == fod_pred_idxs: 166 | correct += 1 167 | 168 | print("fod==> correct:{}, total:{}, correct_rate:{}".format(correct, total, 1.0 * correct / total)) 169 | return predictions 170 | 171 | def test_online(self, face_imgs): 172 | batch_x = np.array(face_imgs[0]['fod_input'], dtype=np.float32) 173 | batch_x = np.expand_dims(batch_x, 0) 174 | 175 | predictions = self.model.predict({'fod_input': batch_x}, batch_size=1) 176 | # predictions = np.asarray(predictions) 177 | return predictions 178 | -------------------------------------------------------------------------------- /prepare_data_cofw.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import cv2 4 | import random 5 | import os 6 | import math 7 | import shutil 8 | import argparse 9 | 10 | 11 | class Augmenter: 12 | def __init__(self, save_dir="."): 13 | self.save_dir = save_dir 14 | 15 | def adjust_brightness(self, img, a=1.0, g=8): 16 | h, w, c = img.shape 17 | mask = np.zeros([h, w, c], img.dtype) 18 | dst_img = cv2.addWeighted(img, a, mask, 1 - a, g) 19 | # cv2.imshow("dst_Img", dst_img) 20 | # cv2.waitKey(0) 21 | return dst_img 22 | 23 | def darker(self, img): 24 | return self.adjust_brightness(img, 0.8) 25 | 26 | def brighter(self, img): 27 | return self.adjust_brightness(img, 1.3) 28 | 29 | def flip(self, img): 30 | return cv2.flip(img, 1) 31 | 32 | def gasuss_noise(self, img, mean=0, var=0.001): 33 | img = np.array(img / 255, dtype=float) 34 | noise = np.random.normal(mean, var ** 0.5, img.shape) 35 | out = img + noise 36 | if out.min() < 0: 37 | low_clip = -1. 38 | else: 39 | low_clip = 0. 
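        # additive Gaussian noise can push normalized pixels outside the valid
        # range, so clip back to [low_clip, 1.0] before rescaling to uint8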
40 | out = np.clip(out, low_clip, 1.0) 41 | out = np.uint8(out * 255) 42 | # cv2.imshow("gasuss", out) 43 | # cv2.waitKey(0) 44 | return out 45 | 46 | def augment(self, img): 47 | darker_img = self.darker(img) 48 | brighter_img = self.brighter(img) 49 | noise_img = self.gasuss_noise(img) 50 | return darker_img, brighter_img, noise_img 51 | 52 | 53 | def augmentate(img_idx_file, aug_img_dir, aug_label_file): 54 | augmenter = Augmenter(aug_img_dir) 55 | 56 | if not os.path.exists(aug_img_dir): 57 | os.mkdir(aug_img_dir) 58 | 59 | num = 1 60 | with open(img_idx_file, 'r') as fr: 61 | lines = fr.readlines() 62 | with open(aug_label_file, 'w+') as fw: 63 | for line in lines: 64 | has_phis = False 65 | if len(line.split(',')) > 2: 66 | img_path, bbox, phis = line.split(',') 67 | has_phis = True 68 | else: 69 | img_path, label = line.split(',') 70 | # print(img_path) 71 | img = cv2.imread(img_path) 72 | 73 | darker_img, brighter_img, noise_img = augmenter.augment(img) 74 | cv2.imwrite(os.path.join(aug_img_dir, 'dark_' + str(num) + '.jpg'), darker_img) 75 | cv2.imwrite(os.path.join(aug_img_dir, 'brighter_' + str(num) + '.jpg'), brighter_img) 76 | cv2.imwrite(os.path.join(aug_img_dir, 'noise_' + str(num) + '.jpg'), noise_img) 77 | if has_phis: 78 | fw.write(os.path.join(aug_img_dir, 'dark_' + str(num) + '.jpg') + ',' + bbox + ',' + phis) 79 | fw.write(os.path.join(aug_img_dir, 'brighter_' + str(num) + '.jpg') + ',' + bbox + ',' + phis) 80 | fw.write(os.path.join(aug_img_dir, 'noise_' + str(num) + '.jpg') + ',' + bbox + ',' + phis) 81 | else: 82 | fw.write(os.path.join(aug_img_dir, 'dark_' + str(num) + '.jpg') + ',' + label) 83 | fw.write(os.path.join(aug_img_dir, 'brighter_' + str(num) + '.jpg') + ',' + label) 84 | fw.write(os.path.join(aug_img_dir, 'noise_' + str(num) + '.jpg') + ',' + label) 85 | num += 1 86 | 87 | #mat_file: COFW_train.mat, COFW_test.mat 88 | #img_token: 'IsTr', 'IsT' 89 | #bbox_token: 'bboxesTr', 'bboxesT' 90 | #phis_token: 'phisTr', 'phisT' 91 | def mat_to_files(mat_file, img_token, bbox_token, phis_token, img_dir, gt_txt_file): 92 | train_mat = h5py.File(mat_file, 'r') 93 | tr_imgs_obj = train_mat[img_token][:] 94 | total_num = tr_imgs_obj.shape[1] 95 | # print(total_num) 96 | 97 | with open(gt_txt_file, "w+") as trf: 98 | for i in range(total_num): 99 | img = train_mat[tr_imgs_obj[0][i]][:] 100 | bbox = train_mat[bbox_token][:] 101 | bbox = np.transpose(bbox)[i] 102 | 103 | img = np.transpose(img) 104 | if not os.path.exists(img_dir): 105 | os.mkdir(img_dir) 106 | 107 | cv2.imwrite(img_dir + "/{}.jpg".format(i), img) 108 | gt = train_mat[phis_token][:] 109 | gt = np.transpose(gt)[i] 110 | 111 | content = img_dir + "/{}.jpg,".format(i) 112 | for k in range(bbox.shape[0]): 113 | content = content + bbox[k].astype(str) + ' ' 114 | content += ',' 115 | for k in range(gt.shape[0]): 116 | content = content + gt[k].astype(str) + ' ' 117 | content += '\n' 118 | trf.write(content) 119 | 120 | 121 | def move_test_to_train(test_gt_txt, train_gt_txt, new_test_txt, new_train_txt, test_num): 122 | shutil.copy(train_gt_txt, new_train_txt) 123 | with open(test_gt_txt, 'r') as t_fp: 124 | test_lines = t_fp.readlines() 125 | with open(new_test_txt, 'w+') as new_t_fp: 126 | with open(new_train_txt, 'a+') as new_tr_fp: 127 | num = 0 128 | for line in test_lines: 129 | num += 1 130 | if num <= test_num: 131 | new_t_fp.write(line) 132 | else: 133 | new_tr_fp.write(line) 134 | 135 | 136 | def crop_face(gt_txt, face_img_dir, show=False): 137 | if not os.path.exists(face_img_dir): 138 | 
os.mkdir(face_img_dir)
139 |     img_num = 1
140 |     with open(gt_txt, 'r') as gt_fp:
141 |         line = gt_fp.readline()
142 |         while line:
143 |             img_path, bbox, phis = line.split(',')
144 | 
145 |             img = cv2.imread(img_path)
146 | 
147 |             # phis holds 29 x coords, 29 y coords, then 29 occlusion flags
148 |             phis = phis.strip('\n').strip(' ').split(' ')
149 |             phis = [int(float(x)) for x in phis]
150 | 
151 |             xarr = phis[:29]
152 |             yarr = phis[29:58]  # y coords start at index 29; slicing from 30 would skip the first landmark
153 |             min_x = np.min(xarr)
154 |             max_x = np.max(xarr)
155 |             min_y = np.min(yarr)
156 |             max_y = np.max(yarr)
157 |             #print(min_x, max_x, min_y, max_y)
158 |             Lmax = np.max([max_x - min_x, max_y - min_y]) * 1.15
159 | 
160 |             delta = Lmax // 2
161 |             center_x = (max_x + min_x) // 2
162 |             center_y = (max_y + min_y) // 2
163 |             x = int(center_x - delta)
164 |             y = int(center_y - 0.98 * delta)
165 |             endx = int(center_x + delta)
166 |             endy = int(center_y + 1.02 * delta)
167 | 
168 |             if x < 0: x = 0
169 |             if y < 0: y = 0
170 | 
171 |             if endx > img.shape[1]: endx = img.shape[1]
172 |             if endy > img.shape[0]: endy = img.shape[0]
173 | 
174 |             face = img[y: endy, x: endx]
175 | 
176 |             if show:
177 |                 cv2.imshow("face", face)
178 |                 cv2.waitKey(0)
179 | 
180 |             cv2.imwrite(face_img_dir + "{}.jpg".format(img_num), face)
181 | 
182 |             line = gt_fp.readline()
183 |             img_num += 1
184 | 
185 | 
186 | def face_label(gt_txt, face_img_dir, face_txt, show=False):
187 |     img_num = 1
188 |     with open(face_txt, "w+") as face_txt_fp:
189 |         with open(gt_txt, 'r') as gt_fp:
190 |             line = gt_fp.readline()
191 |             while line:
192 |                 img_path, bbox, phis = line.split(',')
193 | 
194 |                 phis = phis.strip('\n').strip(' ').split(' ')
195 |                 phis = [int(float(x)) for x in phis]
196 | 
197 | 
198 |                 if show:
199 |                     img = cv2.imread(img_path)
200 |                     for i in range(29):
201 |                         cv2.circle(img, (phis[i], phis[i + 29]), 2, (0, 255, 255))
202 |                         cv2.putText(img, str(i), (phis[i], phis[i + 29]), cv2.FONT_HERSHEY_COMPLEX, 0.3, (0, 0, 255), 1)
203 |                     cv2.imshow("img", img)
204 |                     cv2.waitKey(0)
205 | 
206 |                 slot = phis[58:]  # per-landmark occlusion flags
207 |                 label = [1, 0, 0, 0, 0, 0]
208 |                 # if slot[0] and slot[2] and slot[4] and slot[5]:
209 |                 #     label[1] = 1  # right eyebrow
210 |                 #     label[0] = 0
211 |                 if slot[16]:  # slot[10] or slot[12] or slot[13] or slot[16] or slot[8]:
212 |                     label[1] = 1  # right eye
213 |                     label[0] = 0
214 |                 # if slot[1] and slot[3] and slot[6] and slot[7]:
215 |                 #     label[3] = 1  # left eyebrow
216 |                 #     label[0] = 0
217 |                 if slot[17]:  # slot[11] or slot[14] or slot[15] or slot[17] or slot[9]:
218 |                     label[2] = 1  # left eye
219 |                     label[0] = 0
220 |                 if slot[20]:  # slot[18] or slot[19] or slot[20] or slot[21]:
221 |                     label[3] = 1  # nose
222 |                     label[0] = 0
223 |                 if slot[22] or slot[23] or slot[25] or slot[26] or slot[27]:  # or slot[24]
224 |                     label[4] = 1  # mouth
225 |                     label[0] = 0
226 |                 if slot[28]:
227 |                     label[5] = 1  # chin
228 |                     label[0] = 0
229 | 
230 |                 lab_str = ''
231 |                 for x in label:
232 |                     lab_str += str(x) + ' '
233 | 
234 |                 content = face_img_dir + "{}.jpg".format(img_num) + ',' + lab_str.rstrip(' ')
235 |                 content += '\n'
236 | 
237 |                 face_txt_fp.write(content)
238 | 
239 |                 line = gt_fp.readline()
240 |                 img_num += 1
241 | 
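# For example, a COFW sample whose occlusion flags mark only the chin
# (slot[28] == 1) yields label == [0, 0, 0, 0, 0, 1] above, written out as
# "<face_img_dir>/<n>.jpg,0 0 0 0 0 1" in the order
# [normal, right_eye, left_eye, nose, mouth, chin].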
242 | def img_shift(img_size, delta, shift_dir, orig_txt, shift_txt, show=False):
243 |     with open(shift_txt, "w+") as shift_fp:
244 |         with open(orig_txt, 'r') as orig_fp:
245 |             if not os.path.exists(shift_dir):
246 |                 os.mkdir(shift_dir)
247 | 
248 |             line = orig_fp.readline()
249 |             while line:
250 |                 # keep the original sample, then add the five shifted crops
251 |                 shift_fp.write(line)
252 | 
253 |                 img_path, label = line.split(',')
254 | 
255 |                 img_name = os.path.basename(img_path)
256 | 
257 |                 img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
258 |                 img = cv2.resize(img, (img_size, img_size))
259 | 
260 |                 crop_size = img_size - delta
261 |                 shift_type = ['tl_', 'tr_', 'bl_', 'br_', 'ct_']
262 | 
263 |                 for shift in shift_type:
264 |                     crop_img = img
265 |                     if shift == 'tl_':
266 |                         crop_img = img[0:crop_size, 0:crop_size]
267 |                     elif shift == 'tr_':
268 |                         crop_img = img[0:crop_size, delta - 1:-1]
269 |                     elif shift == 'bl_':
270 |                         crop_img = img[delta - 1:-1, 0:crop_size]
271 |                     elif shift == 'br_':
272 |                         crop_img = img[delta - 1:-1, delta - 1:-1]
273 |                     elif shift == 'ct_':
274 |                         # centered window; ending at crop_size // 2 would cut it short
275 |                         crop_img = img[delta // 2:crop_size + delta // 2, delta // 2:crop_size + delta // 2]
276 | 
277 |                     shift_img_path = shift_dir + shift + img_name
278 |                     cv2.imwrite(shift_img_path, crop_img)
279 |                     shift_fp.write(shift_img_path + "," + label)
280 |                     if show:
281 |                         cv2.imshow(shift, crop_img)
282 |                         cv2.waitKey(0)
283 | 
284 |                 line = orig_fp.readline()
285 | 
286 | 
287 | 
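# A quick check of the window arithmetic with assumed values img_size=96,
# delta=8: crop_size = 88, so
#   tl_ -> img[0:88, 0:88]      tr_ -> img[0:88, 7:-1]
#   bl_ -> img[7:-1, 0:88]      br_ -> img[7:-1, 7:-1]
#   ct_ -> img[4:92, 4:92]
# i.e. five 88x88 crops that shift the face toward each corner and the center.
# (img_shift is a standalone helper; prepare_data() below does not call it.)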
288 | def add_block_and_crop_face(gt_txt, block_txt, block_dir, rand_num, show=False):
289 |     with open(block_txt, "w+") as block_txt_fp:
290 |         if not os.path.exists(block_dir):
291 |             os.mkdir(block_dir)
292 |         num = 1
293 |         for i in range(rand_num):
294 |             with open(gt_txt, 'r') as gt:
295 |                 line = gt.readline()
296 |                 while line:
297 |                     img_path, bbox, phis = line.split(',')
298 | 
299 |                     img = cv2.imread(img_path)
300 | 
301 |                     phis = phis.strip('\n').strip(' ').split(' ')
302 |                     phis = [int(float(x)) for x in phis]
303 | 
304 |                     # start from the real occlusion labels, then add synthetic blocks
305 |                     slot = phis[58:]
306 |                     label = [1, 0, 0, 0, 0, 0]
307 |                     # if slot[0] and slot[2] and slot[4] and slot[5]:
308 |                     #     label[1] = 1  # right eyebrow
309 |                     #     label[0] = 0
310 |                     if slot[16]:  # slot[10] or slot[12] or slot[13] or slot[16]:  # slot[8] outer
311 |                         label[1] = 1  # right eye
312 |                         label[0] = 0
313 |                     # if slot[1] and slot[3] and slot[6] and slot[7]:
314 |                     #     label[3] = 1  # left eyebrow
315 |                     #     label[0] = 0
316 |                     if slot[17]:  # slot[11] or slot[14] or slot[15] or slot[17]:  # slot[9] outer
317 |                         label[2] = 1  # left eye
318 |                         label[0] = 0
319 |                     if slot[20]:  # slot[18] or slot[19] or slot[20] or slot[21]:
320 |                         label[3] = 1  # nose
321 |                         label[0] = 0
322 |                     if slot[22] or slot[23] or slot[25] or slot[26] or slot[27]:  # or slot[24]
323 |                         label[4] = 1  # mouth
324 |                         label[0] = 0
325 |                     if slot[28]:
326 |                         label[5] = 1  # chin
327 |                         label[0] = 0
328 | 
329 |                     block_img = img
330 |                     area_num = random.randint(1, 6)
331 | 
332 |                     for i in range(area_num):
333 |                         value = random.randint(10, 240)
334 |                         area_idx = random.randint(1, 6)
335 |                         if area_idx == 1:
336 |                             if label[area_idx] == 0:
337 |                                 label[area_idx] = 1
338 |                                 label[0] = 0
339 |                                 block_img[phis[29 + 12] - 8: phis[29 + 13] + 6, phis[8] - 8: phis[10] + 8] = value
340 |                         elif area_idx == 2:
341 |                             if label[area_idx] == 0:
342 |                                 label[area_idx] = 1
343 |                                 label[0] = 0
344 |                                 block_img[phis[29 + 14] - 8: phis[29 + 15] + 6, phis[11] - 8: phis[9] + 8] = value
345 |                         elif area_idx == 3:
346 |                             if label[area_idx] == 0:
347 |                                 label[area_idx] = 1
348 |                                 label[0] = 0
349 |                                 block_img[phis[29 + 20] - 20: phis[29 + 21] + 1, phis[18] - 4: phis[19] + 5] = value
350 |                         elif area_idx == 4:
351 |                             if label[area_idx] == 0:
352 |                                 label[area_idx] = 1
353 |                                 label[0] = 0
354 |                                 block_img[phis[29 + 24] - 1: phis[29 + 27] + 1, phis[22] - 2: phis[23] + 2] = value
355 |                         elif area_idx == 5:
356 |                             if label[area_idx] == 0:
357 |                                 label[area_idx] = 1
358 |                                 label[0] = 0
359 |                                 block_img[phis[29 + 28] - 12: phis[29 + 28] + 15,
360 |                                           phis[28] - 25: phis[28] + 25] = random.randint(160, 255)
361 |                         else:
362 |                             if label[1] == 0 and label[2] == 0:
363 |                                 label[1] = 1
364 |                                 label[0] = 0
365 |                                 block_img[phis[29 + 12] - 8: phis[29 + 13] + 6,
366 |                                           phis[8] - 10: phis[10] + 8] = random.randint(0, 50)
367 |                                 label[2] = 1
368 |                                 label[0] = 0
369 |                                 block_img[phis[29 + 14] - 8: phis[29 + 15] + 6,
370 |                                           phis[11] - 10: phis[9] + 10] = random.randint(0, 50)
371 | 
372 |                     img_file = str(num) + ".jpg"
373 |                     lab_str = ''
374 |                     for x in label:
375 |                         lab_str += str(x) + ' '
376 |                     content = block_dir + img_file + ',' + lab_str.rstrip(' ')
377 |                     content += '\n'
378 |                     block_txt_fp.write(content)
379 | 
380 |                     xarr = phis[:29]
381 |                     yarr = phis[29:58]  # y coords start at index 29 (see face_label above)
382 |                     min_x = np.min(xarr)
383 |                     max_x = np.max(xarr)
384 |                     min_y = np.min(yarr)
385 |                     max_y = np.max(yarr)
386 | 
387 |                     Lmax = np.max([max_x - min_x, max_y - min_y]) * 1.15
388 | 
389 |                     delta = Lmax // 2
390 |                     center_x = (max_x + min_x) // 2
391 |                     center_y = (max_y + min_y) // 2
392 |                     x = int(center_x - delta)
393 |                     y = int(center_y - 0.98 * delta)
394 |                     endx = int(center_x + delta)
395 |                     endy = int(center_y + 1.02 * delta)
396 | 
397 |                     if x < 0: x = 0
398 |                     if y < 0: y = 0
399 | 
400 |                     if endx > block_img.shape[1]: endx = block_img.shape[1]
401 |                     if endy > block_img.shape[0]: endy = block_img.shape[0]
402 | 
403 |                     face = block_img[y: endy, x: endx]
404 | 
405 |                     if not face.shape[0] > 0:
406 |                         raise Exception("face size error")
407 | 
408 |                     cv2.imwrite(block_dir + img_file, face)
409 |                     if show:
410 |                         cv2.imshow("face", face)
411 |                         cv2.waitKey(0)
412 | 
413 |                     line = gt.readline()
414 |                     num += 1
415 | 
416 | 
417 | def label_spec_face(label_file, file_root):
418 |     labels = '1 0 0 0 0 0'
419 |     with open(label_file, 'w+') as tlf:
420 |         for root, dirs, files in os.walk(file_root):
421 |             for f in files:
422 |                 img_path = file_root + f
423 |                 content = img_path + ',' + labels + '\n'
424 |                 tlf.write(content)
425 | 
426 | 
427 | def merge_txt(txt1, txt2, merge_txt):
428 |     with open(merge_txt, 'w+') as trf:
429 |         with open(txt1, 'r') as fp:
430 |             line = fp.readline()
431 |             while line:
432 |                 trf.write(line)
433 |                 line = fp.readline()
434 |         with open(txt2, 'r') as fp:
435 |             line = fp.readline()
436 |             while line:
437 |                 trf.write(line)
438 |                 line = fp.readline()
439 | 
440 | 
441 | def split_train_file(merge_train_txt, train_txt, val_txt, train_ratio):
442 |     with open(merge_train_txt, 'r') as fp:
443 |         lines_list = fp.readlines()
444 | 
445 |         total = len(lines_list)
446 |         train_num = math.floor(total * train_ratio)
447 |         val_num = total - train_num
448 |         count = 0
449 |         data = []
450 |         for line in lines_list:
451 |             count += 1
452 |             data.append(line)
453 |             if count == train_num:
454 |                 with open(train_txt, "w+") as trainf:
455 |                     random.shuffle(data)
456 |                     for d in data:
457 |                         trainf.write(d)
458 |                 data = []
459 | 
460 |             if count == train_num + val_num:
461 |                 with open(val_txt, "w+") as valf:
462 |                     random.shuffle(data)
463 |                     for d in data:
464 |                         valf.write(d)
465 |                 data = []
466 |     print("train_num:{}, val_num:{}".format(train_num, val_num))
467 | 
468 | 
469 | def prepare_data(data_root):
470 |     # 1. Get original txt
471 |     print("1. 
Generate original data index file...") 479 | if not os.path.exists(data_root + "glass_face.txt"): 480 | label_spec_face(data_root + "glass_face.txt", 481 | data_root + "glass_face/") 482 | 483 | if not os.path.exists(data_root + "beard_face.txt"): 484 | label_spec_face(data_root + "beard_face.txt", 485 | data_root + "beard_face/") 486 | 487 | if not os.path.exists(data_root + "train_ground_true.txt"): 488 | mat_to_files(data_root + "COFW_train.mat", 489 | 'IsTr', 'bboxesTr', 'phisTr', 490 | data_root + "train", 491 | data_root + "train_ground_true.txt") 492 | 493 | if not os.path.exists(data_root + "test_ground_true.txt"): 494 | mat_to_files(data_root + "COFW_test.mat", 495 | 'IsT', 'bboxesT', 'phisT', 496 | data_root + "test", 497 | data_root + "test_ground_true.txt") 498 | 499 | if not os.path.exists(data_root + "train_gt.txt"): 500 | move_test_to_train(data_root + "test_ground_true.txt", 501 | data_root + "train_ground_true.txt", 502 | data_root + "test_gt.txt", 503 | data_root + "train_gt.txt", 504 | 100) 505 | 506 | print("2. Generate augmented train & test faces and label...") 507 | if not os.path.exists(data_root + "aug_train_gt.txt"): 508 | augmentate(data_root + "train_gt.txt", 509 | data_root + "aug_train/", 510 | data_root + "aug_train_gt.txt") 511 | 512 | if not os.path.exists(data_root + "aug_test_gt.txt"): 513 | augmentate(data_root + "test_gt.txt", 514 | data_root + "aug_test/", 515 | data_root + "aug_test_gt.txt") 516 | 517 | if not os.path.exists(data_root + 'spec_faces.txt'): 518 | merge_txt(data_root + 'beard_face.txt', data_root + 'glass_face.txt', data_root + 'spec_faces.txt') 519 | 520 | if not os.path.exists(data_root + "aug_spec_faces.txt"): 521 | augmentate(data_root + 'spec_faces.txt', 522 | data_root + 'aug_spec_faces/', 523 | data_root + 'aug_spec_faces.txt') 524 | 525 | print("3. Add block to orig and augmented img and crop faces...") 526 | if not os.path.exists(data_root + "merge_train_gt.txt"): 527 | merge_txt(data_root + "train_gt.txt", data_root + "aug_train_gt.txt", data_root + "merge_train_gt.txt") 528 | 529 | if not os.path.exists(data_root + "merge_test_gt.txt"): 530 | merge_txt(data_root + "test_gt.txt", data_root + "aug_test_gt.txt", data_root + "merge_test_gt.txt") 531 | 532 | if not os.path.exists(data_root + "face_train_block.txt"): 533 | add_block_and_crop_face(data_root + "merge_train_gt.txt", 534 | data_root + "face_train_block.txt", 535 | data_root + "face_train_block/", 536 | 16, False) 537 | 538 | # 4. Crop face and get face txt 539 | print("4. Crop orig train & test faces and get label...") 540 | if not os.path.exists(data_root + "face_train/"): 541 | crop_face(data_root + "merge_train_gt.txt", 542 | data_root + "face_train/") 543 | if not os.path.exists(data_root + "face_train.txt"): 544 | face_label(data_root + "merge_train_gt.txt", 545 | data_root + "face_train/", 546 | data_root + "face_train.txt", False) 547 | 548 | if not os.path.exists(data_root + "face_test/"): 549 | crop_face(data_root +"merge_test_gt.txt", 550 | data_root + "face_test/") 551 | if not os.path.exists(data_root + "face_test.txt"): 552 | face_label(data_root + "merge_test_gt.txt", 553 | data_root + "face_test/", 554 | data_root + "face_test.txt") 555 | 556 | # 5. merge spec face txt with face train txt 557 | print("5. 
Merge spec face txt with face train txt...")
558 |     if not os.path.exists(data_root + "orig_aug_spec_faces.txt"):  # guard on the file this step actually writes
559 |         merge_txt(data_root + "spec_faces.txt", data_root + "aug_spec_faces.txt", data_root + "orig_aug_spec_faces.txt")
560 |     if not os.path.exists(data_root + "merge_face.txt"):
561 |         merge_txt(data_root + "orig_aug_spec_faces.txt", data_root + "face_train.txt", data_root + "merge_face.txt")
562 | 
563 |     # 6. merge all faces to merge_train.txt
564 |     print("6. Merge all faces to merge_train.txt...")
565 |     if not os.path.exists(data_root + 'merge_train.txt'):
566 |         merge_txt(data_root + 'merge_face.txt',
567 |                   data_root + 'face_train_block.txt',
568 |                   data_root + 'merge_train.txt')
569 | 
570 |     # 7. get train, val and test
571 |     print("7. Generate train, val and test...")
572 |     if not os.path.exists(data_root + "train.txt"):
573 |         split_train_file(data_root + "merge_train.txt",
574 |                          data_root + "train.txt",
575 |                          data_root + "val.txt",
576 |                          0.85)
577 | 
578 |     if not os.path.exists(data_root + "test.txt"):
579 |         shutil.copy(data_root + 'face_test.txt',
580 |                     data_root + 'test.txt')
581 |     print("Done!")
582 | 
583 | def parse_args():
584 |     parser = argparse.ArgumentParser()
585 |     parser.register("type", "bool", lambda v: v.lower() == "true")
586 | 
587 |     parser.add_argument("--data_dir", type=str, default="./data/cofw/", help="Data root directory")
588 |     return parser.parse_known_args()
589 | 
590 | if __name__ == '__main__':
591 |     FLAGS, unparsed = parse_args()
592 | 
593 |     if not len(FLAGS.data_dir):
594 |         raise Exception("Please set data root directory via --data_dir")
595 | 
596 |     prepare_data(FLAGS.data_dir)
597 | 
--------------------------------------------------------------------------------
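For orientation, a successful `prepare_data` run leaves (among many intermediate files) the face crops and the three index files that `datasets.py` actually consumes; a sketch of the final layout, assuming the default `--data_dir`:
```
data/cofw/
├── face_train/        # cropped training faces
├── face_train_block/  # faces with synthetic occlusion blocks
├── face_test/         # cropped test faces
├── train.txt          # image-path,label lists consumed by datasets.py
├── val.txt
└── test.txt
```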