├── README.md
├── camera_tester.py
├── datasets.py
├── main.py
├── models.py
└── prepare_data_cofw.py
/README.md:
--------------------------------------------------------------------------------
1 | # Face-Occlusion-Detect
2 | A simple CNN-based face occlusion detector implemented with TensorFlow Keras.
3 | For the detailed design doc, please refer to:
4 |
5 | 
6 |
7 | ### Dependencies
8 | ```
9 | dlib >= 19.17.0
10 | tensorflow >= 1.12.0
11 | keras >= 2.2.4
12 | numpy >= 1.11.1
13 | scipy >= 0.14
14 | opencv-python >= 3.4.3
15 | ```
16 | ### Usage
17 | ##### 1. Data and trained models download
18 | download link: [https://pan.baidu.com/s/10LvoXEUGMTZjufd7R8jh4A](https://pan.baidu.com/s/10LvoXEUGMTZjufd7R8jh4A)
19 | code: 0p5j
20 | Download the COFW dataset and the pretrained models into the source code directory.
21 |
22 | ##### 2. Train
23 | (1) prepare data
24 | ```
25 | python prepare_data_cofw.py --data_dir "cofw data directory"
26 | ```
27 | (2) train
28 | ```
29 | python main.py \
30 | --proj_dir "./" \ #Project directory
31 | --input_size 96 \ #Input image size to train
32 | --batch_size 100 \ #train batch size
33 | --fine_tune False \ #Finetune VGG16 or not
34 | --epochs 100 \ #Train epochs
35 | --train True\ #Train or test
36 | ```
37 | ##### 3. Test
38 | (1) test on test_data
39 | ```
40 | python main.py \
41 | --proj_dir "./" \ #Project directory
42 | --input_size 96 \ #Input image size to train
43 | --fine_tune False \ #Finetune VGG16 or not
44 | --train False\ #Train or test
45 | ```
46 | (2) test on camera video stream data
47 | Need a camera device
48 | ```
49 | python main.py \
50 | --proj_dir "./" \ # Project directory
51 | --input_size 96 \ # Input image size to train
52 | --fine_tune False \ # Finetune VGG16 or not
53 | --camera_test True
54 | ```
55 |
56 |
--------------------------------------------------------------------------------
/camera_tester.py:
--------------------------------------------------------------------------------
1 | import os
2 | import dlib
3 | from imutils import face_utils
4 | import cv2
5 | import numpy as np
6 |
class CameraTester():
    """Run face-occlusion detection on frames grabbed from the default camera.

    For each frame the first detected face is located with dlib, cropped
    around its 68 landmarks, fed to ``net.test_online`` and every region whose
    score exceeds 0.8 is marked with a circle on the displayed frame.
    """

    def __init__(self, net=None, input_size=96, fine_tune=False,
                 face_landmark_path='./model/shape_predictor_68_face_landmarks.dat'):
        """Open camera 0 and load the dlib detector / landmark predictor.

        Args:
            net: object exposing ``test_online(frames)``; when ``None`` only
                the face rectangle is drawn and no prediction is made.
            input_size: side length (pixels) of the square network input.
            fine_tune: True when the network expects 3-channel input,
                False for single-channel input.
            face_landmark_path: path of the dlib 68-landmark model file.

        Raises:
            Exception: if the camera cannot be opened.
        """
        self.cap = cv2.VideoCapture(0)
        if not self.cap.isOpened():
            raise Exception("Unable to connect to camera.")
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(face_landmark_path)
        self.net = net
        self.input_size = input_size
        self.fine_tune = fine_tune

    def crop_face(self, shape, img, input_size):
        """Crop the face spanned by the landmarks and build the network input.

        Args:
            shape: iterable of (x, y) landmark coordinates.
            img: frame as read by OpenCV (BGR channel order).
            input_size: side length of the square output.

        Returns:
            ``(face, start_y, end_y, start_x, end_x)`` where ``face`` is a
            float array scaled to [0, 1] with shape
            ``(input_size, input_size, 3)`` when ``fine_tune`` is set and
            ``(input_size, input_size, 1)`` otherwise; the remaining values
            are the crop rectangle in frame coordinates.
        """
        xs = [px for (px, _) in shape]
        ys = [py for (_, py) in shape]

        # Landmark extent, limited to the frame, determines the crop size.
        max_x = min(max(xs), img.shape[1])
        min_x = max(min(xs), 0)
        max_y = min(max(ys), img.shape[0])
        min_y = max(min(ys), 0)

        # Square crop 15% larger than the longer landmark extent.
        side = int(max(max_x - min_x, max_y - min_y)) * 1.15
        delta = side // 2

        center_x = (max(xs) + min(xs)) // 2
        center_y = (max(ys) + min(ys)) // 2
        start_x = max(int(center_x - delta), 0)
        # The window is shifted slightly downwards (0.98 above / 1.02 below).
        start_y = max(int(center_y - 0.98 * delta), 0)
        end_x = min(int(center_x + delta), img.shape[1])
        end_y = min(int(center_y + 1.02 * delta), img.shape[0])

        face = img[start_y:end_y, start_x:end_x]
        size = (input_size, input_size)
        if self.fine_tune:
            # The 3-channel model is trained on colour images loaded with
            # cv2.imread (see datasets.py), so feed the BGR crop directly
            # instead of tiling a flattened grayscale image with np.resize.
            face = cv2.resize(face, size) / 255
        else:
            # OpenCV frames are BGR; COLOR_RGB2GRAY would swap the red/blue
            # weights and disagree with the IMREAD_GRAYSCALE training data.
            gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
            face = (cv2.resize(gray, size) / 255).reshape(input_size, input_size, 1)
        return face, start_y, end_y, start_x, end_x

    def get_area(self, shape, idx):
        """Return ``[center, radius]`` entries for the class indices in ``idx``.

        Index 0 (the "normal" class) maps to ``None``; indices 1-5 map to
        right eye, left eye, nose, mouth and chin, computed from the
        68-point landmark array ``shape``.
        """
        # Each entry is [[x, y], radius].
        left_eye = [(shape[42] + shape[45]) // 2, abs(shape[45][0] - shape[42][0])]
        right_eye = [(shape[36] + shape[39]) // 2, abs(shape[39][0] - shape[36][0])]
        nose = [shape[30], int(abs(shape[31][0] - shape[35][0]) / 1.5)]
        mouth = [(shape[48] + shape[54]) // 2, abs(shape[48][0] - shape[54][0]) // 2]
        chin = [shape[8], nose[1]]
        area = [None, right_eye, left_eye, nose, mouth, chin]
        return [region for i, region in enumerate(area) if i in idx]

    def draw_occlusion_area(self, img, shape, idx):
        """Draw a green circle on ``img`` for every occluded region in ``idx``."""
        for region in self.get_area(shape, idx):
            if region:
                center, radius = region
                # Convert numpy scalars to plain ints for the OpenCV binding.
                cv2.circle(img, (int(center[0]), int(center[1])), int(radius), (0, 255, 0))

    def run(self):
        """Process camera frames until 'q' is pressed, then release resources."""
        try:
            while self.cap.isOpened():
                ret, frame = self.cap.read()
                if not ret:
                    # No frame was delivered; try again.
                    continue

                face_rects = self.detector(frame, 0)
                if len(face_rects) > 0:
                    shape = self.predictor(frame, face_rects[0])
                    shape = face_utils.shape_to_np(shape)

                    input_img, start_y, end_y, start_x, end_x = self.crop_face(
                        shape, frame, self.input_size)

                    cv2.rectangle(frame, (start_x, start_y), (end_x, end_y),
                                  (0, 255, 0), thickness=2)

                    if self.net is not None:
                        pred = self.net.test_online([{'fod_input': input_img}])
                        idx = [i for i, score in enumerate(pred[0]) if score > 0.8]
                        if len(idx):
                            self.draw_occlusion_area(frame, shape, idx)
                else:
                    print("No face detect")

                cv2.imshow("frame", frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Always free the camera and close the preview window.
            self.cap.release()
            cv2.destroyAllWindows()
106 |
if __name__ == '__main__':
    # Stand-alone run: build a tester with its default arguments and start
    # the capture loop.
    tester = CameraTester()
    tester.run()
110 |
--------------------------------------------------------------------------------
/datasets.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import cv2
4 | import random
5 | import tensorflow as tf
6 |
7 |
class DataSet:
    """Load image paths, labels and image batches described by label files.

    Label files (``train.txt``, ``val.txt``, ``test.txt``) live in
    ``proj_dir/data_dir``; each line is ``<image path>,<label>`` where the
    label is a single integer or several space-separated integers.
    """

    def __init__(self, proj_dir, data_dir, batch_size=64, input_size=64, fine_tune=False):
        """Store the configuration and count the samples of each split.

        Args:
            proj_dir: project root; image paths are resolved against it.
            data_dir: directory (relative to ``proj_dir``) with label files.
            batch_size: number of samples per yielded batch.
            input_size: side length images are resized to.
            fine_tune: True to load colour images, False for grayscale.
        """
        self.proj_dir = proj_dir
        self.data_dir = os.path.join(proj_dir, data_dir)
        self.batch_size = batch_size
        self.input_size = input_size
        self.fine_tune = fine_tune
        self.__train_num, self.__val_num, self.__test_num = self.__get_samples_num(
            os.path.join(self.data_dir, 'train.txt'),
            os.path.join(self.data_dir, 'val.txt'),
            os.path.join(self.data_dir, 'test.txt'))

    def __get_samples_num(self, train_label_file, val_label_file, test_label_file):
        """Return the line counts of the three label files.

        All three counts are 0 when any of the files is missing.
        """
        label_files = (train_label_file, val_label_file, test_label_file)
        if not all(os.path.exists(path) for path in label_files):
            return 0, 0, 0

        counts = []
        for path in label_files:
            with open(path) as f:
                counts.append(sum(1 for _ in f))
        return counts[0], counts[1], counts[2]

    def __load_input_img(self, proj_dir, file_name, fine_tune=False):
        """Read one image, resize it to ``input_size`` and scale it to [0, 1].

        Raises:
            FileNotFoundError: if OpenCV cannot read the image.
        """
        img_path = os.path.join(proj_dir, file_name)

        if fine_tune:
            img = cv2.imread(img_path)
        else:
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None; fail with the
            # offending path instead of an opaque error inside cv2.resize.
            raise FileNotFoundError("Unable to read image: {}".format(img_path))

        return cv2.resize(img, (self.input_size, self.input_size)) / 255

    def load_input_imgpath_label(self, file_name, labels_num=1, shuffle=True):
        """Parse a label file into arrays of image paths and labels.

        Args:
            file_name: label file name inside ``data_dir``.
            labels_num: 1 for a single integer label per line, otherwise the
                number of space-separated integers to read per line.
            shuffle: shuffle the lines before parsing.

        Returns:
            ``(paths, labels)`` as numpy arrays in matching order.
        """
        imgpath = []
        labels = []

        with open(os.path.join(self.data_dir, file_name)) as f:
            lines_list = f.readlines()
        if shuffle:
            random.shuffle(lines_list)

        for raw_line in lines_list:
            fields = raw_line.rstrip().split(',')
            if fields == ['']:
                # Ignore blank lines such as a trailing empty line.
                continue
            if labels_num == 1:
                label = int(fields[1])
            else:
                parts = fields[1].split(' ')
                label = [int(parts[i]) for i in range(labels_num)]
            imgpath.append(fields[0])
            labels.append(label)
        return np.array(imgpath), np.array(labels)

    def train_num(self):
        """Return the number of lines in ``train.txt``."""
        return self.__train_num

    def val_num(self):
        """Return the number of lines in ``val.txt``."""
        return self.__val_num

    def test_num(self):
        """Return the number of lines in ``test.txt``."""
        return self.__test_num

    def load_batch_data_label(self, filename_list, label_list, label_num=1, shuffle=True):
        """Yield ``(batch_x, batch_y)`` pairs covering the data once.

        Only full batches are produced; the trailing
        ``len(filename_list) % batch_size`` samples are dropped.

        Args:
            filename_list: numpy array of image paths.
            label_list: numpy array of labels aligned with ``filename_list``.
            label_num: 1 to one-hot encode integer labels, otherwise labels
                are passed through as an array.
            shuffle: shuffle sample order, and again inside each batch.
        """
        file_num = len(filename_list)
        if shuffle:
            order = np.random.permutation(file_num)
            filename_list = filename_list[order]
            label_list = label_list[order]
        max_num = file_num - (file_num % self.batch_size)
        for start in range(0, max_num, self.batch_size):
            batch_x = []
            batch_y = []
            for offset in range(self.batch_size):
                img = self.__load_input_img(self.proj_dir, filename_list[start + offset], self.fine_tune)
                if not self.fine_tune:
                    # Grayscale images get an explicit channel axis.
                    img = np.resize(img, (self.input_size, self.input_size, 1))
                batch_x.append(img)
                batch_y.append(label_list[start + offset])
            batch_x = np.array(batch_x, dtype=np.float32)
            if label_num == 1:
                # NOTE(review): the class count 7 is hard-coded here; confirm
                # it matches the single-label datasets this is used with.
                batch_y = tf.keras.utils.to_categorical(batch_y, 7)
            else:
                batch_y = np.array(batch_y)
            if shuffle:
                order = np.random.permutation(self.batch_size)
                batch_x = batch_x[order]
                batch_y = batch_y[order]
            yield batch_x, batch_y
108 |
109 |
class Cofw(DataSet):
    """COFW face-occlusion dataset with multi-label targets."""

    def __init__(self, proj_dir, data_dir, batch_size=64, input_size=64, class_num=2, fine_tune=False):
        """Initialise the base dataset and print the split sizes.

        Args:
            class_num: number of label values stored per sample; the other
                arguments are forwarded to ``DataSet.__init__``.
        """
        DataSet.__init__(self, proj_dir, data_dir, batch_size, input_size, fine_tune)
        self.class_num = class_num

        print("fod train_num:%d" % self.train_num())
        print("fod val_num:%d" % self.val_num())
        print("fod test_num:%d" % self.test_num())

    def data_generator(self, input_name_list, output_name_list, label_file_name='train.txt', shuffle=True):
        """Endlessly yield ``({input_name: x}, {output_name: y})`` batches.

        Every pass over the label file yields all of its full batches before
        the next pass starts. Building a fresh batch generator per yielded
        item and taking only its first batch would, with ``shuffle=False``,
        return the same first batch forever.

        Args:
            input_name_list: list whose first item names the model input.
            output_name_list: list whose first item names the model output.
            label_file_name: label file inside the dataset directory.
            shuffle: shuffle the samples on every pass.

        Raises:
            ValueError: if the label file holds fewer samples than one batch,
                because no batch could ever be produced.
        """
        fod_filenames, fod_labels = self.load_input_imgpath_label(
            label_file_name, labels_num=self.class_num, shuffle=shuffle)
        if len(fod_filenames) < self.batch_size:
            raise ValueError(
                "{} has {} samples, fewer than batch_size={}".format(
                    label_file_name, len(fod_filenames), self.batch_size))

        while True:
            batches = self.load_batch_data_label(
                fod_filenames, fod_labels, label_num=self.class_num, shuffle=shuffle)
            for fod_batch_x, fod_batch_y in batches:
                yield ({input_name_list[0]: fod_batch_x},
                       {output_name_list[0]: fod_batch_y})
127 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.platform import app
3 | import os
4 | import numpy as np
5 | import cv2
6 | import argparse
7 | import sys
8 | import datasets
9 | import models
10 |
def parse_args():
    """Parse the command-line flags of the training / testing script.

    Boolean flags use the registered ``"bool"`` converter, which maps the
    case-insensitive text ``true`` to True and anything else to False. The
    builtin ``bool`` must not be used as the type: it treats every non-empty
    string, including ``"False"``, as True.

    Returns:
        ``(namespace, unparsed)`` as returned by
        ``ArgumentParser.parse_known_args``.
    """
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")

    parser.add_argument("--proj_dir", type=str, default="./", help="Project directory")
    parser.add_argument("--input_size", type=int, default=96, help="Input image size")
    parser.add_argument("--batch_size", type=int, default=100, help="Batch size.")
    parser.add_argument("--fine_tune", type="bool", default=False, help="Fine tune based on Vgg16.")
    parser.add_argument("--train", type="bool", default=False, help="Train or test.")
    parser.add_argument("--epochs", type=int, default=100, help="Train epochs")
    parser.add_argument("--camera_test", type="bool", default=True,
                        help="Camera video stream test. Need a camera device")

    return parser.parse_known_args()
24 |
def main(unused_args):
    """Build the dataset and network, train or load weights, then evaluate.

    Reads the module-level ``FLAGS`` namespace. After ``net.fit`` the network
    is evaluated either on the test split or on the live camera stream,
    depending on ``FLAGS.camera_test``.
    """
    proj_dir = FLAGS.proj_dir
    if len(proj_dir) == 0:
        raise Exception("Please set project directory")

    model_dir = os.path.join(proj_dir, 'model/')
    log_dir = os.path.join(proj_dir, 'log/')

    class_names = ['normal', 'right_eye', 'left_eye', 'nose', 'mouth', 'chin']
    class_count = len(class_names)

    dataset = datasets.Cofw(
        proj_dir=proj_dir,
        data_dir='data/cofw/',
        batch_size=FLAGS.batch_size,
        input_size=FLAGS.input_size,
        class_num=class_count,
        fine_tune=FLAGS.fine_tune,
    )

    vgg16_weights = os.path.join(model_dir, 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5')
    net = models.FodNet(
        dataset,
        class_count,
        batch_size=FLAGS.batch_size,
        input_size=FLAGS.input_size,
        fine_tune=FLAGS.fine_tune,
        fine_tune_model_file=vgg16_weights,
    )

    # Trains when FLAGS.train is set, otherwise loads the saved weights.
    net.fit(model_dir + 'fod_model.h5', model_dir, log_dir,
            max_epoches=FLAGS.epochs,
            train=FLAGS.train)

    if FLAGS.camera_test:
        # Imported lazily so dlib / imutils are only needed for camera runs.
        import camera_tester
        landmark_model = os.path.join(model_dir, 'shape_predictor_68_face_landmarks.dat')
        tester = camera_tester.CameraTester(net, FLAGS.input_size, FLAGS.fine_tune,
                                            landmark_model)
        tester.run()
    else:
        net.predict()
55 |
if __name__ == '__main__':
    # FLAGS must be a module-level name: main() reads it as a global.
    FLAGS, unparsed = parse_args()
    # Hand the unrecognised arguments on to the TensorFlow app runner.
    remaining_argv = [sys.argv[0]] + unparsed
    app.run(main=main, argv=remaining_argv)
59 |
60 |
61 |
--------------------------------------------------------------------------------
/models.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os
3 | import numpy as np
4 | import cv2
5 | import heapq
6 |
7 | # import tensorflow.contrib.eager as tfe
8 | # tfe.enable_eager_execution()
9 |
10 | # np.set_printoptions(threshold=np.nan)
11 |
# Default value of FodNet.fit's ``max_epoches`` argument.
EPOCHS = 25
13 |
class FodNet:
    """Keras face-occlusion network: model construction, training, evaluation.

    Depending on ``fine_tune`` the model is either a small CNN built from
    scratch on single-channel input or a VGG16 backbone on 3-channel input,
    each followed by the same dense head and a sigmoid output layer with
    ``class_num`` units.
    """

    def __init__(self, dataset, class_num, batch_size, input_size, fine_tune=True, fine_tune_model_file='imagenet'):
        """Store the configuration and build the compiled model.

        Args:
            dataset: object providing ``data_generator``, ``train_num``,
                ``val_num`` and ``test_num`` (used by ``fit`` / ``predict``).
            class_num: number of output units.
            batch_size: batch size used to derive step counts.
            input_size: side length of the square input image.
            fine_tune: build the VGG16-based model when True, otherwise the
                small CNN.
            fine_tune_model_file: value passed as ``weights`` to
                ``tf.keras.applications.VGG16``.
        """
        self.class_num = class_num
        self.batch_size = batch_size
        self.input_size = input_size
        self.dataset = dataset
        self.fine_tune_model_file = fine_tune_model_file
        if fine_tune:
            self.model = self.fine_tune_model()
        else:
            self.model = self.__create_model()

    def __base_model(self, inputs):
        """Apply the convolutional feature extractor to ``inputs``.

        Two stages, each made of two Conv(64, 3x3, same) -> BatchNorm -> ReLU
        groups followed by a 2x2 max pooling with stride 2.
        """

        feature = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(inputs)
        feature = tf.keras.layers.BatchNormalization()(feature)
        feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature)
        feature = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(feature)
        feature = tf.keras.layers.BatchNormalization()(feature)
        feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature)
        feature = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2))(feature)

        feature = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(feature)
        feature = tf.keras.layers.BatchNormalization()(feature)
        feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature)
        feature = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(feature)
        feature = tf.keras.layers.BatchNormalization()(feature)
        feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature)
        feature = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2))(feature)

        return feature

    def __dense(self, feature):
        """Apply the dense head shared by both model variants.

        Flatten, then Dense(128) and Dense(256), each followed by
        BatchNorm -> ReLU -> Dropout(0.5).
        """
        feature = tf.keras.layers.Flatten()(feature)
        feature = tf.keras.layers.Dense(units=128)(feature)
        feature = tf.keras.layers.BatchNormalization()(feature)
        feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature)
        feature = tf.keras.layers.Dropout(0.5)(feature)
        feature = tf.keras.layers.Dense(units=256)(feature)
        feature = tf.keras.layers.BatchNormalization()(feature)
        feature = tf.keras.layers.Activation(activation=tf.nn.relu)(feature)
        feature = tf.keras.layers.Dropout(0.5)(feature)
        return feature

    def __create_model(self):
        """Build and compile the from-scratch CNN on single-channel input.

        Returns:
            A compiled ``tf.keras.Model`` with input ``fod_input`` and
            sigmoid output ``fod_output``, trained with binary cross-entropy.
        """
        input_fod = tf.keras.layers.Input(name='fod_input', shape=(self.input_size, self.input_size, 1))

        feature_fod = self.__base_model(input_fod)
        feature_fod = self.__dense(feature_fod)

        # Sigmoid (not softmax): each output unit is scored independently.
        output_fod = tf.keras.layers.Dense(name='fod_output', units=self.class_num, activation=tf.nn.sigmoid)(feature_fod)

        model = tf.keras.Model(inputs=[input_fod], outputs=[output_fod])

        losses = {
            'fod_output': 'binary_crossentropy',
        }

        model.compile(optimizer=tf.train.AdamOptimizer(),
                      loss=losses,
                      metrics=['accuracy'])

        return model

    def __extract_output(self, model, name, input):
        """Rename ``model``, mark all its layers trainable and apply it to ``input``."""
        model._name = name
        for layer in model.layers:
            layer.trainable = True
        return model(input)

    def fine_tune_model(self):
        """Build and compile the VGG16-based model on 3-channel input.

        Returns:
            A compiled ``tf.keras.Model`` with input ``fod_input`` and
            sigmoid output ``fod_output``, trained with binary cross-entropy.
        """
        input_fod = tf.keras.layers.Input(name='fod_input', shape=(self.input_size, self.input_size, 3))

        # resnet_fod = tf.keras.applications.ResNet50(weights='imagenet', include_top=False)
        # feature_fod = self.__extract_output(resnet_fod, 'resnet_fod', input_fod)

        # VGG16 without its classification top, used as the feature extractor.
        vgg16_fod = tf.keras.applications.VGG16(weights=self.fine_tune_model_file, include_top=False)
        feature_fod = self.__extract_output(vgg16_fod, 'vgg16_fod', input_fod)

        feature_fod = self.__dense(feature_fod)
        output_fod = tf.keras.layers.Dense(name='fod_output', units=self.class_num, activation=tf.nn.sigmoid)(feature_fod)

        model = tf.keras.Model(inputs=[input_fod], outputs=[output_fod])

        losses = {
            'fod_output': 'binary_crossentropy',
        }

        model.compile(optimizer=tf.train.AdamOptimizer(),
                      loss=losses,
                      metrics=['accuracy'])
        return model

    def fit(self, model_file, checkpoint_dir, log_dir, max_epoches=EPOCHS, train=True):
        """Train the model, or load previously saved weights.

        Args:
            model_file: path the trained model is saved to, or the weights
                are loaded from when ``train`` is False.
            checkpoint_dir: path handed to ``ModelCheckpoint``.
            log_dir: TensorBoard log directory.
            max_epoches: maximum number of training epochs.
            train: train when True, otherwise only load ``model_file``.
        """
        self.model.summary()

        if not train:
            self.model.load_weights(model_file)
        else:
            cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_dir,
                                                             save_weights_only=True,
                                                             save_best_only=True,
                                                             period=1,
                                                             verbose=1)
            # Stops as soon as val_loss fails to improve by 0.001 for one epoch.
            earlystop_cb = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                            mode='min',
                                                            min_delta=0.001,
                                                            patience=1,
                                                            verbose=1)

            tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

            input_name_list = ['fod_input']
            output_name_list = ['fod_output']
            # Step counts use integer division, so a partial last batch is not counted.
            self.model.fit_generator(generator=self.dataset.data_generator(input_name_list, output_name_list, 'train.txt'),
                                     epochs=max_epoches,
                                     steps_per_epoch=self.dataset.train_num() // self.batch_size,
                                     validation_data=self.dataset.data_generator(input_name_list, output_name_list, 'val.txt'),
                                     validation_steps=self.dataset.val_num() // self.batch_size,
                                     callbacks=[cp_callback, earlystop_cb, tb_callback],
                                     max_queue_size=10,
                                     workers=1,
                                     verbose=1)

            self.model.save(model_file)

    def predict(self):
        """Predict on ``test.txt`` and print an exact-match accuracy.

        A sample counts as correct when the indices of its ``k`` largest
        predicted scores equal the indices of its true labels, where ``k`` is
        the number of ones in the true label vector.

        Returns:
            The predictions returned by ``predict_generator``.
        """
        input_name_list = ['fod_input']
        output_name_list = ['fod_output']
        predictions = self.model.predict_generator(generator=self.dataset.data_generator(input_name_list, output_name_list, 'test.txt', shuffle=False),
                                                   steps=self.dataset.test_num() // self.batch_size,
                                                   verbose=1)
        if len(predictions) > 0:
            fod_preds = predictions
            # print(fod_preds)
            # A second, unshuffled generator supplies the ground-truth labels
            # that are compared against the predictions above.
            test_data = self.dataset.data_generator(input_name_list, output_name_list, 'test.txt', shuffle=False)
            correct = 0
            steps = self.dataset.test_num() // self.batch_size
            total = steps * self.batch_size

            for step in range(steps):
                _, test_batch_y = next(test_data)
                fod_real_batch = test_batch_y['fod_output']
                for i, fod_real in enumerate(fod_real_batch):
                    fod_real = fod_real.tolist()
                    one_num = fod_real.count(1)
                    # Indices of the one_num highest scores, looked up by value.
                    fod_pred_idxs = sorted(list(map(fod_preds[self.batch_size * step + i].tolist().index,
                                                    heapq.nlargest(one_num, fod_preds[self.batch_size * step + i]))))
                    fod_real_idxs = [i for i,x in enumerate(fod_real) if x == 1]
                    # print(fod_pred_idxs)
                    # print(fod_real_idxs)
                    if fod_real_idxs == fod_pred_idxs:
                        correct += 1

            print("fod==> correct:{}, total:{}, correct_rate:{}".format(correct, total, 1.0 * correct / total))
        return predictions

    def test_online(self, face_imgs):
        """Predict for the first entry of ``face_imgs``.

        Args:
            face_imgs: sequence of dicts; only ``face_imgs[0]['fod_input']``
                is used.

        Returns:
            The model prediction for that single image (batch of one).
        """
        batch_x = np.array(face_imgs[0]['fod_input'], dtype=np.float32)
        # Add the leading batch axis expected by the model.
        batch_x = np.expand_dims(batch_x, 0)

        predictions = self.model.predict({'fod_input': batch_x}, batch_size=1)
        # predictions = np.asarray(predictions)
        return predictions
178 |
--------------------------------------------------------------------------------
/prepare_data_cofw.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import numpy as np
3 | import cv2
4 | import random
5 | import os
6 | import math
7 | import shutil
8 | import argparse
9 |
10 |
class Augmenter:
    """Simple photometric image augmentations (brightness, flip, noise)."""

    def __init__(self, save_dir="."):
        """Remember the directory associated with the augmented images."""
        self.save_dir = save_dir

    def adjust_brightness(self, img, a=1.0, g=8):
        """Return ``img * a + g`` computed with ``cv2.addWeighted``.

        Args:
            img: image array with any number of channels.
            a: multiplicative gain applied to the image.
            g: constant offset added to every pixel.
        """
        # An all-zero image of matching shape/dtype is the second blend
        # operand, so only the gain and the offset take effect.
        mask = np.zeros_like(img)
        return cv2.addWeighted(img, a, mask, 1 - a, g)

    def darker(self, img):
        """Return a darker copy of ``img`` (gain 0.8)."""
        return self.adjust_brightness(img, 0.8)

    def brighter(self, img):
        """Return a brighter copy of ``img`` (gain 1.3)."""
        return self.adjust_brightness(img, 1.3)

    def flip(self, img):
        """Return ``img`` mirrored around the vertical axis."""
        return cv2.flip(img, 1)

    def gasuss_noise(self, img, mean=0, var=0.001):
        """Return a uint8 copy of ``img`` with additive Gaussian noise.

        Args:
            img: image with values in the 0-255 range.
            mean: mean of the noise, in [0, 1] intensity units.
            var: variance of the noise, in [0, 1] intensity units.
        """
        scaled = np.array(img / 255, dtype=float)
        noise = np.random.normal(mean, var ** 0.5, scaled.shape)
        # Clip to [0, 1] before converting back: a lower bound of -1 would
        # leave negative values that wrap around when cast to uint8, turning
        # dark pixels into bright speckles.
        noisy = np.clip(scaled + noise, 0.0, 1.0)
        return np.uint8(noisy * 255)

    def augment(self, img):
        """Return ``(darker, brighter, noisy)`` variants of ``img``."""
        darker_img = self.darker(img)
        brighter_img = self.brighter(img)
        noise_img = self.gasuss_noise(img)
        return darker_img, brighter_img, noise_img
51 |
52 |
def augmentate(img_idx_file, aug_img_dir, aug_label_file):
    """Write darker / brighter / noisy variants of every indexed image.

    Each non-blank line of ``img_idx_file`` is ``<image path>,<annotation>``,
    where the annotation is either a label or ``<bbox>,<phis>``. For every
    line three images are written to ``aug_img_dir`` and three records with
    the unchanged annotation are appended to ``aug_label_file``.

    Args:
        img_idx_file: index file listing the source images.
        aug_img_dir: output directory for the augmented images.
        aug_label_file: output index file for the augmented images.

    Raises:
        FileNotFoundError: if a listed image cannot be read.
        ValueError: if a line contains no comma.
    """
    augmenter = Augmenter(aug_img_dir)
    os.makedirs(aug_img_dir, exist_ok=True)

    with open(img_idx_file, 'r') as fr:
        lines = fr.readlines()

    num = 1
    with open(aug_label_file, 'w+') as fw:
        for line in lines:
            record = line.rstrip('\n')
            if not record.strip():
                # Skip blank lines instead of failing on the unpack below.
                continue
            # Everything after the first comma is copied through verbatim,
            # which covers both the label and the bbox,phis formats.
            img_path, annotation = record.split(',', 1)

            img = cv2.imread(img_path)
            if img is None:
                raise FileNotFoundError("Unable to read image: {}".format(img_path))

            variants = zip(('dark_', 'brighter_', 'noise_'), augmenter.augment(img))
            for prefix, variant in variants:
                out_path = os.path.join(aug_img_dir, prefix + str(num) + '.jpg')
                cv2.imwrite(out_path, variant)
                # Always terminate the record so a source file without a
                # trailing newline cannot glue two records together.
                fw.write(out_path + ',' + annotation + '\n')
            num += 1
86 |
87 | #mat_file: COFW_train.mat, COFW_test.mat
88 | #img_token: 'IsTr', 'IsT'
89 | #bbox_token: 'bboxesTr', 'bboxesT'
90 | #phis_token: 'phisTr', 'phisT'
def mat_to_files(mat_file, img_token, bbox_token, phis_token, img_dir, gt_txt_file):
    """Export the images and annotations of a COFW ``.mat`` file.

    Image ``i`` is written to ``img_dir/<i>.jpg`` and one line
    ``<image path>,<bbox values> ,<phis values> `` is written to
    ``gt_txt_file`` (values are space separated).

    Args:
        mat_file: path of the HDF5-based ``.mat`` file.
        img_token: dataset name holding the image references.
        bbox_token: dataset name holding the bounding boxes.
        phis_token: dataset name holding the landmark annotations.
        img_dir: output directory for the images.
        gt_txt_file: output annotation file.
    """
    # The context manager closes the HDF5 file even if exporting fails.
    with h5py.File(mat_file, 'r') as mat, open(gt_txt_file, "w+") as gt_fp:
        img_refs = mat[img_token][:]
        total_num = img_refs.shape[1]

        # Read the annotation tables once instead of once per image.
        bboxes = np.transpose(mat[bbox_token][:])
        all_phis = np.transpose(mat[phis_token][:])

        if total_num > 0 and not os.path.exists(img_dir):
            os.mkdir(img_dir)

        for i in range(total_num):
            # Each entry is an object reference to the image dataset.
            img = np.transpose(mat[img_refs[0][i]][:])
            img_path = img_dir + "/{}.jpg".format(i)
            cv2.imwrite(img_path, img)

            bbox_text = ''.join(value.astype(str) + ' ' for value in bboxes[i])
            phis_text = ''.join(value.astype(str) + ' ' for value in all_phis[i])
            gt_fp.write(img_path + ',' + bbox_text + ',' + phis_text + '\n')
119 |
120 |
def move_test_to_train(test_gt_txt, train_gt_txt, new_test_txt, new_train_txt, test_num):
    """Re-split the annotations, keeping only ``test_num`` test records.

    ``new_train_txt`` starts as a copy of ``train_gt_txt``. The first
    ``test_num`` lines of ``test_gt_txt`` go to ``new_test_txt``; all
    remaining lines are appended to ``new_train_txt``.
    """
    shutil.copy(train_gt_txt, new_train_txt)

    with open(test_gt_txt, 'r') as source:
        records = source.readlines()

    with open(new_test_txt, 'w+') as kept_fp, open(new_train_txt, 'a+') as moved_fp:
        for position, record in enumerate(records, start=1):
            destination = kept_fp if position <= test_num else moved_fp
            destination.write(record)
134 |
135 |
def crop_face(gt_txt, face_img_dir, show=False):
    """Crop the face of every annotated image and save it as ``<n>.jpg``.

    Each line of ``gt_txt`` is ``<image path>,<bbox>,<phis>``. ``phis`` holds
    29 x coordinates followed by 29 y coordinates (further values are
    ignored here). The crop is a square 15% larger than the landmark extent,
    clamped to the image.

    Args:
        gt_txt: annotation file.
        face_img_dir: output directory; it is used as a plain string prefix,
            so it must end with a path separator.
        show: display every crop and wait for a key press.

    Raises:
        FileNotFoundError: if an annotated image cannot be read.
    """
    if not os.path.exists(face_img_dir):
        os.mkdir(face_img_dir)

    with open(gt_txt, 'r') as gt_fp:
        for img_num, line in enumerate(gt_fp, start=1):
            img_path, bbox, phis = line.split(',')

            img = cv2.imread(img_path)
            if img is None:
                raise FileNotFoundError("Unable to read image: {}".format(img_path))

            phis = [int(float(x)) for x in phis.strip('\n').strip(' ').split(' ')]

            xarr = phis[:29]
            # y coordinates start at index 29; slicing from 30 would drop the
            # first landmark's y value.
            yarr = phis[29:58]
            min_x, max_x = min(xarr), max(xarr)
            min_y, max_y = min(yarr), max(yarr)

            Lmax = max(max_x - min_x, max_y - min_y) * 1.15
            delta = Lmax // 2
            center_x = (max_x + min_x) // 2
            center_y = (max_y + min_y) // 2

            # The window is shifted slightly downwards (0.98 above / 1.02 below).
            x = max(int(center_x - delta), 0)
            y = max(int(center_y - 0.98 * delta), 0)
            endx = min(int(center_x + delta), img.shape[1])
            endy = min(int(center_y + 1.02 * delta), img.shape[0])

            face = img[y: endy, x: endx]

            if show:
                cv2.imshow("face", face)
                cv2.waitKey(0)

            cv2.imwrite(face_img_dir + "{}.jpg".format(img_num), face)
184 |
185 |
def face_label(gt_txt, face_img_dir, face_txt, show=False):
    """Derive a six-value occlusion label for every annotated face.

    Each line of ``gt_txt`` is ``<image path>,<bbox>,<phis>``; the values of
    ``phis`` from index 58 onwards are the per-landmark occlusion flags. One
    line ``<face_img_dir><n>.jpg,<l0> <l1> <l2> <l3> <l4> <l5>`` is written
    to ``face_txt`` per input line, where the label order is normal, right
    eye, left eye, nose, mouth, chin.

    Args:
        gt_txt: annotation file.
        face_img_dir: string prefix of the face image paths in the output.
        face_txt: output label file.
        show: draw the numbered landmarks on each image and wait for a key.
    """
    with open(face_txt, "w+") as face_txt_fp, open(gt_txt, 'r') as gt_fp:
        for img_num, record in enumerate(gt_fp, start=1):
            img_path, bbox, phis = record.split(',')

            tokens = phis.strip('\n').strip(' ').split(' ')
            phis = [int(float(tok)) for tok in tokens]

            if show:
                img = cv2.imread(img_path)
                for i in range(29):
                    point = (phis[i], phis[i + 29])
                    cv2.circle(img, point, 2, (0, 255, 255))
                    cv2.putText(img, str(i), point, cv2.FONT_HERSHEY_COMPLEX,0.3,(0,0,255),1)
                cv2.imshow("img", img)
                cv2.waitKey(0)

            slot = phis[58:]
            # One flag per region, in label order 1..5.
            region_flags = [
                slot[16],                                                  # right eye
                slot[17],                                                  # left eye
                slot[20],                                                  # nose
                slot[22] or slot[23] or slot[25] or slot[26] or slot[27],  # mouth
                slot[28],                                                  # chin
            ]
            region_labels = [1 if flag else 0 for flag in region_flags]
            # Label 0 ("normal") is set only when no region is occluded.
            normal = 0 if any(region_labels) else 1
            label = [normal] + region_labels

            lab_str = ' '.join(str(value) for value in label)
            face_txt_fp.write(face_img_dir + "{}.jpg".format(img_num) + ',' + lab_str + '\n')
241 |
def img_shift(img_size, delta, shift_dir, orig_txt, shift_txt, show=False):
    """Augment images with five shifted crops (four corners and the center).

    Every image listed in ``orig_txt`` (lines ``<image path>,<label>``) is
    read as grayscale and resized to ``img_size`` x ``img_size``. Five crops
    of side ``img_size - delta`` are then saved to ``shift_dir``. The
    original record and one record per crop are written to ``shift_txt``.

    Args:
        img_size: side length the source image is resized to.
        delta: number of pixels removed from each dimension by the crop.
        shift_dir: output directory; it is used as a plain string prefix,
            so it must end with a path separator.
        orig_txt: input label file.
        shift_txt: output label file.
        show: display every crop and wait for a key press.

    Raises:
        FileNotFoundError: if a listed image cannot be read.
    """
    crop_size = img_size - delta
    center = delta // 2
    # (file prefix, top row, left column) of every crop window. All windows
    # are exactly crop_size wide and touch their corner; the center window
    # leaves delta // 2 pixels before it.
    windows = [
        ('tl_', 0, 0),
        ('tr_', 0, delta),
        ('bl_', delta, 0),
        ('br_', delta, delta),
        ('ct_', center, center),
    ]

    with open(shift_txt, "w+") as shift_fp, open(orig_txt, 'r') as orig_fp:
        if not os.path.exists(shift_dir):
            os.mkdir(shift_dir)

        for line in orig_fp:
            # Keep the original sample in the output list as well.
            shift_fp.write(line)

            img_path, label = line.split(',')
            img_name = os.path.basename(img_path)

            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                raise FileNotFoundError("Unable to read image: {}".format(img_path))
            img = cv2.resize(img, (img_size, img_size))

            for prefix, top, left in windows:
                crop_img = img[top:top + crop_size, left:left + crop_size]

                shift_img_path = shift_dir + prefix + img_name
                cv2.imwrite(shift_img_path, crop_img)
                # label still carries the line's trailing newline.
                shift_fp.write(shift_img_path + "," + label)
                if show:
                    cv2.imshow(prefix, crop_img)
                    cv2.waitKey(0)
286 |
287 |
288 |
def add_block_and_crop_face(gt_txt, block_txt, block_dir, rand_num, show=False):
    """Create synthetically occluded face crops with matching labels.

    The annotation file is processed ``rand_num`` times. On every pass each
    image gets between one and six randomly chosen regions painted with a
    flat gray value, its six-value label (normal, right eye, left eye, nose,
    mouth, chin) is updated accordingly, and the face is cropped and saved
    as ``<block_dir><n>.jpg``. One record ``<image path>,<label>`` per saved
    image is written to ``block_txt``.

    Args:
        gt_txt: annotation file with lines ``<image path>,<bbox>,<phis>``;
            ``phis`` holds 29 x values, 29 y values and 29 occlusion flags.
        block_txt: output label file.
        block_dir: output directory; it is used as a plain string prefix,
            so it must end with a path separator.
        rand_num: number of passes over ``gt_txt``.
        show: display every crop and wait for a key press.

    Raises:
        FileNotFoundError: if an annotated image cannot be read.
        Exception: if a computed face crop has zero height.
    """

    def fill(img, top, bottom, left, right, value):
        # Clamp at 0: a negative slice bound would be interpreted relative to
        # the far edge and paint the wrong area (or nothing at all).
        img[max(top, 0):max(bottom, 0), max(left, 0):max(right, 0)] = value

    with open(block_txt, "w+") as block_txt_fp:
        if not os.path.exists(block_dir):
            os.mkdir(block_dir)
        num = 1
        for _ in range(rand_num):
            with open(gt_txt, 'r') as gt:
                for line in gt:
                    img_path, bbox, phis = line.split(',')
                    img = cv2.imread(img_path)
                    if img is None:
                        raise FileNotFoundError("Unable to read image: {}".format(img_path))

                    phis = [int(float(x)) for x in phis.strip('\n').strip(' ').split(' ')]

                    xs = phis[:29]
                    # y coordinates start at index 29 (not 30).
                    ys = phis[29:58]
                    slot = phis[58:]

                    # Initial label from the annotated occlusion flags.
                    label = [1, 0, 0, 0, 0, 0]
                    if slot[16]:
                        label[1] = 1  # right eye
                        label[0] = 0
                    if slot[17]:
                        label[2] = 1  # left eye
                        label[0] = 0
                    if slot[20]:
                        label[3] = 1  # nose
                        label[0] = 0
                    if slot[22] or slot[23] or slot[25] or slot[26] or slot[27]:
                        label[4] = 1  # mouth
                        label[0] = 0
                    if slot[28]:
                        label[5] = 1  # chin
                        label[0] = 0

                    # The image was freshly read, so it is painted in place.
                    block_img = img
                    area_num = random.randint(1, 6)

                    for _ in range(area_num):
                        value = random.randint(10, 240)
                        area_idx = random.randint(1, 6)
                        if area_idx == 1:
                            if label[area_idx] == 0:
                                label[area_idx] = 1
                                label[0] = 0
                                fill(block_img, ys[12] - 8, ys[13] + 6, xs[8] - 8, xs[10] + 8, value)
                        elif area_idx == 2:
                            if label[area_idx] == 0:
                                label[area_idx] = 1
                                label[0] = 0
                                fill(block_img, ys[14] - 8, ys[15] + 6, xs[11] - 8, xs[9] + 8, value)
                        elif area_idx == 3:
                            if label[area_idx] == 0:
                                label[area_idx] = 1
                                label[0] = 0
                                fill(block_img, ys[20] - 20, ys[21] + 1, xs[18] - 4, xs[19] + 5, value)
                        elif area_idx == 4:
                            if label[area_idx] == 0:
                                label[area_idx] = 1
                                label[0] = 0
                                fill(block_img, ys[24] - 1, ys[27] + 1, xs[22] - 2, xs[23] + 2, value)
                        elif area_idx == 5:
                            if label[area_idx] == 0:
                                label[area_idx] = 1
                                label[0] = 0
                                fill(block_img, ys[28] - 12, ys[28] + 15, xs[28] - 25, xs[28] + 25,
                                     random.randint(160, 255))
                        else:
                            # area_idx == 6: cover both eyes with dark blocks,
                            # only when neither eye is occluded yet.
                            if label[1] == 0 and label[2] == 0:
                                label[1] = 1
                                label[0] = 0
                                fill(block_img, ys[12] - 8, ys[13] + 6, xs[8] - 10, xs[10] + 8,
                                     random.randint(0, 50))
                                label[2] = 1
                                label[0] = 0
                                fill(block_img, ys[14] - 8, ys[15] + 6, xs[11] - 10, xs[9] + 10,
                                     random.randint(0, 50))

                    img_file = str(num) + ".jpg"
                    lab_str = ' '.join(str(x) for x in label)
                    block_txt_fp.write(block_dir + img_file + ',' + lab_str + '\n')

                    # Square crop 15% larger than the landmark extent.
                    min_x, max_x = min(xs), max(xs)
                    min_y, max_y = min(ys), max(ys)
                    Lmax = max(max_x - min_x, max_y - min_y) * 1.15

                    delta = Lmax // 2
                    center_x = (max_x + min_x) // 2
                    center_y = (max_y + min_y) // 2
                    x = max(int(center_x - delta), 0)
                    y = max(int(center_y - 0.98 * delta), 0)
                    endx = min(int(center_x + delta), block_img.shape[1])
                    endy = min(int(center_y + 1.02 * delta), block_img.shape[0])

                    face = block_img[y: endy, x: endx]

                    if not face.shape[0] > 0:
                        raise Exception("face size error")

                    cv2.imwrite(block_dir + img_file, face)
                    if show:
                        cv2.imshow("face", face)
                        cv2.waitKey(0)

                    num += 1
419 |
420 |
def label_spec_face(label_file, file_root):
    """Write an index file that gives every file under a directory one label.

    ``file_root`` is walked recursively and one line is written per file
    found, in the form ``<path>,1 0 0 0 0 0`` (first slot set, the five
    remaining slots clear).

    Args:
        label_file: Path of the index file to create; an existing file is
            truncated.
        file_root: Directory whose files are listed.
    """
    labels = '1 0 0 0 0 0'
    with open(label_file, 'w+') as tlf:
        for root, _dirs, files in os.walk(file_root):
            for f in files:
                # Join against the directory currently being walked, not
                # against file_root: files found in sub-directories then get
                # their real path, and a file_root given without a trailing
                # separator no longer produces a glued "dirname" + "file".
                img_path = os.path.join(root, f)
                tlf.write(img_path + ',' + labels + '\n')
430 |
431 |
def merge_txt(txt1, txt2, merge_txt):
    """Concatenate two text files into a third, line by line.

    The destination is opened (and truncated) first; then every line of
    ``txt1`` followed by every line of ``txt2`` is copied into it unchanged.

    Args:
        txt1: Path of the file whose lines come first.
        txt2: Path of the file whose lines come second.
        merge_txt: Path of the output file.
    """
    with open(merge_txt, 'w+') as merged_fp:
        # Sources are opened one after the other, in argument order.
        for source_path in (txt1, txt2):
            with open(source_path, 'r') as source_fp:
                for row in source_fp:
                    merged_fp.write(row)
444 |
445 |
def split_train_file(merge_train_txt, train_txt, val_txt, train_ratio):
    """Split an index file into a train part and a validation part.

    The first ``floor(total * train_ratio)`` lines of ``merge_train_txt`` go
    to ``train_txt`` and the remaining lines go to ``val_txt``. The split
    itself follows file order; each part is shuffled independently before
    being written. Both output files are always created, even when a part
    is empty.

    Args:
        merge_train_txt: Path of the index file to split.
        train_txt: Path of the train index file to write.
        val_txt: Path of the validation index file to write.
        train_ratio: Fraction of lines assigned to the train part, in [0, 1].

    Raises:
        ValueError: If ``train_ratio`` is outside [0, 1].
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(
            "train_ratio must be within [0, 1], got {}".format(train_ratio))

    with open(merge_train_txt, 'r') as fp:
        lines_list = fp.readlines()

    total = len(lines_list)
    # int() keeps the value usable as a slice index even where math.floor
    # returns a float.
    train_num = int(math.floor(total * train_ratio))
    val_num = total - train_num

    # Slicing covers the edge cases a running counter misses: with
    # train_num == 0 (or an empty input) the train file is still written.
    train_lines = lines_list[:train_num]
    val_lines = lines_list[train_num:]

    # Shuffle train first, then val, so the random stream is consumed in the
    # same order as a sequential pass over the input would consume it.
    random.shuffle(train_lines)
    random.shuffle(val_lines)

    with open(train_txt, "w+") as trainf:
        trainf.writelines(train_lines)
    with open(val_txt, "w+") as valf:
        valf.writelines(val_lines)

    print("train_num:{}, val_num:{}".format(train_num, val_num))
474 |
475 |
def prepare_data(data_root):
    """Run the full data-preparation pipeline under ``data_root``.

    The pipeline is a fixed sequence of steps, each producing a file or
    directory inside ``data_root``. Every step is guarded by an existence
    check on its own output, so re-running the function only performs the
    steps whose output is still missing.

    Args:
        data_root: Data root directory. All inputs are read from it and all
            outputs are written to it. A trailing path separator is added
            when missing, because every path below is built by string
            concatenation.
    """
    # os.path.join(x, "") appends a separator only when x lacks one, so a
    # root that already ends with "/" is left untouched.
    data_root = os.path.join(data_root, "")

    # 1. Get original txt
    print("1. Generate original data index file...")
    if not os.path.exists(data_root + "glass_face.txt"):
        label_spec_face(data_root + "glass_face.txt",
                        data_root + "glass_face/")

    if not os.path.exists(data_root + "beard_face.txt"):
        label_spec_face(data_root + "beard_face.txt",
                        data_root + "beard_face/")

    if not os.path.exists(data_root + "train_ground_true.txt"):
        mat_to_files(data_root + "COFW_train.mat",
                     'IsTr', 'bboxesTr', 'phisTr',
                     data_root + "train",
                     data_root + "train_ground_true.txt")

    if not os.path.exists(data_root + "test_ground_true.txt"):
        mat_to_files(data_root + "COFW_test.mat",
                     'IsT', 'bboxesT', 'phisT',
                     data_root + "test",
                     data_root + "test_ground_true.txt")

    # This call writes both test_gt.txt and train_gt.txt; only the latter is
    # used as the guard.
    if not os.path.exists(data_root + "train_gt.txt"):
        move_test_to_train(data_root + "test_ground_true.txt",
                           data_root + "train_ground_true.txt",
                           data_root + "test_gt.txt",
                           data_root + "train_gt.txt",
                           100)

    # 2. Augment train, test and special faces
    print("2. Generate augmented train & test faces and label...")
    if not os.path.exists(data_root + "aug_train_gt.txt"):
        augmentate(data_root + "train_gt.txt",
                   data_root + "aug_train/",
                   data_root + "aug_train_gt.txt")

    if not os.path.exists(data_root + "aug_test_gt.txt"):
        augmentate(data_root + "test_gt.txt",
                   data_root + "aug_test/",
                   data_root + "aug_test_gt.txt")

    if not os.path.exists(data_root + 'spec_faces.txt'):
        merge_txt(data_root + 'beard_face.txt',
                  data_root + 'glass_face.txt',
                  data_root + 'spec_faces.txt')

    if not os.path.exists(data_root + "aug_spec_faces.txt"):
        augmentate(data_root + 'spec_faces.txt',
                   data_root + 'aug_spec_faces/',
                   data_root + 'aug_spec_faces.txt')

    # 3. Merge original + augmented ground truth, then add blocks and crop
    print("3. Add block to orig and augmented img and crop faces...")
    if not os.path.exists(data_root + "merge_train_gt.txt"):
        merge_txt(data_root + "train_gt.txt",
                  data_root + "aug_train_gt.txt",
                  data_root + "merge_train_gt.txt")

    if not os.path.exists(data_root + "merge_test_gt.txt"):
        merge_txt(data_root + "test_gt.txt",
                  data_root + "aug_test_gt.txt",
                  data_root + "merge_test_gt.txt")

    if not os.path.exists(data_root + "face_train_block.txt"):
        add_block_and_crop_face(data_root + "merge_train_gt.txt",
                                data_root + "face_train_block.txt",
                                data_root + "face_train_block/",
                                16, False)

    # 4. Crop face and get face txt
    print("4. Crop orig train & test faces and get label...")
    if not os.path.exists(data_root + "face_train/"):
        crop_face(data_root + "merge_train_gt.txt",
                  data_root + "face_train/")
    if not os.path.exists(data_root + "face_train.txt"):
        face_label(data_root + "merge_train_gt.txt",
                   data_root + "face_train/",
                   data_root + "face_train.txt", False)

    if not os.path.exists(data_root + "face_test/"):
        crop_face(data_root + "merge_test_gt.txt",
                  data_root + "face_test/")
    if not os.path.exists(data_root + "face_test.txt"):
        # NOTE(review): unlike the train call above, no fourth argument is
        # passed here, so face_label's own default applies - confirm that
        # this difference is intended.
        face_label(data_root + "merge_test_gt.txt",
                   data_root + "face_test/",
                   data_root + "face_test.txt")

    # 5. merge spec face txt with face train txt
    print("5. Merge spec face txt with face train txt...")
    # Guard on the file this step actually writes. Checking "spec_face.txt"
    # (which no step creates) made the merge re-run on every invocation.
    if not os.path.exists(data_root + "orig_aug_spec_faces.txt"):
        merge_txt(data_root + "spec_faces.txt",
                  data_root + "aug_spec_faces.txt",
                  data_root + "orig_aug_spec_faces.txt")
    if not os.path.exists(data_root + "merge_face.txt"):
        merge_txt(data_root + "orig_aug_spec_faces.txt",
                  data_root + "face_train.txt",
                  data_root + "merge_face.txt")

    # 6. merge all faces to merge_train.txt
    print("6. Merge all faces to merge_train.txt...")
    if not os.path.exists(data_root + 'merge_train.txt'):
        merge_txt(data_root + 'merge_face.txt',
                  data_root + 'face_train_block.txt',
                  data_root + 'merge_train.txt')

    # 7. get train, val and test
    print("7. Generate train, val and test...")
    # The split writes both train.txt and val.txt; only train.txt is used as
    # the guard.
    if not os.path.exists(data_root + "train.txt"):
        split_train_file(data_root + "merge_train.txt",
                         data_root + "train.txt",
                         data_root + "val.txt",
                         0.85)

    if not os.path.exists(data_root + "test.txt"):
        shutil.copy(data_root + 'face_test.txt',
                    data_root + 'test.txt')
    print("Done!")
582 |
def parse_args():
    """Parse the command line of the data-preparation script.

    Returns:
        The pair produced by ``parse_known_args``: the namespace of
        recognised options (``data_dir``) and the list of argument strings
        that were not recognised.
    """
    arg_parser = argparse.ArgumentParser()
    # Registers a "bool" type name that maps the text "true" (any case) to
    # True; no argument declared below uses it.
    arg_parser.register("type", "bool", lambda text: text.lower() == "true")

    arg_parser.add_argument(
        "--data_dir",
        default="./data/cofw/",
        type=str,
        help="Data root directory",
    )
    known_and_unknown = arg_parser.parse_known_args()
    return known_and_unknown
589 |
if __name__ == '__main__':
    # Script entry point: read --data_dir and run the whole pipeline on it.
    # Arguments the parser does not recognise land in `unparsed` and are
    # not used any further here.
    FLAGS, unparsed = parse_args()

    # An empty --data_dir leaves no root to build paths from.
    if not FLAGS.data_dir:
        raise Exception("Please set data root directory via --data_dir")

    prepare_data(FLAGS.data_dir)
597 |
--------------------------------------------------------------------------------