├── assets └── teaser.jpg ├── scripts ├── elfw-cleaner.py ├── takethecleanest.py ├── takenonlabeled.py ├── takefromfile.py ├── hof-annotations2masks.py ├── elfw-drawComparisons.py ├── elfw-findNonExtended.py ├── computeClassWeights.py ├── elfw-counter.py ├── elfw-dataset_comparisons.py ├── elfw-separateSunglasses4Validation.py ├── elfw-inspector.py ├── elfw-counter2.py ├── elfw-refineMe.py ├── elfw-plotResults.py ├── headPose.py ├── elfw-evaluateOnValidation.py ├── elfw-scribbleMe.py ├── elfw-makeThemWearSunglasses.py ├── elfw-makeThemWearMasks.py ├── elfw-makeItLookCool.py └── elfw-putYourHandsOnMeWithDlib.py ├── visualize.py ├── metrics.py ├── utils.py ├── README.md ├── transform.py ├── tester.py ├── run_trainer.py ├── demos ├── EUT_ELFW_webcam_jupyter.ipynb └── EUT_ELFW_singleimage_colab.ipynb ├── list_experiments.sh ├── models.py ├── trainer.py ├── LICENSE └── elfw.py /assets/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/multimedia-eurecat/ELFW/HEAD/assets/teaser.jpg -------------------------------------------------------------------------------- /scripts/elfw-cleaner.py: -------------------------------------------------------------------------------- 1 | # This code cleans some corrupted hands in the augmented dataset. 2 | # R. Redondo (c) Eurecat 2019 3 | 4 | import os 5 | 6 | source_path = '../elfw/elfw_AugmentedHands' 7 | path_faces = os.path.join(source_path, 'faces') 8 | path_labels = os.path.join(source_path, 'labels') 9 | 10 | target_strs = [ 'h2f_web-147', 11 | 'hof-005', 12 | 'hof-032', 13 | 'hof-042', 14 | 'hof-143', 15 | 'hof-184', 16 | 'Devin_Harris'] 17 | 18 | def clean(path): 19 | 20 | for file in os.listdir(path): 21 | for target in target_strs: 22 | if target in file: 23 | origin = os.path.join(path, file) 24 | destination = os.path.join(source_path, file) 25 | print('Moving file to ' + destination) 26 | os.rename(origin, destination) 27 | break 28 | 29 | clean(path_faces) 30 | clean(path_labels) -------------------------------------------------------------------------------- /scripts/takethecleanest.py: -------------------------------------------------------------------------------- 1 | # This script takes all non-ground-truth-labeled images in LFW and copies them apart. 2 | # Rafael Redondo - 2019 3 | 4 | import os 5 | import sys 6 | import shutil 7 | 8 | if len(sys.argv) != 3: 9 | print "Usage: $ takethecleanest " 10 | exit(0) 11 | 12 | filelist = sys.argv[1] 13 | f = open(filelist,"r") 14 | labels = [] 15 | for line in f: 16 | labels.append(line) 17 | 18 | output_folder = sys.argv[2] 19 | 20 | if not os.path.exists(output_folder): 21 | os.mkdir(output_folder) 22 | 23 | faces_folder = '../Datasets/lfw-deepfunneled-bagoffaces/all/' 24 | labels_folder = '../Datasets/lfw-original_from_parts/' 25 | 26 | #print labels 27 | 28 | for file in os.listdir(labels_folder): 29 | 30 | if not file.endswith(".jpg"): 31 | continue 32 | 33 | if not any(file in s for s in labels): 34 | 35 | src_file = faces_folder + '/' + file 36 | dst_file = output_folder + '/' + file 37 | shutil.copyfile(src_file, dst_file) 38 | print "Copied to " + output_folder + " the file " + src_file 39 | 40 | -------------------------------------------------------------------------------- /scripts/takenonlabeled.py: -------------------------------------------------------------------------------- 1 | # This script takes all non-ground-truth-labeled images in LFW and copies them apart. 
2 | # Rafael Redondo - 2019 3 | 4 | import os 5 | import shutil 6 | 7 | labels = os.listdir('../Datasets/parts/parts_lfw_funneled_gt_images/') 8 | labels = [label.replace('.ppm','') for label in labels] 9 | #print labels 10 | 11 | output_folder = '../Datasets/lfw-deepfunneled-discarded/' 12 | if not os.path.exists(output_folder): 13 | os.mkdir(output_folder) 14 | 15 | faces_folder = '../Datasets/lfw-deepfunneled/' 16 | 17 | for person in os.listdir(faces_folder): 18 | 19 | if not os.path.isdir(faces_folder + person): 20 | continue 21 | 22 | for face_file in os.listdir(faces_folder + person): 23 | 24 | if not face_file.endswith(".jpg"): 25 | continue 26 | 27 | name = os.path.splitext(face_file)[0] 28 | 29 | if not any(name in s for s in labels): 30 | 31 | src_file = faces_folder + person + "/" + face_file 32 | dst_file = output_folder + "/" + face_file 33 | shutil.copyfile(src_file, dst_file) 34 | 35 | print "Copied to " + output_folder + " the file " + src_file -------------------------------------------------------------------------------- /scripts/takefromfile.py: -------------------------------------------------------------------------------- 1 | # This script takes all non-ground-truth-labeled images in LFW and copies them apart. 2 | # Rafael Redondo - 2019 3 | 4 | import os 5 | import sys 6 | import shutil 7 | 8 | if len(sys.argv) != 4: 9 | print("Usage: $ takefromfile ") 10 | exit(0) 11 | 12 | filelist = sys.argv[1] 13 | f = open(filelist,"r") 14 | targets = [] 15 | for line in f: 16 | targets.append(line) 17 | 18 | # faces_folder = '../Datasets/lfw-deepfunneled/' 19 | faces_folder = sys.argv[2] 20 | output_folder = sys.argv[3] 21 | 22 | if not os.path.exists(output_folder): 23 | os.mkdir(output_folder) 24 | 25 | # print(targets) 26 | 27 | for person in os.listdir(faces_folder): 28 | 29 | person_path = os.path.join(faces_folder, person) 30 | 31 | if not os.path.isdir(person_path): 32 | continue 33 | 34 | for face_file in os.listdir(person_path): 35 | 36 | if not face_file.endswith(".jpg"): 37 | continue 38 | 39 | name = os.path.splitext(face_file)[0] + os.path.splitext(face_file)[1] 40 | 41 | if any(name in s for s in targets): 42 | src_file = os.path.join(person_path, face_file) 43 | dst_file = os.path.join(output_folder, face_file) 44 | shutil.copyfile(src_file, dst_file) 45 | print "Copied to " + output_folder + " the file " + src_file 46 | 47 | -------------------------------------------------------------------------------- /visualize.py: -------------------------------------------------------------------------------- 1 | import visdom 2 | import numpy as np 3 | 4 | class LinePlotter(object): 5 | def __init__(self, env_name="main"): 6 | print("Connecting to the Visdom server. 
Make sure it is online by running 'python -m visdom.server'.") 7 | self.vis = visdom.Visdom() 8 | self.env = env_name 9 | self.plots = {} 10 | 11 | def plot(self, x, y, y_label, var_name, title=""): 12 | window = title + " " + y_label 13 | if window not in self.plots: 14 | self.plots[window] = self.vis.line( 15 | X=np.array([x, x]), 16 | Y=np.array([y, y]), 17 | env=self.env, 18 | opts=dict( 19 | legend=[var_name], 20 | title=window, 21 | xlabel="Epochs", 22 | ylabel=y_label 23 | )) 24 | else: 25 | self.vis.line(X=np.array([x]), 26 | Y=np.array([y]), 27 | env=self.env, 28 | win=self.plots[window], 29 | name=var_name, 30 | update = 'append') 31 | -------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | # This file computes common semantic segmentation metrics 2 | # Rafael Redondo and Jaume Gibert (c) Eurecat 2019 3 | 4 | import numpy as np 5 | e = 1E-10 # epsilon 6 | 7 | def ZerosTFPN(num_classes): 8 | 9 | return np.zeros(num_classes), np.zeros(num_classes), np.zeros(num_classes), np.zeros(num_classes) 10 | 11 | 12 | def TrueFalsePositiveNegatives(labels, predictions, num_classes): 13 | 14 | TP, TN, FP, FN = ZerosTFPN(num_classes) 15 | 16 | for c in range(0, num_classes): 17 | 18 | A = (predictions == c).cpu().detach().numpy() 19 | B = (labels == c).cpu().detach().numpy() 20 | C = np.logical_not(A) 21 | D = np.logical_not(B) 22 | 23 | TP[c] = np.sum(np.logical_and(A, B)) # True Positives 24 | TN[c] = np.sum(np.logical_and(C, D)) # True Negatives 25 | FP[c] = np.sum(np.logical_and(A, D)) # False Positives 26 | FN[c] = np.sum(np.logical_and(C, B)) # False Negatives 27 | 28 | return TP, TN, FP, FN 29 | 30 | 31 | def PixelAccuracy(TP, FN): 32 | 33 | return np.sum(TP) / (np.sum(TP + FN) + e) 34 | 35 | 36 | def MeanAccuracy(TP, FN): # Also True Positive Rate (TPR) on average for all classes 37 | 38 | accuracy = TP / (TP + FN + e) 39 | return np.mean(accuracy), accuracy 40 | 41 | 42 | def MeanIU(TP, FN, FP): # Also Threat Score(TS) or Critical Success Index (CSI) 43 | 44 | iu = TP / (TP + FN + FP + e) 45 | return np.mean(iu), iu 46 | 47 | 48 | def FrequencyWeightedIU(TP, FN, FP): 49 | 50 | total_i = TP + FN 51 | return np.sum(total_i * TP / (total_i + FP + e)) / (np.sum(total_i) + e) 52 | 53 | 54 | def MeanF1Score(TP, FN, FP): 55 | 56 | f1_score = 2*TP / (2*TP + FP + FN + e) 57 | return np.mean(f1_score), f1_score 58 | -------------------------------------------------------------------------------- /scripts/hof-annotations2masks.py: -------------------------------------------------------------------------------- 1 | # Converts HandOverFace annotations to binary masks 2 | # Rafael Redondo (c) Eurecat 2019 3 | 4 | # Erratas in the original dataset: 5 | # 10.jpg does not match 10.png neither 10.xml 6 | # 216.jpg and 221.jpg are actually a GIFs in the original size folder 7 | # 225.jpg contains a body a part from hands 8 | 9 | import os 10 | import cv2 11 | import numpy as np 12 | 13 | annotations_path = '../Datasets/Hand_datasets/hand_over_face/annotations' 14 | handfaces_path = '../Datasets/Hand_datasets/hand_over_face/images_original_size' 15 | output_path = '../Datasets/Hand_datasets/HOF_highres_mask' 16 | 17 | annotations = os.listdir(annotations_path) 18 | handfaces = os.listdir(handfaces_path) 19 | 20 | import xml.etree.ElementTree as ET 21 | 22 | # Run over all annotation files 23 | for _, file in enumerate(annotations): 24 | 25 | basename, extension = 
os.path.splitext(file) 26 | if extension != '.xml': 27 | continue 28 | 29 | print('Processing file \033[1m%s\033[0m' % file) 30 | 31 | xml_file = os.path.join(annotations_path, file) 32 | parsed_annotation = ET.parse(xml_file) 33 | xml_root = parsed_annotation.getroot() 34 | 35 | handface_file = os.path.join(handfaces_path, basename + '.jpg') 36 | handface = cv2.imread(handface_file) 37 | nrows , ncols, _ = handface.shape 38 | mask = np.zeros((nrows, ncols)) 39 | 40 | # Run over all objects and polygons 41 | for elem in xml_root: 42 | if elem.tag != 'object': 43 | continue 44 | 45 | for subelem in elem: 46 | if subelem.tag != 'polygon': 47 | continue 48 | 49 | vertices = [] 50 | for subsubelem in subelem: 51 | if subsubelem.tag == 'pt': 52 | x = int(subsubelem[0].text) 53 | y = int(subsubelem[1].text) 54 | vertices.append((x,y)) 55 | 56 | if len(vertices): 57 | print('Found %d vertices' % len(vertices)) 58 | cv2.fillPoly(mask, np.array([vertices], dtype=np.int32), 255) 59 | else: 60 | print('Wop! no vertices found in ' + xml_file) 61 | continue 62 | 63 | mask_file = os.path.join(output_path, basename + '.png') 64 | cv2.imwrite(mask_file, mask) 65 | # test_file = os.path.join(output_path, 'test-' + basename + '.png') 66 | # mmm = cv2.merge((mask, mask, mask)) 67 | # cv2.imwrite(test_file, handface * mmm / 255) 68 | # cv2.imshow('mask', mask) 69 | # cv2.waitKey() 70 | -------------------------------------------------------------------------------- /scripts/elfw-drawComparisons.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import os 4 | #from import check_mkdir, bcolors 5 | import sys 6 | 7 | 8 | target_size = 256 9 | 10 | # I/O 11 | augmentation_ratios = [0, 0.1, 0.25, 0.5, 0.75, 1.0, 1.5] 12 | epochs = range(0, 201, 10) 13 | images_folder = "/media/jaume.gibert/Data/elfw/elfw_01_basic/faces" 14 | labels_folder = "/media/jaume.gibert/Data/elfw/elfw_01_basic/labels" 15 | predictions_folder = "/home/jaume.gibert/Code/facesinthewild/predictions/Background_Vs_Sunglasses/lr1e-4" 16 | 17 | # Input From File 18 | f = open("/media/jaume.gibert/Data/elfw/elfw_01_basic/elfw_set_00.txt", "r") 19 | names = [] 20 | for line in f: 21 | # for some reason it's also loading the \n at the end of each line 22 | if line[-1:]=='\n': 23 | names.append(line[:-1]) 24 | else: 25 | names.append(line) 26 | 27 | def main(): 28 | 29 | for image_name in names: 30 | 31 | # Read the image (resize it) and labels 32 | image_name_path = os.path.join(images_folder, image_name + ".jpg") 33 | image = Image.open(image_name_path).convert("RGB") 34 | image = image.resize((target_size, target_size), Image.BILINEAR) 35 | 36 | label_name_path = os.path.join(labels_folder, image_name + ".png") 37 | label = Image.open(label_name_path).convert("RGB") 38 | label = label.resize((target_size, target_size), Image.BILINEAR) 39 | 40 | A = np.concatenate((np.array(image), np.array(label)), axis=1) 41 | B = np.zeros((target_size, 2*target_size, 3)) 42 | 43 | for rho in augmentation_ratios: 44 | 45 | path = os.path.join(predictions_folder, "Aug_ratio_"+str(rho)) 46 | out = A if rho == 0 else B 47 | for ep in epochs: 48 | 49 | prediction_path = os.path.join(path, image_name+"_gcn-epoch_"+str(ep).zfill(4)+".png") 50 | prediction = Image.open(prediction_path).convert("RGB") 51 | prediction = prediction.resize((target_size, target_size), Image.BILINEAR) 52 | out = np.concatenate((out, prediction), axis=1) 53 | 54 | composition = out if rho==0 else 
np.concatenate((composition, out), axis=0) 55 | 56 | composition = Image.fromarray(composition.astype('uint8')) 57 | output_file = os.path.join(predictions_folder, 'compositions', image_name+".jpg") 58 | composition.save(output_file) 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /scripts/elfw-findNonExtended.py: -------------------------------------------------------------------------------- 1 | # This script goes over a folder of labels and finds those non-extended, i.e. background, skin, and hair only. 2 | # R. Redondo (c) Eurecat 2021 3 | 4 | import os 5 | import sys 6 | import numpy as np 7 | import cv2 8 | from shutil import copyfile 9 | 10 | if len(sys.argv) != 4: 11 | print("Usage: $ elfw-findNonExtended ") 12 | exit(0) 13 | 14 | faces_path = sys.argv[1] # '../elfw/elfw_Baseline/faces' 15 | labels_path = sys.argv[2] # '../elfw/elfw_Baseline/labels' 16 | if not os.path.exists(labels_path): 17 | print('Input path not found ' + labels_path) 18 | exit(-1) 19 | 20 | output_path = sys.argv[3] 21 | output_path_faces = os.path.join(output_path, 'faces') 22 | output_path_labels = os.path.join(output_path, 'labels') 23 | 24 | if not os.path.exists(output_path): 25 | os.mkdir(output_path) 26 | if not os.path.exists(output_path_faces): 27 | os.mkdir(output_path_faces) 28 | if not os.path.exists(output_path_labels): 29 | os.mkdir(output_path_labels) 30 | 31 | label_colors = [ 32 | (0, 0, 0), 33 | (0, 255, 0), 34 | (255, 0, 0), 35 | (0, 255, 255), 36 | (0, 0, 255), 37 | (255, 0, 255)] 38 | # (255, 255, 0)] 39 | 40 | grouped_per_name = True 41 | non_extended_files = [] 42 | 43 | print('This will take a while...') 44 | files = os.listdir(labels_path) 45 | 46 | for file in files: 47 | 48 | file_path = os.path.join(labels_path, file) 49 | image = cv2.imread(file_path) 50 | # print(file_path) 51 | 52 | image_size = image.shape 53 | b = image[:, :, 0] 54 | g = image[:, :, 1] 55 | r = image[:, :, 2] 56 | 57 | extended = False 58 | 59 | for c in range(0, len(label_colors)): 60 | 61 | mask = (r == label_colors[c][0]) & (g == label_colors[c][1]) & (b == label_colors[c][2]) 62 | pixels = np.sum(mask) 63 | 64 | if pixels and c > 2: 65 | extended = True 66 | 67 | if extended: 68 | continue 69 | 70 | non_extended_files.append(file) 71 | 72 | # Copy labels 73 | copyfile(file_path, os.path.join(output_path_labels, file)) 74 | 75 | # Copy faces 76 | face_file = os.path.splitext(file)[0] + '.jpg' 77 | if grouped_per_name: 78 | face_grouped_file = os.path.join(face_file[:-9], face_file) 79 | else: 80 | face_grouped_file = '' 81 | copyfile(os.path.join(faces_path, face_grouped_file), os.path.join(output_path_faces, face_file)) 82 | 83 | # cv2.imshow('Labels', image) 84 | # cv2.waitKey(0) 85 | 86 | print(non_extended_files) 87 | print('Number of faces without extended categories: {}'.format(len(non_extended_files))) -------------------------------------------------------------------------------- /scripts/computeClassWeights.py: -------------------------------------------------------------------------------- 1 | # From the original SegNet paper: 2 | # 3 | # We use median frequency balancing [13] where the weight assigned to a class in the loss function 4 | # is the ratio of the median of class frequencies computed on the entire training set divided by the class frequency. 
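#
# [Editor's note, not part of the original script] A tiny worked example of that rule,
# with made-up class frequencies: if freq = {background: 0.70, skin: 0.25, hair: 0.05},
# the median frequency is 0.25, so the weights become
# background: 0.25/0.70 ~= 0.36, skin: 0.25/0.25 = 1.0, hair: 0.25/0.05 = 5.0,
# i.e. rare classes are up-weighted and frequent ones are down-weighted.
#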
5 | 6 | # If we go to the reference the SegNet authors refer to: 7 | 8 | # we weight each pixel by a_c = median_freq / freq(c) where freq(c) is the number of pixels of class c divided by the 9 | # total number of pixels in images where c is present, and median_freq is the median of these frequencies. 10 | 11 | import os 12 | import numpy as np 13 | from PIL import Image 14 | import cv2 15 | import sys 16 | 17 | def ToELFWLabel(data, label_colors): 18 | 19 | r = data[:, :, 0] 20 | g = data[:, :, 1] 21 | b = data[:, :, 2] 22 | 23 | output = np.zeros((data.shape[0], data.shape[1])) 24 | for c in range(0,len(label_colors)): 25 | color_mask = (r == label_colors[c][0]) & (g == label_colors[c][1]) & (b == label_colors[c][2]) 26 | output[color_mask] = c 27 | 28 | return output 29 | 30 | if len(sys.argv) != 2: 31 | print "Usage: $ computeClassWeights " 32 | exit(0) 33 | 34 | label_colors = [ 35 | (0, 0, 0), 36 | (0, 255, 0), 37 | (255, 0, 0), 38 | (0, 255, 255), 39 | (0, 0, 255), 40 | (255, 0, 255), 41 | (255, 255, 0)] 42 | 43 | label_names = [ 44 | "background", 45 | "skin", 46 | "hair", 47 | "beard-mustache", 48 | "sunglasses", 49 | "wearable", 50 | "mouth-mask"] 51 | 52 | src_folder = sys.argv[1] 53 | n = len(label_names) 54 | px_frequencies = np.zeros(n) 55 | im_frequencies = np.zeros(n) 56 | 57 | print 'Please wait. Processing dataset...' 58 | 59 | for f in os.listdir(src_folder): 60 | file_name = os.path.join(src_folder, f) 61 | image = Image.open(file_name).convert("RGB") 62 | img = np.array(image) 63 | img = ToELFWLabel(img, label_colors) 64 | for l in range(0, n): 65 | px = np.sum(img==l) 66 | # label counts if it is present in the image 67 | if px > 0: 68 | px_frequencies[l] += px 69 | im_frequencies[l] += img.size 70 | 71 | # Mask for indices of appearing classes in the train set 72 | m = (px_frequencies>0) 73 | idx = np.where(m) 74 | 75 | frequencies = np.divide(px_frequencies[m], im_frequencies[m]) 76 | pos_median_frequencies = np.divide(np.median(frequencies), frequencies) 77 | 78 | median_frequencies = np.zeros(n) 79 | for l in range(0,len(pos_median_frequencies)): 80 | median_frequencies[idx[0][l]] = pos_median_frequencies[l] 81 | 82 | for l in range(0,len(median_frequencies)): 83 | print "Weight %f for class %s" % (median_frequencies[l], label_names[l]) 84 | 85 | -------------------------------------------------------------------------------- /scripts/elfw-counter.py: -------------------------------------------------------------------------------- 1 | # This code counts face names and augmentation usages. 2 | # R. 
Redondo (c) Eurecat 2019 3 | 4 | import os 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | path = '../Datasets/lfw-bagsoffaces/elfw_handoverfaces' 9 | 10 | if not os.path.exists(path): 11 | print('Input path not found ' + path) 12 | exit(-1) 13 | 14 | 15 | def update_dict(dictionary, key): 16 | 17 | if key in dictionary: 18 | dictionary[key] += 1 19 | else: 20 | dictionary[key] = 1 21 | 22 | # Counter 23 | 24 | face_counter = dict(hof=dict(),h2f_web=dict()) 25 | face_counter_total = dict() 26 | hand_counter_total = dict() 27 | 28 | files = os.listdir(path) 29 | 30 | for file in files: 31 | 32 | filename = os.path.basename(file) 33 | name, ext = os.path.splitext(filename) 34 | 35 | if ext != '.jpg': 36 | continue 37 | 38 | name_split = name.split('-') 39 | 40 | item = name_split[-1] 41 | aug_dataset = name_split[-2] 42 | picture = ''.join(name_split[:-2]) 43 | 44 | # print('%s %s %s' % (picture, aug_dataset, item)) 45 | 46 | update_dict(face_counter[aug_dataset], picture) 47 | update_dict(face_counter_total, picture) 48 | update_dict(hand_counter_total, aug_dataset+item) 49 | 50 | # Statistics 51 | font = {'family':'normal', 'weight':'normal', 'size':13} 52 | plt.rc('font', **font) 53 | fig_size = 3 54 | subplots = len(face_counter) + 1 55 | fig, axs = plt.subplots(1, subplots, figsize=(subplots * fig_size, fig_size), sharey=True) 56 | titles = ['HandOverFace', 'Hand2Face'] 57 | usage_histogram_total = dict() 58 | 59 | for idx, d in enumerate(face_counter.keys()): 60 | 61 | print('Total used faces with %s: %d' % (str(d), len(face_counter[d]))) 62 | 63 | usage_histogram = dict() 64 | 65 | for f in face_counter[d].keys(): 66 | 67 | usages = face_counter[d][f] 68 | 69 | update_dict(usage_histogram, usages) 70 | update_dict(usage_histogram_total, usages) 71 | 72 | print('Usage histogram with %s:' % str(d)) 73 | print([(u,usage_histogram[u]) for u in sorted(usage_histogram.keys())]) 74 | 75 | # Plot 76 | axs[idx].bar(list(usage_histogram.keys()), list(usage_histogram.values())) 77 | axs[idx].set_title(titles[idx]) 78 | axs[idx].set_xticks(np.arange(1, np.max(np.array(list(usage_histogram.keys())))+1, step=1)) 79 | 80 | print('Total used faces: %d' % len(face_counter_total)) 81 | print('Total used hands: %d' % len(hand_counter_total)) 82 | 83 | axs[0].set_ylabel('№ hand-augmented faces') 84 | axs[-1].bar(list(usage_histogram_total.keys()), list(usage_histogram_total.values())) 85 | axs[-1].set_xticks(np.arange(1, np.max(np.array(list(usage_histogram_total.keys())))+1, step=1)) 86 | axs[-1].set_title('All') 87 | 88 | for a in axs: 89 | a.set_xlabel('№ different hands') 90 | 91 | 92 | plt.show() -------------------------------------------------------------------------------- /scripts/elfw-dataset_comparisons.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import cv2 4 | import os 5 | 6 | def get_masks(img, old): 7 | 8 | b = img[:, :, 0] 9 | g = img[:, :, 1] 10 | r = img[:, :, 2] 11 | 12 | # the background mask of the original labels 13 | if old: 14 | background_mask = (r == 0) & (g == 0) & (b == 255) 15 | else: 16 | background_mask = (r == 0) & (g == 0) & (b == 0) 17 | 18 | skin_mask = (r == 0) & (g == 255) & (b == 0) 19 | hair_mask = (r == 255) & (g == 0) & (b == 0) 20 | 21 | return background_mask, skin_mask, hair_mask 22 | 23 | base_path = "/media/jaume.gibert/Data/elfw" 24 | images = os.path.join(base_path, "elfw_01_basic/faces") 25 | labels_1 = os.path.join(base_path, 
"parts_lfw_funneled_gt_images") 26 | labels_2 = os.path.join(base_path, "elfw_01_basic/labels") 27 | output = os.path.join(base_path, "comparisons") 28 | out_improvements = os.path.join(output, "relabelled") 29 | out_new = os.path.join(output, "new_labels") 30 | 31 | if not os.path.exists(output): 32 | os.mkdir(output) 33 | if not os.path.exists(out_improvements): 34 | os.mkdir(out_improvements) 35 | if not os.path.exists(out_new): 36 | os.mkdir(out_new) 37 | 38 | for image in os.listdir(images): 39 | 40 | base_name = os.path.splitext(image)[0] 41 | print(base_name) 42 | 43 | # Face image 44 | img_file = os.path.join(images, image) 45 | lab_1_file = os.path.join(labels_1, base_name + '.ppm') 46 | lab_2_file = os.path.join(labels_2, base_name + '.png') 47 | 48 | # If all three exist, it means that we either didn't touch the labels or we improved it 49 | # Save into improvements 50 | # TODO: check equal old and new labels and only save the improved ones 51 | if os.path.exists(img_file) and os.path.exists(lab_1_file) and os.path.exists(lab_2_file): 52 | img = cv2.imread(img_file) 53 | lab_1 = cv2.imread(lab_1_file) 54 | lab_2 = cv2.imread(lab_2_file) 55 | 56 | # I take the original labels (lab_1), convert the blue (background into black) 57 | # and check if it's the same as in the new label image (lab_2) 58 | # if so, I don't want it, I want the other cases. 59 | b1, s1, h1 = get_masks(lab_1, old=True) 60 | b2, s2, h2 = get_masks(lab_2, old=False) 61 | 62 | if not (np.array_equal(b1, b2) and np.array_equal(s1, s2) and np.array_equal(h1, h2)): 63 | result = np.vstack((img, np.vstack((lab_2, lab_1)))) 64 | cv2.imwrite(os.path.join(out_improvements, base_name+".png"), result) 65 | 66 | # if only the image and our labels exist, it means the image-label is new and didn't exist in LFW 67 | elif os.path.exists(img_file) and not os.path.exists(lab_1_file) and os.path.exists(lab_2_file): 68 | 69 | img = cv2.imread(img_file) 70 | lab_2 = cv2.imread(lab_2_file) 71 | result = np.vstack((img, lab_2)) 72 | cv2.imwrite(os.path.join(out_new, base_name+".png"), result) 73 | 74 | else: 75 | continue 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /scripts/elfw-separateSunglasses4Validation.py: -------------------------------------------------------------------------------- 1 | # This code lists all images that have sunglasses labelled in the Labeled Faces in the Wild dataset 2 | # J.Gibert based on code by R.Redondo, Eurecat 2019 (c). 
3 | 4 | import numpy as np 5 | import sys 6 | import cv2 7 | import os 8 | import fnmatch 9 | import imutils 10 | from random import randint 11 | from random import random 12 | from random import shuffle 13 | 14 | label_colors = [ 15 | ( 0, 0, 0), # black - background 16 | ( 0,255, 0), # green - skin 17 | ( 0, 0,255), # red - hair 18 | (255,255, 0), # light blue - beard-mustache 19 | (255, 0, 0), # blue - sunglasses 20 | (255, 0,255), # pink - wearable 21 | ( 0,255,255)] # yellow - mouth-mask 22 | 23 | label_names = [ 24 | "background", 25 | "skin", 26 | "hair", 27 | "beard-mustache", 28 | "sunglasses", 29 | "wearable", 30 | "mouth-mask"] 31 | 32 | class bcolors: 33 | PURPLE = '\033[95m' 34 | BLUE = '\033[94m' 35 | GREEN = '\033[92m' 36 | YELLOW = '\033[93m' 37 | RED = '\033[91m' 38 | CYAN = '\033[96m' 39 | ENDC = '\033[0m' 40 | BOLD = '\033[1m' 41 | CYAN = '\033[96m' 42 | 43 | 44 | #---------------------------------------------------------------------------------------------------- 45 | 46 | if len(sys.argv) != 3: 47 | print("Usage: $ elfw-separateSunglasses4Validation.py ") 48 | exit(0) 49 | 50 | labels_folder = sys.argv[1] 51 | output_file_folder = sys.argv[2] 52 | 53 | #----------------------------------------------------------------------------------------------------- 54 | # For each image, check if it has sunglasses and print its name out in a file 55 | 56 | N = len(os.listdir(labels_folder)) 57 | count = 0 58 | 59 | f_with = open(os.path.join(output_file_folder,"with_sunglasses.txt"), 'w') 60 | f_without = open(os.path.join(output_file_folder,"without_sunglasses.txt"), 'w') 61 | 62 | for n, face_file in enumerate(os.listdir(labels_folder)): 63 | 64 | base_name = os.path.splitext(face_file)[0] 65 | 66 | # Print the image number and name 67 | if not n: 68 | sys.stdout.flush() 69 | print("") 70 | sys.stdout.write('\x1b[1A') 71 | sys.stdout.write('\x1b[2K') 72 | print(bcolors.BLUE + "["+ str(n).zfill(4) +"/"+ str(N) +"] " + base_name + bcolors.ENDC) 73 | 74 | # Load labels image 75 | labels = cv2.imread(os.path.join(labels_folder, base_name+'.png')) 76 | 77 | # Build up a mask for the sunglasses class 78 | sunglasses_color = label_colors[4] 79 | mask = np.ones((labels.shape[0],labels.shape[1])) 80 | for c in [0,1,2]: 81 | mask_c = np.zeros((labels.shape[0],labels.shape[1])) 82 | index = (labels[:,:,c] == sunglasses_color[c]) 83 | mask_c[index] = 1 84 | mask = mask * mask_c 85 | 86 | if np.sum(mask)>0: 87 | #print(bcolors.BLUE + "Already has sunglasses: " + base_name + bcolors.ENDC) 88 | count +=1 89 | f_with.write(base_name+"\n") 90 | cv2.imwrite(os.path.join('/media/jaume.gibert/Data/elfw/elfw_01_basic', 'with_sunglasses', base_name+'.png'), labels) 91 | else: 92 | f_without.write(base_name+"\n") 93 | 94 | f_with.close() 95 | f_without.close() 96 | 97 | print("\n" + bcolors.RED + "Total number of files .... " + bcolors.ENDC + str(N)) 98 | print( bcolors.BOLD + "With sunglasses .......... " + bcolors.ENDC + str(count)) 99 | print("\n") 100 | 101 | -------------------------------------------------------------------------------- /scripts/elfw-inspector.py: -------------------------------------------------------------------------------- 1 | # This code visualizes superpixels of the Labeled Faces in the Wild dataset. 2 | # R. Redondo, Eurecat 2019 (c). 
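# --- Editor's sketch (not part of the original script) -------------------------
# The nested loops further below mark superpixel boundaries pixel by pixel over an
# 8-neighbourhood. A minimal vectorized 4-neighbour sketch of the same idea, assuming
# `sp` is the 2-D integer superpixel map loaded from the .dat file as done below:
import numpy as np

def superpixel_bounds_sketch(sp):
    bounds = np.zeros(sp.shape, dtype=np.uint8)
    dv = sp[:-1, :] != sp[1:, :]   # superpixel id changes between vertical neighbours
    dh = sp[:, :-1] != sp[:, 1:]   # superpixel id changes between horizontal neighbours
    bounds[:-1, :][dv] = 255
    bounds[1:, :][dv] = 255
    bounds[:, :-1][dh] = 255
    bounds[:, 1:][dh] = 255
    return bounds

# Usage (hypothetical): bounds = superpixel_bounds_sketch(sp) instead of the double loop.
# --------------------------------------------------------------------------------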
3 | 4 | import numpy as np 5 | import sys 6 | import cv2 7 | import os 8 | 9 | if len(sys.argv) != 4: 10 | print("Usage: $ elfw-inspector ") 11 | exit(0) 12 | 13 | faces_folder = sys.argv[1] 14 | sp_folder = sys.argv[2] 15 | output_folder = sys.argv[3] 16 | 17 | # faces_folder = '../Datasets/lfw-deepfunneled/' 18 | # sp_folder = '../Datasets/lfw-deepfunneled-sp/' 19 | # output_folder = '../Datasets/lfw-deepfunneled-sp-overlay/' 20 | 21 | if not os.path.exists(output_folder): 22 | os.mkdir(output_folder) 23 | 24 | for person in os.listdir(faces_folder): 25 | 26 | face_path = os.path.join(faces_folder, person) 27 | 28 | if not os.path.isdir(face_path): 29 | continue 30 | 31 | sp_path = os.path.join(sp_folder, person) 32 | 33 | if not os.path.isdir(sp_path): 34 | continue 35 | 36 | for face_file in os.listdir(face_path): 37 | 38 | if not face_file.endswith(".jpg"): 39 | continue 40 | 41 | print('Processing file ' + face_file) 42 | 43 | name = os.path.splitext(face_file)[0] 44 | sp_file = os.path.join(sp_path, name + '.dat') 45 | 46 | if not os.path.exists( sp_file ): 47 | print('\033[1m' + 'Superpixels not found in ' + sp_file + '\033[0m') 48 | continue 49 | 50 | # Face image 51 | image = cv2.imread(os.path.join(face_path, face_file)) 52 | 53 | # Superpixels 54 | sp = np.fromfile(sp_file, dtype=int, count=-1, sep=' ') 55 | sp = np.array(sp, dtype=np.uint8) 56 | sp = np.reshape(sp, (250, -1)) 57 | 58 | # Superpixels bounds 59 | bounds = np.zeros(sp.shape) 60 | h, w = bounds.shape 61 | for y in range(0, h): 62 | for x in range(0, w): 63 | if y > 0: 64 | if sp[y, x] != sp[y-1, x ]: 65 | bounds[y,x] = 255; 66 | continue 67 | if y < h-1: 68 | if sp[y, x] != sp[y+1, x ]: 69 | bounds[y,x] = 255; 70 | continue 71 | if y < h-1 and x > 0: 72 | if sp[y, x] != sp[y+1, x-1]: 73 | bounds[y,x] = 255; 74 | continue 75 | if y < h-1 and x < w-1: 76 | if sp[y, x] != sp[y+1, x+1]: 77 | bounds[y,x] = 255; 78 | continue 79 | if y > 0 and x > 0: 80 | if sp[y, x] != sp[y-1, x-1]: 81 | bounds[y,x] = 255; 82 | continue 83 | if y > 0 and x < w-1: 84 | if sp[y, x] != sp[y-1, x+1]: 85 | bounds[y,x] = 255; 86 | continue 87 | if x > 0: 88 | if sp[y, x] != sp[y , x-1]: 89 | bounds[y,x] = 255; 90 | continue 91 | if x < w-1: 92 | if sp[y, x] != sp[y , x+1]: 93 | bounds[y,x] = 255; 94 | continue 95 | 96 | # Erode 97 | kernel = np.ones((2,2),np.uint8) 98 | bounds = cv2.erode(bounds, kernel, iterations = 1) 99 | 100 | # Visualization 101 | sp = cv2.cvtColor(sp, cv2.COLOR_GRAY2RGB) 102 | b,g,r = cv2.split(image) 103 | r[bounds > 0] = r[bounds > 0] * 0.2 + 255 * 0.8; 104 | bounds = cv2.merge((b,g,r)) 105 | vis = np.concatenate((image, sp), axis=1) 106 | vis = np.concatenate((vis, bounds), axis=1) 107 | 108 | # Save output 109 | sp_overlay_file = os.path.join(output_folder, name + '.jpg') 110 | cv2.imwrite(sp_overlay_file, bounds) 111 | 112 | # Show output 113 | # cv2.imshow(face_file, vis) 114 | # if cv2.waitKey(0) & 0xFF == 27: 115 | # exit(0) 116 | # cv2.destroyWindow(face_file) 117 | 118 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import sys, os, time 2 | 3 | # Rafael Redondo, Jaume Gibert - Eurecat (c) 2019 4 | 5 | class bcolors: 6 | PURPLE = '\033[95m' 7 | BLUE = '\033[94m' 8 | GREEN = '\033[92m' 9 | YELLOW = '\033[93m' 10 | RED = '\033[91m' 11 | CYAN = '\033[96m' 12 | ENDC = '\033[0m' 13 | BOLD = '\033[1m' 14 | CYAN = '\033[96m' 15 | 16 | class AverageMeter(object): 17 
| def __init__(self,offset=0): 18 | self.reset(offset) 19 | 20 | def reset(self, offset=0): 21 | self.val = 0 22 | self.avg = 0 23 | self.sum = offset 24 | self.count = 0 25 | self.offset = offset 26 | 27 | def update(self, val, n=1): 28 | self.val = val 29 | self.sum += self.val * n 30 | self.count += n 31 | self.avg = (self.sum - self.offset) / (self.count + 1E-3) 32 | 33 | def check_mkdir(dir_name): 34 | if not os.path.exists(dir_name): 35 | os.mkdir(dir_name) 36 | return False 37 | else: 38 | return True 39 | 40 | class AverageConsole(object): 41 | def __init__(self,split,iters): 42 | self.split = split 43 | self.max_iters = iters 44 | self.meter = AverageMeter(time_millis()) 45 | self.snapshot = 0 46 | 47 | def snap(self): 48 | self.snapshot = time_millis() - self.meter.sum 49 | 50 | def updateprint(self,i): 51 | self.meter.update(time_millis() - self.meter.sum) 52 | 53 | p = (i+1) * 100 / self.max_iters 54 | t_load = self.snapshot 55 | t_run = self.meter.val - self.snapshot 56 | total = (self.meter.sum - self.meter.offset ) / 1000 57 | 58 | if not i: 59 | sys.stdout.flush() 60 | print('') 61 | sys.stdout.write('\x1b[1A') 62 | sys.stdout.write('\x1b[2K') 63 | 64 | msg = self.split.ljust(5) + ' [' + str(int(p)).rjust(3) + '%,' + str(int(total)).rjust(4) + 'sec]' + \ 65 | ' < ' + str(i).rjust(4) + '-it' + \ 66 | ' (load: ' + str(int(t_load)) + 'ms, run: ' + str(int(t_run)) + 'ms, avg: ' + str(int(self.meter.avg)) + 'ms)' 67 | print(msg) 68 | 69 | def time_millis(): 70 | return int(round(time.time() * 1000)) 71 | 72 | class Logger(object): 73 | def __init__(self,file): 74 | self.terminal = sys.stdout 75 | self.log = open(file, "a") 76 | 77 | def __del__(self): 78 | sys.stdout = self.terminal 79 | self.log.close() 80 | 81 | def write(self, message): 82 | self.terminal.write(message) 83 | self.log.write(message) 84 | 85 | def flush(self): 86 | #this flush method is needed for python 3 compatibility. 87 | #this handles the flush command by doing nothing. 88 | #you might want to specify some extra behavior here. 89 | pass 90 | 91 | class EarlyStop(object): 92 | def __init__(self, patience, aim='minimum'): 93 | self.patience = patience 94 | self.counter = 0 95 | self.aim = aim 96 | 97 | # aiming a minimum use a high value as init, low value otherwise 98 | if self.aim == 'minimum': 99 | self.best_score = 1E10 100 | else: 101 | self.best_score = -1E10 102 | 103 | def step(self, score): 104 | 105 | should_stop = score < self.best_score 106 | if self.aim == 'minimum': 107 | should_stop = not should_stop 108 | 109 | if should_stop: 110 | self.counter += 1 111 | else: 112 | self.counter = 0 113 | self.best_score = score 114 | 115 | return self.patience <= self.counter -------------------------------------------------------------------------------- /scripts/elfw-counter2.py: -------------------------------------------------------------------------------- 1 | # This code calculates appearance frequencies and area occupation of the ELFW classes. 2 | # R. 
Redondo (c) Eurecat 2019 3 | 4 | import os 5 | import numpy as np 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | 9 | path = '../Datasets/lfw-bagsoffaces/elfw/elfw_01_basic/labels' 10 | 11 | if not os.path.exists(path): 12 | print('Input path not found ' + path) 13 | exit(-1) 14 | 15 | label_colors = [ 16 | (0, 0, 0), 17 | (0, 255, 0), 18 | (255, 0, 0), 19 | (0, 255, 255), 20 | (0, 0, 255), 21 | (255, 0, 255)] 22 | # (255, 255, 0)] 23 | 24 | label_names = [ 25 | "bkgnd", 26 | "skin", 27 | "hair", 28 | "beard", 29 | "sunglasses", 30 | "wearable"] 31 | # "mask"] 32 | 33 | def set_fontsize(type): 34 | 35 | if type == 'normal': 36 | size = 9 37 | elif type == 'small': 38 | size = 7 39 | 40 | font = {'family': 'normal', 'weight': 'normal', 'size': size} 41 | plt.rc('font', **font) 42 | 43 | def rotate_ticks(axis, degrees): 44 | for tick in axis.get_xticklabels(): 45 | tick.set_rotation(degrees) 46 | 47 | def format_plot(axis, title, mean, std=None): 48 | 49 | set_fontsize('normal') 50 | axis.set_title(title) 51 | axis.yaxis.grid(b=True, linestyle='--') 52 | rotate_ticks(axis, 25) 53 | set_fontsize('small') 54 | 55 | for i, m in enumerate(mean): 56 | label = '{:0.2f}'.format(m) 57 | label_size = len(label) 58 | y = m 59 | if std is not None: 60 | s = std[i] 61 | label_std = '(±' + '{:0.3f})'.format(s) 62 | label_size = len(label_std) 63 | label = ' ' + label + '\n' + label_std 64 | y += s + 4E-2 65 | 66 | x = i - label_size * 6E-2 67 | y += 1E-2 68 | axis.text(x, y, label, color='black') 69 | 70 | set_fontsize('normal') 71 | 72 | # -------------------------------------------------------------------------------- 73 | # Calculate class contributions 74 | 75 | files = os.listdir(path) 76 | class_contributions = [ [] for l in range(len(label_names)) ] 77 | num_extended_faces = 0 78 | 79 | print("This will take a while...") 80 | 81 | for file in files: 82 | 83 | file_path = os.path.join(path, file) 84 | image = cv2.imread(file_path) 85 | # print(file_path) 86 | 87 | image_size = image.shape 88 | b = image[:, :, 0] 89 | g = image[:, :, 1] 90 | r = image[:, :, 2] 91 | 92 | extended = False 93 | 94 | for c in range(0, len(label_colors)): 95 | 96 | mask = (r == label_colors[c][0]) & (g == label_colors[c][1]) & (b == label_colors[c][2]) 97 | pixels = np.sum(mask) 98 | 99 | if pixels: 100 | class_contributions[c].append(pixels) 101 | if c > 2: 102 | extended = True 103 | 104 | if extended: 105 | num_extended_faces += 1 106 | 107 | # Plot statistics 108 | 109 | fig_size = 3 110 | fig, axs = plt.subplots(1, 2, figsize=(2 * fig_size, fig_size), sharey=True) 111 | label_colors = np.array(label_colors) / 255 112 | # set_fontsize('normal') 113 | 114 | class_frequencies = np.array([len(c) for c in class_contributions]) / float(len(files)) 115 | axs[0].bar(list(label_names), list(class_frequencies), color=label_colors, alpha=0.8) 116 | format_plot(axs[0], 'Normalized Appearance Frequency', class_frequencies) 117 | 118 | size = float( image_size[0] * image_size[1] ) 119 | class_mean = np.array([np.mean(np.array(c)) for c in class_contributions]) / size 120 | class_std = np.array([np.std(np.array(c)) for c in class_contributions]) / size 121 | 122 | axs[1].bar(list(label_names), list(class_mean), yerr=list(class_std), color=label_colors, ecolor='black', capsize=5, alpha=0.8) 123 | format_plot(axs[1], 'Normalized Area Occupation', class_mean, class_std) 124 | 125 | print('Number of faces with at least 1 extended category: %d' % num_extended_faces) 126 | 127 | plt.tight_layout() 128 | # 
plt.savefig('myplot.eps') 129 | print("Done.") 130 | plt.show() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Extended Labeled Faces In-The-Wild (ELFW) 2 | 3 | Development code for face semantic segmentation on Extended Labeled Faces In-The-Wild (ELFW). 4 | Dataset and further details at the [Project Site](https://multimedia-eurecat.github.io/2020/06/22/extended-faces-in-the-wild.html). 5 | 6 | ![teaser](assets/teaser.jpg) 7 | 8 | >**Examples from the ELFW dataset**. Original LFW categories *background*, *skin*, and *hair*, new categories *beard-mustache*, *sunglasses*, *head-wearable*, and exclusively synthetic *mouth-mask*. (left) Re-labeled faces with manual refinement compared to the original LFW labels in blue background, (right-top) faces previously unlabeled in LFW, and (right-bottom) synthetic object augmentation with sunglasses, mouth-masks, and occluding hands. 9 | 10 | 11 | ## What each file is for 12 | 13 | - `run_trainer.py`: main file to be run for training (see below). 14 | - `trainer.py`: the trainer, i.e. SGD, scheduler, epochs, loss, and all deep learning artillery. 15 | - `models.py`: the NN architectures, namely FCN, DeeplabV3, and GCN. 16 | - `elfw.py`: the dataloader and label conversion utilities for the ELFW dataset. 17 | - `transform.py`: image transformations (scaling, flips, relabeling,...) for data augmentation. 18 | - `metrics.py`: a useful compendium of metrics including pixel accuracy, mean accuracy, mean IoU, frequency-weighted IoU, and mean F1-score. 19 | - `utils.py`: some utilities for console output, time metering, and early stopping. 20 | - `visualize.py`: handy Visdom class for performance visualization in the web browser. 21 | - `tester.py`: use this file to segment an image of your own once you have a trained model. 22 | 23 | 24 | ## How to train 25 | 26 | Training settings are described in `run_trainer.py`. Some arguments are passed via the console, while other hyperparameters are fixed. See `list_experiments.sh` for an exhaustive list of experiments carried out during the project. 27 | 28 | ```python 29 | max_epochs = 301 # Maximum number of epochs 30 | lr = 1E-3 # Learning rate 31 | lr_decay = 0.2 # Learning rate decay factor 32 | w_decay = 5E-4 # Weight decay, typically [5e-4] 33 | momentum = 0.99 # Momentum, typically [0.9-0.99] 34 | lr_milestones = [35,90,180] # lr milestones for a multistep lr scheduler 35 | augment = True # random transformations for data augmentation 36 | gcn_levels = 3 # Number of GCN levels, typically 3 for 256x256 and 4 for 512x512 image sizes 37 | ``` 38 | 39 | Trained models are not available at the moment. 40 | 41 | #### Main Frameworks 42 | 43 | - `Python 3.5`, `PyTorch 1.1.0`, `TorchVision 0.3.0`, `Visdom 0.1.8`, `PIL 6.2.1`. 44 | - Data augmentation: `OpenCV 3.1.0` and `Dlib`. 45 | 46 | 47 | 48 | ## Included Networks for Semantic Segmentation 49 | 50 | - **FCN**: Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 3431–3440 (2015). 51 | 52 | - **DeeplabV3**: Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE transactions on pattern analysis and machine intelligence 40(4), 834–848 (2018). 
53 | 54 | - **GCN**: Peng, C., Zhang, X., Yu, G., Luo, G., & Sun, J. (2017). Large kernel matters--improve semantic segmentation by global convolutional network. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4353-4361). [ *Not considered for the paper* ] 55 | 56 | 57 | ## Jupyter demos 58 | 59 | The folder `./demos` contains code snippets to test the GCN model over the webcam. Have a look to [Jupyter Notebooks](https://jupyter.org/) or [Colab](https://colab.research.google.com/). 60 | 61 | 62 | ## Handy scripting toolbox 63 | 64 | The folder `./scripts` contains some useful code tools for labeling and processing the dataset. Main files are: 65 | 66 | - `computeClassWeights.py`: computes weight for class balancing over the training loss. 67 | - `elfw-makeThemWearMasks.py`: overlays synthetic masks (must provide) on face images. 68 | - `elfw-makeThemWearSunglasses.py`: same for sunglasses. 69 | - `elfw-putYourHandsOnMeWithDlib.py`: same for hands based on Dlib. 70 | - `elfw-scribbleMe.py`: tool for label annotation by filling superpixels on mouse scribbling. 71 | - `elfw-refineMe.py`: tool for refining the annotated segments. 72 | 73 | 74 | ## BibTeX Citation 75 | @article{redondo2020extended, 76 | title={Extended labeled faces in-the-wild (elfw): Augmenting classes for face segmentation}, 77 | author={Redondo, Rafael and Gibert, Jaume}, 78 | journal={arXiv preprint arXiv:2006.13980}, 79 | year={2020} 80 | } 81 | --- 82 | Rafael Redondo and Jaume Gibert (c) 2019-20 Eurecat 83 | -------------------------------------------------------------------------------- /transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from PIL import Image 4 | import collections 5 | 6 | 7 | class Scale(object): 8 | def __init__(self, size, interpolation=Image.BILINEAR): 9 | assert isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2) 10 | self.size = size 11 | self.interpolation = interpolation 12 | 13 | def __call__(self, img): 14 | if isinstance(self.size, int): 15 | w, h = img.size 16 | if (w <= h and w == self.size) or (h <= w and h == self.size): 17 | return img 18 | if w < h: 19 | ow = self.size 20 | oh = int(self.size * h / w) 21 | return img.resize((ow, oh), self.interpolation) 22 | else: 23 | oh = self.size 24 | ow = int(self.size * w / h) 25 | return img.resize((ow, oh), self.interpolation) 26 | else: 27 | return img.resize(self.size, self.interpolation) 28 | 29 | 30 | class ToParallel(object): 31 | def __init__(self, transforms): 32 | self.transforms = transforms 33 | 34 | def __call__(self, img): 35 | yield img 36 | for t in self.transforms: 37 | yield t(img) 38 | 39 | 40 | class ToLabel(object): 41 | def __call__(self, inputs): 42 | tensors = [] 43 | for i in inputs: 44 | tensors.append(torch.from_numpy(np.array(i)).long()) 45 | return tensors 46 | 47 | 48 | class ReLabel(object): 49 | def __init__(self, olabel, nlabel): 50 | self.olabel = olabel 51 | self.nlabel = nlabel 52 | 53 | def __call__(self, inputs): 54 | # assert isinstance(input, torch.LongTensor), 'tensor needs to be LongTensor' 55 | for i in inputs: 56 | i[i == self.olabel] = self.nlabel 57 | return inputs 58 | 59 | class ToSP(object): 60 | def __init__(self, size): 61 | self.scale2 = Scale(size/2, Image.NEAREST) 62 | self.scale4 = Scale(size/4, Image.NEAREST) 63 | self.scale8 = Scale(size/8, Image.NEAREST) 64 | self.scale16 = Scale(size/16, Image.NEAREST) 65 | self.scale32 = 
Scale(size/32, Image.NEAREST) 66 | 67 | def __call__(self, input): 68 | input2 = self.scale2(input) 69 | input4 = self.scale4(input) 70 | input8 = self.scale8(input) 71 | input16 = self.scale16(input) 72 | input32 = self.scale32(input) 73 | inputs = [input, input2, input4, input8, input16, input32] 74 | # inputs = [input] 75 | 76 | return inputs 77 | 78 | class HorizontalFlip(object): 79 | """Horizontally flips the given PIL.Image with a probability of 0.5.""" 80 | 81 | def __call__(self, img): 82 | return img.transpose(Image.FLIP_LEFT_RIGHT) 83 | 84 | 85 | class VerticalFlip(object): 86 | def __call__(self, img): 87 | return img.transpose(Image.FLIP_TOP_BOTTOM) 88 | 89 | def uint82bin(n, count=8): 90 | """returns the binary of integer n, count refers to amount of bits""" 91 | return ''.join([str((n >> y) & 1) for y in range(count-1, -1, -1)]) 92 | 93 | def labelcolormap(N): 94 | cmap = np.zeros((N, 3), dtype=np.uint8) 95 | for i in range(N): 96 | r = 0 97 | g = 0 98 | b = 0 99 | id = i 100 | for j in range(7): 101 | str_id = uint82bin(id) 102 | r = r ^ (np.uint8(str_id[-1]) << (7-j)) 103 | g = g ^ (np.uint8(str_id[-2]) << (7-j)) 104 | b = b ^ (np.uint8(str_id[-3]) << (7-j)) 105 | id = id >> 3 106 | cmap[i, 0] = r 107 | cmap[i, 1] = g 108 | cmap[i, 2] = b 109 | return cmap 110 | 111 | def colormap(n): 112 | cmap = np.zeros([n, 3]).astype(np.uint8) 113 | 114 | for i in np.arange(n): 115 | r, g, b = np.zeros(3) 116 | 117 | for j in np.arange(8): 118 | r = r + (1 << (7-j))*((i & (1 << (3*j))) >> (3*j)) 119 | g = g + (1 << (7-j))*((i & (1 << (3*j+1))) >> (3*j+1)) 120 | b = b + (1 << (7-j))*((i & (1 << (3*j+2))) >> (3*j+2)) 121 | 122 | cmap[i, :] = np.array([r, g, b]) 123 | 124 | return cmap 125 | 126 | 127 | class Colorize(object): 128 | def __init__(self, n=22): 129 | self.cmap = labelcolormap(22) 130 | self.cmap = torch.from_numpy(self.cmap[:n]) 131 | 132 | def __call__(self, gray_image): 133 | size = gray_image.size() 134 | color_image = torch.ByteTensor(3, size[1], size[2]).fill_(0) 135 | 136 | for label in range(0, len(self.cmap)): 137 | mask = (label == gray_image[0]).cpu() 138 | color_image[0][mask] = self.cmap[label][0] 139 | color_image[1][mask] = self.cmap[label][1] 140 | color_image[2][mask] = self.cmap[label][2] 141 | 142 | return color_image 143 | -------------------------------------------------------------------------------- /tester.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | import numpy as np 3 | from PIL import Image 4 | import torch 5 | from torch.autograd import Variable 6 | import torch.nn.functional as F 7 | from models import GCN, ResnetFCN, DeepLabV3 8 | from utils import check_mkdir, bcolors 9 | from elfw import ELFWDataSet 10 | 11 | # Model type: fcn, gcn, or deeplab 12 | model_type = 'fcn' 13 | 14 | # I/O 15 | checkpoints_folder = "../checkpoints" 16 | input_folder = "../Datasets/elfw/elfw_01_basic/faces" 17 | output_folder = "../deploy" 18 | 19 | # Input From File 20 | f = open("../Datasets/elfw/elfw_01_basic/elfw_set_00.txt", "r") 21 | 22 | test_names = [] 23 | for line in f: 24 | # for some reason it's also loading the \n at the end of each line 25 | if line[-1:] == '\n': 26 | test_names.append(line[:-1]) 27 | else: 28 | test_names.append(line) 29 | 30 | # ----------------------------------------------------------------- 31 | 32 | # Dataset: just wanting some configuration params 33 | dataset = ELFWDataSet(split='test',excluded_classes=[6]) 34 | 35 | # 
----------------------------------------------------------------- 36 | # Model loading 37 | 38 | if model_type == "fcn": 39 | model = torch.nn.DataParallel(ResnetFCN(dataset.num_classes)) 40 | elif model_type == "gcn": 41 | gcn_levels = 3 42 | model = torch.nn.DataParallel(GCN(dataset.num_classes, gcn_levels)) 43 | elif model_type == "deeplab": 44 | model = torch.nn.DataParallel(DeepLabV3(dataset.num_classes)) 45 | else: 46 | print('Model type not found.') 47 | exit(-1) 48 | 49 | # ----------------------------------------------------------------- 50 | 51 | def main(): 52 | 53 | print(bcolors.RED + "Checkpoints folder: " + checkpoints_folder + bcolors.ENDC) 54 | print(bcolors.YELLOW + "Deploy folder: " + output_folder + bcolors.ENDC) 55 | 56 | check_mkdir(output_folder) 57 | 58 | # List all checkpoint files in the folder 59 | checkpoints_files = os.listdir(checkpoints_folder) 60 | 61 | # Make predictions for all checkpoints in the deploy folder 62 | for checkpoint in checkpoints_files: 63 | 64 | print(bcolors.GREEN + " >> Deploying with " + checkpoint + "..." + bcolors.ENDC) 65 | checkpoint_filename = os.path.join(checkpoints_folder, checkpoint) 66 | 67 | model.load_state_dict(torch.load(checkpoint_filename)) 68 | model.cuda() 69 | model.eval() 70 | 71 | # ----------------------------------------------------------------- 72 | # Pass forward 73 | 74 | with torch.no_grad(): 75 | 76 | for i, image_name in enumerate(test_names): 77 | 78 | image_name_path = os.path.join(input_folder, image_name + ".jpg") 79 | image = Image.open(image_name_path).convert("RGB") 80 | img = dataset.img_transform(image) 81 | img = Variable(img).cuda().unsqueeze(0) 82 | scores = model(img) # first image in the batch 83 | label_probs = F.log_softmax(scores[0], dim=0).cpu().detach().numpy() 84 | 85 | # ----------------------------------------------------------------- 86 | # Composite 87 | 88 | # a = 0.3 # the smaller the more intense the blending is (more greenish) 89 | # composite = np.array(image) 90 | rgb = np.zeros((dataset.target_size, dataset.target_size, 3)) 91 | labels = np.argmax(label_probs, axis=0) 92 | 93 | for l in range(len(label_probs)): 94 | indexes = labels == l 95 | for c in range(3): 96 | rgb[:, :, c][indexes] = dataset.label_colors[l][c] 97 | # composite[:, :, c][indexes] = (1 - label_probs[l][indexes]) * composite[:, :, c][indexes] + (a * composite[:, :, c][indexes] + (1 - a) * label_colors[l][c]) * label_probs[l][indexes] 98 | 99 | # ----------------------------------------------------------------- 100 | # Save 101 | 102 | comp = Image.fromarray(rgb.astype('uint8')) 103 | output_file = os.path.join(output_folder, image_name + "_" + checkpoint[:-4] + ".png") 104 | comp.save(output_file) 105 | 106 | # ----------------------------------------------------------------- 107 | # Console output 108 | 109 | if i == 0: 110 | sys.stdout.flush() 111 | print('') 112 | sys.stdout.write('\x1b[1A') 113 | sys.stdout.write('\x1b[2K') 114 | 115 | print((bcolors.BLUE + " --- [%d / %d] Deployed image " + output_file + bcolors.ENDC) % (i, len(test_names))) 116 | 117 | sys.stdout.write('\x1b[1A') 118 | sys.stdout.write('\x1b[2K') 119 | print((bcolors.GREEN+" --- DONE!"+bcolors.ENDC)) 120 | 121 | if __name__ == '__main__': 122 | main() 123 | -------------------------------------------------------------------------------- /scripts/elfw-refineMe.py: -------------------------------------------------------------------------------- 1 | # This code fills superpixels by scribbling over the image with a given labeled color. 
2 | # It requires all jpg faces stored in the same folder and the .dat super-pixels in the same LFW format. 3 | # R. Redondo, Eurecat 2019 (c). 4 | 5 | import numpy as np 6 | import operator 7 | import cv2 8 | import os 9 | import sys 10 | 11 | resize = 3 12 | pointer = (-1,-1) 13 | isDrawing = False 14 | radius = 10 15 | category = 1 16 | show_original = False 17 | 18 | label_colors = [ 19 | ( 0, 0, 0), 20 | ( 0,255, 0), 21 | ( 0, 0,255), 22 | (255,255, 0), 23 | (255, 0, 0), 24 | (255, 0,255)] 25 | 26 | label_names = [ 27 | "eraser", 28 | "skin", 29 | "hair", 30 | "beard-mustache", 31 | "sunglasses", 32 | "wearable"] 33 | 34 | def onClick(event,x,y,flags,param): 35 | 36 | global isDrawing, mode, radius, category, super_scribbles, pointer 37 | 38 | pointer = (int(x/resize), int(y/resize)) 39 | 40 | if event == cv2.EVENT_LBUTTONDOWN: 41 | 42 | isDrawing = True 43 | 44 | elif event == cv2.EVENT_LBUTTONUP: 45 | 46 | isDrawing = False 47 | 48 | if isDrawing and (event == cv2.EVENT_LBUTTONDOWN or event == cv2.EVENT_MOUSEMOVE): 49 | 50 | cv2.circle(labels, pointer, radius, label_colors[category], -1) 51 | 52 | # --------------------------------------------------------------------------------------- 53 | 54 | if len(sys.argv) < 3 or len(sys.argv) > 4: 55 | print("Usage: $ elfw-refineMe folder_per_person [optional boolean]") 56 | exit(0) 57 | 58 | # faces_folder = '../Datasets/lfw-deepfunneled/' 59 | # labels_folder = '../Datasets/elfw/elfw_01_basic/labels' 60 | 61 | faces_folder = sys.argv[1] 62 | labels_folder = sys.argv[2] 63 | 64 | folder_per_person = False 65 | if (len(sys.argv) == 4): 66 | folder_per_person = sys.argv[3] in '1tTrueYesy' 67 | 68 | for labels_file in sorted(os.listdir(labels_folder)): 69 | 70 | if not labels_file.endswith(".png"): 71 | continue 72 | 73 | file_name = os.path.splitext(labels_file)[0] 74 | person_name = file_name[:-5] 75 | labels = cv2.imread(os.path.join(labels_folder, labels_file)) 76 | 77 | if folder_per_person: 78 | path_name = os.path.join(os.path.join(faces_folder, person_name), file_name + '.jpg') 79 | else: 80 | path_name = os.path.join(faces_folder, file_name + '.jpg') 81 | 82 | if not os.path.exists(path_name): 83 | print('File not found: %s' % path_name) 84 | continue 85 | 86 | face = cv2.imread(path_name) 87 | 88 | print('Editing ' + '\033[1m' + labels_file + '\033[0m' + "...") 89 | 90 | # Mouse events callback 91 | cv2.namedWindow(file_name) 92 | cv2.setMouseCallback(file_name, onClick) 93 | 94 | # Defaults 95 | radius = 3 96 | category = 1 97 | 98 | while True: 99 | 100 | # Key handlers 101 | k = cv2.waitKey(1) & 0xFF 102 | if k >= 48 and k <= 53: 103 | category = k - 48 104 | elif k == ord('e'): 105 | category = 0 106 | elif k == ord('q'): 107 | radius = min(radius + 2, 16) 108 | elif k == ord('a'): 109 | radius = max(radius - 2, 1) 110 | elif k == ord('x'): 111 | show_original = not show_original 112 | elif k == 32: 113 | if radius < 10: 114 | radius = 16 115 | else: 116 | radius = 1 117 | elif k == 13 or k == 10 or k == 141: 118 | break 119 | elif k == 27: 120 | exit(0) 121 | 122 | # Compositing 123 | alpha = 0.12 124 | face_canvas = face.copy() 125 | 126 | if not show_original: 127 | face_canvas[labels != 0] = face_canvas[labels != 0] * alpha + labels[labels != 0] * (1-alpha) 128 | cv2.circle(face_canvas, pointer, radius, label_colors[category], -1) 129 | 130 | vis = np.concatenate((face_canvas, labels), axis=1) 131 | vis = cv2.resize(vis, (vis.shape[1] * resize, vis.shape[0] * resize), interpolation = cv2.INTER_NEAREST) 132 | 133 | # Info 134 | 
font_size = 0.6 135 | font_thickness = 2 136 | hstep = 25 137 | info = "Label (0-5,e): " 138 | cv2.putText(vis, info, (10, hstep * 1), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255,255,255)) 139 | info = " " + label_names[category] 140 | cv2.putText(vis, info, (10, hstep * 1), cv2.FONT_HERSHEY_SIMPLEX, font_size, label_colors[category], font_thickness) 141 | info = "Stroke (q-a,space): " + str(radius) 142 | cv2.putText(vis, info, (10, hstep * 2), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255,255,255)) 143 | info = "Show Original (x)" 144 | cv2.putText(vis, info, (10, hstep * 3), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255,255,255)) 145 | info = "Save and give me more (enter)" 146 | cv2.putText(vis, info, (10, hstep * 4), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255,255,255)) 147 | info = "Exit (esc)" 148 | cv2.putText(vis, info, (10, hstep * 5), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255,255,255)) 149 | 150 | cv2.imshow(file_name, vis) 151 | 152 | cv2.destroyWindow(file_name) 153 | 154 | # Save output 155 | overwrite_file = os.path.join(labels_folder, labels_file) 156 | cv2.imwrite(overwrite_file, labels) 157 | print("Labels saved in " + overwrite_file) 158 | 159 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /run_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import sys 3 | import math, time 4 | import argparse 5 | from elfw import * 6 | from utils import * 7 | from trainer import TrainVal 8 | 9 | # Rafael Redondo, Jaume Gibert - Eurecat (c) 2019 10 | # ---------------------------------------------------------------------- 11 | 12 | # ---------------------------------------------------------------------- 13 | # Arguments 14 | 15 | ap = argparse.ArgumentParser(prog="trainer_elfw.py") 16 | 17 | ap.add_argument("-Vs", 18 | "--validation_set_id", 19 | type=int, 20 | help="Id of the validation set", # check elfw.py for details 21 | default=0) 22 | 23 | ap.add_argument("-S", 24 | "--synthetic_augmentation_rate", 25 | type=str, 26 | help="Rate for synthetic augmentation.", 27 | default=0) 28 | 29 | ap.add_argument("-St", 30 | "--synthetic_augmentation_types", 31 | type=str, 32 | help="Configuration of which synthetic objects are used: input must be a comma-separated string of integers such as 0,1,2", 33 | default=None) 34 | 35 | ap.add_argument("-e", 36 | "--excluded_classes", 37 | type=str, 38 | help="List of classes that won't be used for training nor validation: input must be a comma-separated string of integers such as 0,1,2", 39 | default=None) 40 | 41 | ap.add_argument("-M", 42 | "--model", 43 | type=str, 44 | help="Segmentation model: fcn, gcn or deeplab.", 45 | default="fcn") 46 | 47 | ap.add_argument("-bs", 48 | "--batch_size", 49 | type=str, 50 | help="The batch size", 51 | default=1) 52 | 53 | ap.add_argument("-K", 54 | "--checkpoints_path", 55 | type=str, 56 | help="Path to store the checkpoints", 57 | default="/media/hd/elfw/checkpoints") 58 | 59 | ap.add_argument("-R", 60 | "--resume_checkpoint", 61 | type=str, 62 | help="Resumes training at this checkpoint", 63 | default=None) 64 | 65 | args = vars(ap.parse_args()) 66 | 67 | # ------------------------------------------------------------------------- 68 | # CUDA availability 69 | 70 | if not torch.cuda.is_available(): 71 | print("Error: CUDA not available") 72 | exit(0) 73 | 74 | # ------------------------------------------------------------------------- 75 | # Command line arguments 76 | 77 | 
batch_size = int(args['batch_size']) 78 | Vs = args['validation_set_id'] # Index of the validation set file, see elfw.py. 79 | K = args['checkpoints_path'] 80 | M = args['model'] 81 | R = args['resume_checkpoint'] 82 | S = float(args['synthetic_augmentation_rate']) 83 | 84 | St = args['synthetic_augmentation_types'] 85 | St = St if not St else list(map(int, St.split(','))) 86 | 87 | if S and not St: 88 | sys.exit("check your parameters: if the augmentation ratio (-S) is positive, there should be at least one augmentation type (-St)") 89 | if S==0 and St: 90 | sys.exit("check your parameters: if the augmentation ratio (-S) is zero, you should specify the augmentation types (-St)") 91 | 92 | e = args['excluded_classes'] 93 | e = e if not e else list(map(int, e.split(','))) 94 | 95 | # ------------------------------------------------------------------------- 96 | # Some other hyperparameters 97 | 98 | gcn_levels = 3 # Number of GCN levels, typically 3 for 256x256 and 4 for 512x512 image sizes 99 | max_epochs = 301 # Maximum number of epochs 100 | lr = 1E-3 # Learning rate 101 | lr_decay = 0.2 # Learning rate decay factor 102 | w_decay = 5E-4 # Weight decay, typically [5e-4] 103 | momentum = 0.99 # Momentum, typically [0.9-0.99] 104 | lr_milestones = [35,90,180] # lr milestones for a multistep lr scheduler 105 | augment = True # random transformations for data augmentation 106 | 107 | # ------------------------------------------------------------------------- 108 | # Train and Validation data sets and data loaders 109 | 110 | ELFW_train = ELFWDataSet(split="train", 111 | valset=Vs, 112 | random_transform=augment, 113 | synth_augmen_types=St, 114 | synth_augmen_ratio=S, 115 | compute_class_weights=True, 116 | excluded_classes=e) 117 | 118 | trainLoader = data.DataLoader(ELFW_train, 119 | batch_size=batch_size, 120 | num_workers=16, 121 | shuffle=True, 122 | pin_memory=True) 123 | 124 | # The VALIDATION dataset and the corresponding data loader 125 | ELFW_validation = ELFWDataSet(split="validation", 126 | valset=Vs, 127 | excluded_classes=e) 128 | 129 | valLoader = data.DataLoader(ELFW_validation, 130 | batch_size=batch_size, 131 | num_workers=16, 132 | shuffle=False, 133 | pin_memory=True) 134 | 135 | start_time = time.time() 136 | 137 | TrainVal(trainLoader, 138 | valLoader, 139 | max_epochs, 140 | lr, 141 | lr_decay, 142 | lr_milestones, 143 | w_decay, 144 | momentum, 145 | augment, 146 | S, 147 | K, 148 | R, 149 | M, 150 | gcn_levels) 151 | 152 | elapsed_time = time.time() - start_time 153 | hours = int(math.floor(elapsed_time / 3600)) 154 | minutes = int(math.floor(elapsed_time / 60 - hours * 60)) 155 | seconds = int(math.floor(elapsed_time - hours * 3600 - minutes * 60)) 156 | print('Training finished in \033[1m' + str(hours) + 'h ' + str(minutes) + 'm ' + str(seconds) + 's\033[0m') 157 | 158 | print("\n") 159 | -------------------------------------------------------------------------------- /scripts/elfw-plotResults.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from matplotlib.patches import Polygon 5 | # from matplotlib.ticker import MaxNLocator 6 | # Rafael Redondo (c) Eurecat 2020 7 | 8 | colors = np.array([ 9 | [[247, 197, 188],[242, 151, 136],[242, 120, 99],[237, 85, 59]], 10 | [[255, 242, 196], [247, 232, 176], [250, 225, 135], [246, 213, 93]], 11 | [[180, 224, 220],[123, 209, 201],[83, 194, 183],[60, 174, 163]], 12 | [[159, 189, 214],[118, 160, 194],[72, 125, 
168],[32, 99, 155]] 13 | ]) / 255.0 14 | 15 | class_colors = np.array([ 16 | [[184, 184, 184],[125, 125, 125],[71, 71, 71], [000, 000, 000]], 17 | [[196, 255, 196],[178, 255, 178],[126, 255, 126],[000, 255, 000]], 18 | [[252, 189, 189],[255, 133, 133],[255, 77, 77], [255, 000, 000]], 19 | [[207, 255, 255],[176, 255, 245],[144, 255, 255],[000, 255, 255]], 20 | [[212, 212, 255],[149, 149, 255],[94, 94, 255],[000, 000, 255]], 21 | [[255, 209, 255],[255, 156, 255],[255, 101, 255],[255, 000, 255]] 22 | ]) / 255.0 23 | 24 | 25 | def draw(row, axis, data, method, labels, colors, xlim, hsep, radius): 26 | 27 | augment = data.shape[0] 28 | 29 | labels = list(labels) 30 | num_elements = len(labels) 31 | # labels.insert(0,'') 32 | 33 | titles = ['Sunglasses Augmentation', 'Hands Augmentation', 'Sunglasses+Hands Aug.'] 34 | 35 | for c in range(len(axis[row])): 36 | 37 | axis[row,c].set_aspect(1) 38 | axis[row,c].set_xlim(-1, xlim) 39 | # axis[row,c].set_ylim(-0.4, num_elements*0.5-0.1) 40 | axis[row, c].set_ylim(-0.4, num_elements * hsep - 0.1) 41 | 42 | axis[row, c].set_yticklabels(labels) 43 | axis[row, c].set_yticks(np.arange(num_elements) * hsep) 44 | # axis[row, c].yaxis.set_major_locator(MaxNLocator(integer=True)) 45 | if c == 0: 46 | axis[row, c].set_ylabel(method, fontsize=fontsizeLarge) 47 | else: 48 | axis[row, c].get_yaxis().set_visible(False) 49 | if row == 0: 50 | axis[row, c].set_title(titles[c], fontsize=fontsizeMedium) 51 | 52 | if row == axis.shape[0] - 1: 53 | axis[row, c].set_xlabel('Gain', fontsize=fontsizeMedium) 54 | else: 55 | axis[row, c].get_xaxis().set_visible(False) 56 | 57 | axis[row, c].add_patch(Polygon([[-1, -1], [0, -1], [0, 10], [-1, 10]], closed=True, fill=True, facecolor=[0.92,0.9,0.9])) 58 | 59 | for m in range(num_elements): 60 | for s in reversed(range(augment)): 61 | 62 | sigma = s / float(augment-1) 63 | r = math.sqrt(1 + sigma) * radius 64 | t = m + c * num_elements 65 | circle = plt.Circle((data[s,t] - data[0,t], m * hsep), r, color=colors[m,augment-s-1], edgecolor=None) 66 | axis[row,c].add_artist(circle) 67 | 68 | # ---------------------------------------------------------------------------------------------- 69 | 70 | 71 | fcn = np.array([ 72 | [94.86221, 89.94708, 78.54365, 90.62491, 94.86221, 89.94708, 78.54365, 90.62491, 94.86221, 89.94708, 78.54365, 90.62491], 73 | [94.87768, 89.34935, 78.5738, 90.65072, 94.91543, 90.04007, 78.89132, 90.71592, 94.87198, 90.01351, 79.12107, 90.64796], 74 | [94.82212, 89.07311, 78.57936, 90.55048, 95.01015, 89.30039, 79.2808, 90.85555, 94.92132, 89.81723, 79.51949, 90.71459], 75 | [94.91106, 88.96342, 79.1459, 90.67938, 94.94046, 89.36422, 79.07776, 90.75119, 94.9023, 90.05563, 79.41762, 90.69509] 76 | ]) 77 | 78 | fcn_classes = np.array([ 79 | [94.75722, 86.38252, 71.85863, 61.34205, 72.44731, 84.47418, 94.75722, 86.38252, 71.85863, 61.34205, 72.44731, 84.47418, 94.75722, 86.38252, 71.85863, 61.34205, 72.44731, 84.47418], 80 | [94.74529, 86.52661, 71.92953, 60.08587, 73.81507, 84.34044, 94.78213, 86.77009, 71.78261, 61.24385, 74.51008, 84.25919, 94.74461, 86.72296, 71.41357, 63.01671, 74.86192, 83.96667], 81 | [94.70561, 86.4855, 71.23186, 60.26997, 74.76319, 84.02004, 94.91729, 86.89263, 72.04419, 62.55301, 75.33231, 83.94535, 94.789, 86.70316, 71.52878, 63.19029, 75.79578, 85.10994], 82 | [94.74795, 86.68139, 71.68878, 62.33461, 74.78171, 84.64096, 94.79637, 86.70211, 71.9773, 61.76077, 73.98104, 85.249, 94.72738, 86.72993, 71.767, 62.76649, 75.47534, 85.03957] 83 | 84 | ]) 85 | 86 | deeplab = np.array([ 87 | [94.6848, 
89.71417, 77.94909, 90.37054, 94.6848, 89.71417, 77.94909, 90.37054, 94.6848, 89.71417, 77.94909, 90.37054], 88 | [94.78537, 89.59541, 78.56921, 90.51187, 94.86725, 89.7494, 78.62243, 90.63049, 94.81017, 89.91979, 78.41131, 90.55676], 89 | [94.82899, 90.35099, 79.05202, 90.57593, 94.86047, 90.18027, 78.72145, 90.63608, 94.90303, 90.12334, 79.14438, 90.70572], 90 | [94.89329, 90.06537, 79.38813, 90.67735, 94.9435, 90.07861, 78.87746, 90.75484, 94.89794, 90.37945, 79.37854, 90.70328] 91 | ]) 92 | 93 | deeplab_classes = np.array([ 94 | [94.51156, 86.31067, 71.33108, 60.57315, 71.34837, 83.61973, 94.51156, 86.31067, 71.33108, 60.57315, 71.34837, 83.61973, 94.51156, 86.31067, 71.33108, 60.57315, 71.34837, 83.61973], 95 | [94.63511, 86.41761, 71.65853, 61.14348, 74.20494, 83.35558, 94.77132, 86.44033, 71.84831, 62.07319, 72.95129, 83.65015, 94.66345, 86.50839, 71.66187, 59.54201, 74.23029, 83.86187], 96 | [94.66677, 86.3675, 71.90078, 61.5994, 75.30071, 84.47695, 94.73285, 86.59962, 71.78432, 62.6076, 72.75062, 83.8537, 94.75528, 86.66354, 71.85329, 61.44883, 74.97043, 85.17492], 97 | [94.75281, 86.6263, 71.72533, 63.26911, 75.43251, 84.52274, 94.81422, 86.6394, 72.28983, 61.98376, 73.2231, 84.31447, 94.72211, 86.83926, 71.83235, 63.31498, 75.25782, 84.30474] 98 | ]) 99 | 100 | 101 | fontsizeSmall = 12 102 | fontsizeMedium = 16 103 | fontsizeLarge = 18 104 | 105 | font = {'family':'normal', 'weight':'normal', 'size': fontsizeSmall} 106 | plt.rc('font', **font) 107 | 108 | metrics_fig, metrics_axis = plt.subplots(2, 3, sharey=True, sharex=True) 109 | draw(0, metrics_axis, fcn, 'FCN', ('Pixel Acc.', 'Mean Acc.', 'Mean IU', 'Freq.W. IU'), colors, 1.8, 0.4, 0.1) 110 | draw(1, metrics_axis, deeplab, 'DeepLabV3', ('Pixel Acc.', 'Mean Acc.', 'Mean IU', 'Freq.W. IU'), colors, 1.8, 0.4, 0.1) 111 | 112 | # class_fig, class_axis = plt.subplots(2, 3, sharey=True) 113 | # draw(0, class_axis, fcn_classes, 'FCN', ('Bkgnd', 'Skin', 'Hair', 'Beard', 'Snglss', 'Wear'), class_colors, 4.5, 0.5, 0.15) 114 | # draw(1, class_axis, deeplab_classes,'DeepLabV3',('Bkgnd', 'Skin', 'Hair', 'Beard', 'Snglss', 'Wear'), class_colors, 4.5, 0.5, 0.15) 115 | 116 | plt.show() -------------------------------------------------------------------------------- /demos/EUT_ELFW_webcam_jupyter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Faces in the Wild with GCN Semantic Segmentation\n", 8 | "This code snippet runs the gcn semantic segmentation network over the webcam.\n", 9 | "\n", 10 | "Play with:\n", 11 | "\n", 12 | "1. A hat, a cap, or any wearable on top of your head.\n", 13 | "2. Take some sunglasses on.\n", 14 | "3. Try with moustache or beard.\n", 15 | "4. 
Put a mouth-mask on (or simulate it with a napkin).\n", 16 | "\n", 17 | "*Eurecat 2019 - Rafael Redondo*" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 118, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "Stream stopped\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "# Make sure your network has been trained with this architectural parameters\n", 35 | "target_size = 256\n", 36 | "num_classes = 7\n", 37 | "num_levels = 3\n", 38 | "\n", 39 | "# Download from https://eurecatcloud.sharepoint.com/:u:/r/sites/audiovisualteam/Shared%20Documents/ELOF/FacesInTheWild/Checkpoints/gcn-dataset_3-levels_3-lr_0.0001-lrdecay_0.1-lrpatience_10-wdecay_0.0005-momentum_0.99-dataaugmen/gcn-epoch_0480.pth?csf=1&e=vjEPyY\n", 40 | "checkpoint = \"./data/gcn-epoch_0480.pth\"\n", 41 | "\n", 42 | "# Colorize your labeled classes\n", 43 | "label_colors = [\n", 44 | " (0, 0, 0),\n", 45 | " (0, 255, 0),\n", 46 | " (0, 0, 255),\n", 47 | " (255, 255, 0),\n", 48 | " (255, 0, 0),\n", 49 | " (255, 0, 255),\n", 50 | " (0, 255, 255)]\n", 51 | "\n", 52 | "# Model loading (Resnet may take a while to download)\n", 53 | "import torch\n", 54 | "from torch.autograd import Variable\n", 55 | "from torchvision.transforms import ToTensor, Normalize\n", 56 | "import torch.nn.functional as F\n", 57 | "import sys\n", 58 | "sys.path.insert(1,'../pytorch-segmentation')\n", 59 | "from models import GCN\n", 60 | "\n", 61 | "model = torch.nn.DataParallel(GCN(num_classes, num_levels))\n", 62 | "model.load_state_dict(torch.load(checkpoint))\n", 63 | "model.cuda()\n", 64 | "model.eval()\n", 65 | ";\n", 66 | "\n", 67 | "from PIL import Image\n", 68 | "import numpy as np\n", 69 | "import cv2\n", 70 | "from io import BytesIO as StringIO\n", 71 | "from IPython import display\n", 72 | "\n", 73 | "def showFrame(a, fmt='jpeg'):\n", 74 | " f = StringIO()\n", 75 | " Image.fromarray(a).save(f, fmt)\n", 76 | " display.display(display.Image(data=f.getvalue()))\n", 77 | " \n", 78 | "vc = cv2.VideoCapture(0)\n", 79 | "\n", 80 | "if not vc.isOpened():\n", 81 | " \n", 82 | " print(\"Error opening webcam\")\n", 83 | " \n", 84 | "else:\n", 85 | " try:\n", 86 | " while True:\n", 87 | "\n", 88 | " capturing, image = vc.read()\n", 89 | " \n", 90 | " if not capturing:\n", 91 | " break\n", 92 | " \n", 93 | " offset = int((image.shape[1] - image.shape[0]) * 0.5)\n", 94 | " image = image[:, offset:offset + image.shape[0]]\n", 95 | " image = np.flip(image,1)\n", 96 | " image = Image.fromarray(np.uint8(image))\n", 97 | " image = image.resize((target_size, target_size), Image.BILINEAR)\n", 98 | "\n", 99 | " # Pass forward\n", 100 | " img = ToTensor()(image)\n", 101 | " img = Normalize([.485, .456, .406], [.229, .224, .225])(img)\n", 102 | " img = Variable(img).cuda().unsqueeze(0)\n", 103 | " scores = model(img) # first image in batch\n", 104 | " label_probs = F.log_softmax(scores[0], dim=0).cpu().detach().numpy()\n", 105 | "\n", 106 | " # Composite\n", 107 | " segments = np.zeros((target_size, target_size, 3))\n", 108 | " labels = np.argmax(label_probs, axis=0)\n", 109 | " composite = np.array(image)\n", 110 | " for l in range(len(label_probs)):\n", 111 | " indexes = labels == l\n", 112 | " for c in range(3):\n", 113 | " segments[:, :, c][indexes] = label_colors[l][c] \n", 114 | " #composite[:,:,c][indexes] = (1-label_probs[l][indexes]) * composite[:,:,c][indexes] + \\\n", 115 | " # (a * composite[:,:,c][indexes] + (1-a) * label_colors[l][c]) * label_probs[l][indexes]\n", 116 | 
" \n", 117 | "\n", 118 | " a = 0.6 # the smaller the more intense the blending is (more greenish)\n", 119 | " image = np.array(image)\n", 120 | " composite = segments * (1-a) + image * a\n", 121 | " for c in range(3):\n", 122 | " indexes = labels == 0\n", 123 | " composite[:,:,c][indexes] = image[:,:,c][indexes]\n", 124 | " \n", 125 | " composite = np.flip(np.array(composite),2)\n", 126 | " showFrame(composite.astype('uint8'))\n", 127 | " display.clear_output(wait=True)\n", 128 | " \n", 129 | " except KeyboardInterrupt:\n", 130 | " vc.release()\n", 131 | " print \"Stream stopped\"" 132 | ] 133 | } 134 | ], 135 | "metadata": { 136 | "kernelspec": { 137 | "display_name": "Python 2", 138 | "language": "python", 139 | "name": "python2" 140 | }, 141 | "language_info": { 142 | "codemirror_mode": { 143 | "name": "ipython", 144 | "version": 2 145 | }, 146 | "file_extension": ".py", 147 | "mimetype": "text/x-python", 148 | "name": "python", 149 | "nbconvert_exporter": "python", 150 | "pygments_lexer": "ipython2", 151 | "version": "2.7.16" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 2 156 | } -------------------------------------------------------------------------------- /scripts/headPose.py: -------------------------------------------------------------------------------- 1 | # This code runs dlib-based head pose estimation. 2 | # 3 | # Modified from: KwanHua Lee (lincolnhard) Taiwan, lincolnhardabc@gmail.com 4 | # By: R. Redondo (c) Eurecat 2019 5 | 6 | import math 7 | import cv2 8 | import numpy as np 9 | import dlib 10 | from imutils import face_utils 11 | 12 | K = [6.5308391993466671e+002, 0.0, 3.1950000000000000e+002, 13 | 0.0, 6.5308391993466671e+002, 2.3950000000000000e+002, 14 | 0.0, 0.0, 1.0] 15 | D = [7.0834633684407095e-002, 6.9140193737175351e-002, 0.0, 0.0, -1.3073460323689292e+000] 16 | 17 | cam_matrix = np.array(K).reshape(3, 3).astype(np.float32) 18 | dist_coeffs = np.array(D).reshape(5, 1).astype(np.float32) 19 | 20 | object_pts = np.float32([[6.825897, 6.760612, 4.402142], 21 | [1.330353, 7.122144, 6.903745], 22 | [-1.330353, 7.122144, 6.903745], 23 | [-6.825897, 6.760612, 4.402142], 24 | [5.311432, 5.485328, 3.987654], 25 | [1.789930, 5.393625, 4.413414], 26 | [-1.789930, 5.393625, 4.413414], 27 | [-5.311432, 5.485328, 3.987654], 28 | [2.005628, 1.409845, 6.165652], 29 | [-2.005628, 1.409845, 6.165652], 30 | [2.774015, -2.080775, 5.048531], 31 | [-2.774015, -2.080775, 5.048531], 32 | [0.000000, -3.116408, 6.097667], 33 | [0.000000, -7.415691, 4.070434]]) 34 | 35 | reprojectsrc = np.float32([[10.0, 10.0, 10.0], 36 | [10.0, 10.0, -10.0], 37 | [10.0, -10.0, -10.0], 38 | [10.0, -10.0, 10.0], 39 | [-10.0, 10.0, 10.0], 40 | [-10.0, 10.0, -10.0], 41 | [-10.0, -10.0, -10.0], 42 | [-10.0, -10.0, 10.0]]) 43 | 44 | line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0], 45 | [4, 5], [5, 6], [6, 7], [7, 4], 46 | [0, 4], [1, 5], [2, 6], [3, 7]] 47 | 48 | 49 | def get_head_pose(shape): 50 | 51 | image_pts = np.float32([shape[17], shape[21], shape[22], shape[26], shape[36], 52 | shape[39], shape[42], shape[45], shape[31], shape[35], 53 | shape[48], shape[54], shape[57], shape[8]]) 54 | 55 | _, rotation_vec, translation_vec = cv2.solvePnP(object_pts, image_pts, cam_matrix, dist_coeffs) 56 | 57 | reprojectdst, _ = cv2.projectPoints(reprojectsrc, rotation_vec, translation_vec, cam_matrix, dist_coeffs) 58 | 59 | reprojectdst = tuple(map(tuple, reprojectdst.reshape(8, 2))) 60 | 61 | # calc euler angle 62 | rotation_mat, _ = cv2.Rodrigues(rotation_vec) 63 | pose_mat = 
cv2.hconcat((rotation_mat, translation_vec)) 64 | _, _, _, _, _, _, euler_angle = cv2.decomposeProjectionMatrix(pose_mat) 65 | 66 | return reprojectdst, euler_angle 67 | 68 | 69 | def main(): 70 | 71 | shape_predictor = './facedetectors/dlib/shape_predictor_68_face_landmarks.dat' 72 | print('\033[1m' + 'Initiating Dlib from ' + shape_predictor + '\033[0m') 73 | dlib_detector = dlib.get_frontal_face_detector() 74 | dlib_predictor = dlib.shape_predictor(shape_predictor) 75 | 76 | cap = cv2.VideoCapture(0) 77 | 78 | frame_count = 0 79 | 80 | while True: 81 | 82 | frame_count += 1 83 | ret, frame = cap.read() 84 | 85 | if not ret or cv2.waitKey(1) & 0xFF == 27: # esc to exit 86 | cap.release() 87 | break 88 | 89 | # Face detection: requires grayscale, rect contains as many bounding boxes as faces detected 90 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 91 | face_rects = dlib_detector(gray, 1) 92 | 93 | # If no detection, then continue 94 | if not len(face_rects): 95 | cv2.imshow("Dlib head pose estimation", frame) 96 | continue 97 | 98 | # determine the facial landmarks for the face region, then 99 | # convert the facial landmark (x, y)-coordinates to a NumPy array 100 | shape = dlib_predictor(gray, face_rects[0]) 101 | shape = face_utils.shape_to_np(shape) 102 | 103 | reprojectdst, euler_angle = get_head_pose(shape) 104 | 105 | # Convert dlib's rectangle to a OpenCV-style bounding box 106 | # [i.e., (x, y, w, h)], then draw the face bounding box 107 | # (x, y, w, h) = face_utils.rect_to_bb(face_rects[0]) 108 | # cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) 109 | 110 | # Draw (x,y)-landmark coordinates 111 | for (x, y) in shape: 112 | cv2.circle(frame, (x, y), 1, (0, 255, 0), -1) 113 | 114 | 115 | for start, end in line_pairs: 116 | cv2.line(frame, reprojectdst[start], reprojectdst[end], (255, 0, 0)) 117 | 118 | elevation = euler_angle[0, 0] 119 | azimuth = euler_angle[1, 0] 120 | rotation = euler_angle[2, 0] 121 | # x = math.cos(math.radians(azimuth)) * math.cos(math.radians(elevation)) 122 | # y = math.sin(math.radians(azimuth)) * math.cos(math.radians(elevation)) 123 | # z = math.sin(math.radians(elevation)) 124 | solid_angle = math.degrees(math.acos(math.cos(math.radians(azimuth)) * math.cos(math.radians(elevation)))) # simplified dot product [1,0,0] \dot [x,y,z] 125 | 126 | cv2.putText(frame, "Elevation: " + "{:7.2f}".format(-elevation), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), thickness=2) 127 | cv2.putText(frame, "Azimuth: " + "{:7.2f}".format(azimuth), (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), thickness=2) 128 | cv2.putText(frame, "Rotation: " + "{:7.2f}".format(rotation), (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), thickness=2) 129 | cv2.putText(frame, "Solid Angle: " + "{:7.2f}".format(solid_angle), (20, 110), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), thickness=2) 130 | 131 | # Show the output image with the face detections + facial landmarks 132 | cv2.imshow("Dlib head pose estimation", frame) 133 | 134 | cv2.destroyAllWindows() 135 | print('Done!') 136 | 137 | if __name__ == '__main__': 138 | main() -------------------------------------------------------------------------------- /demos/EUT_ELFW_singleimage_colab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "EUT-ELFW-singleimage.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 
| "kernelspec": { 12 | "name": "python2", 13 | "display_name": "Python 2" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "ApeuxnQx7qqC", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "# Faces in the Wild with GCN Semantic Segmentation\n", 26 | "\n", 27 | "This code snippet runs the gcn semantic segmentation network on a single image. \n", 28 | "*Eurecat 2019 - Rafael Redondo*" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "metadata": { 34 | "id": "FMObRUZ23T5y", 35 | "colab_type": "code", 36 | "colab": {} 37 | }, 38 | "source": [ 39 | "# Make sure your network has been trained with this architectural parameters\n", 40 | "target_size = 256\n", 41 | "num_classes = 7\n", 42 | "num_levels = 3\n", 43 | "\n", 44 | "# I/O\n", 45 | "checkpoint = \"./gcn-epoch_0480.pth\"\n", 46 | "input_file = \"./Bettina_Rheims_0001.jpg\"\n", 47 | "result_file = \"./Bettina_Rheims_0001_results.png\"\n", 48 | "\n", 49 | "# Colorize your labeled classes\n", 50 | "label_colors = [\n", 51 | " (0, 0, 0),\n", 52 | " (0, 255, 0),\n", 53 | " (255, 0, 0),\n", 54 | " (0, 255, 255),\n", 55 | " (0, 0, 255),\n", 56 | " (255, 0, 255),\n", 57 | " (255, 255, 0)]" 58 | ], 59 | "execution_count": 0, 60 | "outputs": [] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "metadata": { 65 | "id": "mcN49XD83cLD", 66 | "colab_type": "code", 67 | "outputId": "ccfe8d19-71f1-4d80-850f-8979bb70ded0", 68 | "colab": { 69 | "base_uri": "https://localhost:8080/", 70 | "height": 122 71 | } 72 | }, 73 | "source": [ 74 | "# Prepare Drive by following the instructions.\n", 75 | "from google.colab import drive\n", 76 | "drive.mount('/content/drive') " 77 | ], 78 | "execution_count": 0, 79 | "outputs": [ 80 | { 81 | "output_type": "stream", 82 | "text": [ 83 | "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n", 84 | "\n", 85 | "Enter your authorization code:\n", 86 | "··········\n", 87 | "Mounted at /content/drive\n" 88 | ], 89 | "name": "stdout" 90 | } 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "metadata": { 96 | "id": "8zBI6a_u4hDK", 97 | "colab_type": "code", 98 | "outputId": "6add5fe8-480b-44df-e073-bed07b963c18", 99 | "colab": { 100 | "base_uri": "https://localhost:8080/", 101 | "height": 34 102 | } 103 | }, 104 | "source": [ 105 | "# Go to Drive contents\n", 106 | "cd drive/My\\ Drive/ELFW" 107 | ], 108 | "execution_count": 0, 109 | "outputs": [ 110 | { 111 | "output_type": "stream", 112 | "text": [ 113 | "/content/drive/My Drive/ELFW\n" 114 | ], 115 | "name": "stdout" 116 | } 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "metadata": { 122 | "id": "g7OEhZwp3dhc", 123 | "colab_type": "code", 124 | "colab": {} 125 | }, 126 | "source": [ 127 | "# Import\n", 128 | "import torch\n", 129 | "from torch.autograd import Variable\n", 130 | "from torchvision.transforms import ToTensor, Normalize\n", 131 | "import torch.nn.functional as F\n", 132 | "from models import GCN\n", 133 | "from PIL import Image\n", 134 | "import numpy as np\n", 135 | "\n", 136 | "# Model loading (Resnet may take a while to download)\n", 137 | "model = 
torch.nn.DataParallel(GCN(num_classes, num_levels))\n", 138 | "model.load_state_dict(torch.load(checkpoint))\n", 139 | "model.cuda()\n", 140 | "model.eval()\n", 141 | ";" 142 | ], 143 | "execution_count": 0, 144 | "outputs": [] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "metadata": { 149 | "id": "BuyRbF3O3gKl", 150 | "colab_type": "code", 151 | "outputId": "141bf613-7284-43b7-ec19-0d62622933c5", 152 | "colab": { 153 | "base_uri": "https://localhost:8080/", 154 | "height": 51 155 | } 156 | }, 157 | "source": [ 158 | "# Pass forward\n", 159 | "image = Image.open(input_file).convert(\"RGB\")\n", 160 | "image = image.resize((target_size, target_size), Image.BILINEAR)\n", 161 | "img = ToTensor()(image)\n", 162 | "img = Normalize([.485, .456, .406], [.229, .224, .225])(img)\n", 163 | "img = Variable(img).cuda().unsqueeze(0)\n", 164 | "scores = model(img) # first image in batch\n", 165 | "label_probs = F.log_softmax(scores[0], dim=0).cpu().detach().numpy()" 166 | ], 167 | "execution_count": 0, 168 | "outputs": [ 169 | { 170 | "output_type": "stream", 171 | "text": [ 172 | "/usr/local/lib/python2.7/dist-packages/torch/nn/functional.py:2622: UserWarning: nn.functional.upsample_bilinear is deprecated. Use nn.functional.interpolate instead.\n", 173 | " warnings.warn(\"nn.functional.upsample_bilinear is deprecated. Use nn.functional.interpolate instead.\")\n" 174 | ], 175 | "name": "stderr" 176 | } 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "metadata": { 182 | "id": "TmmVcJEE3hbW", 183 | "colab_type": "code", 184 | "outputId": "5063e617-c53f-4a69-d510-a0862ab511ce", 185 | "colab": { 186 | "base_uri": "https://localhost:8080/", 187 | "height": 34 188 | } 189 | }, 190 | "source": [ 191 | "# Composite\n", 192 | "rgb = np.zeros((target_size, target_size, 3))\n", 193 | "labels = np.argmax(label_probs, axis=0)\n", 194 | "\n", 195 | "for l in range(len(label_probs)):\n", 196 | " indexes = labels == l\n", 197 | " for c in range(3):\n", 198 | " rgb[:, :, c][indexes] = label_colors[l][c]\n", 199 | "\n", 200 | "result = Image.fromarray(rgb.astype('uint8'))\n", 201 | "result.save(result_file)\n", 202 | "print('Results saved.')" 203 | ], 204 | "execution_count": 0, 205 | "outputs": [ 206 | { 207 | "output_type": "stream", 208 | "text": [ 209 | "Results saved.\n" 210 | ], 211 | "name": "stdout" 212 | } 213 | ] 214 | } 215 | ] 216 | } -------------------------------------------------------------------------------- /scripts/elfw-evaluateOnValidation.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import os 4 | #from import check_mkdir, bcolors 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import fnmatch 8 | 9 | class bcolors: 10 | PURPLE = '\033[95m' 11 | BLUE = '\033[94m' 12 | GREEN = '\033[92m' 13 | YELLOW = '\033[93m' 14 | RED = '\033[91m' 15 | CYAN = '\033[96m' 16 | ENDC = '\033[0m' 17 | BOLD = '\033[1m' 18 | CYAN = '\033[96m' 19 | 20 | # I/O 21 | images_folder = "/media/jaume.gibert/Data/elfw/elfw_01_basic/faces" 22 | labels_folder = "/media/jaume.gibert/Data/elfw/elfw_01_basic/labels" 23 | predictions_folder = "/media/jaume.gibert/Data/elfw/predictions" 24 | output_folder = "/media/jaume.gibert/Data/elfw/eval_curves" 25 | 26 | # Input From File 27 | f = open("/media/jaume.gibert/Data/elfw/elfw_01_basic/elfw_set_00.txt", "r") 28 | names = [] 29 | for line in f: 30 | # for some reason it's also loading the \n at the end of each line 31 | if line[-1:]=='\n': 32 | names.append(line[:-1]) 33 | 
else: 34 | names.append(line) 35 | 36 | target_size = 256 37 | 38 | label_colors = [ 39 | (0, 0, 0), 40 | (0, 255, 0), 41 | (255, 0, 0), 42 | (0, 255, 255), 43 | (0, 0, 255), 44 | (255, 0, 255), 45 | (255, 255, 0)] 46 | 47 | label_names = [ 48 | "background", 49 | "skin", 50 | "hair", 51 | "beard-mustache", 52 | "sunglasses", 53 | "wearable", 54 | "mouth-mask"] 55 | 56 | num_classes = len(label_colors) 57 | 58 | def ToELFWLabels(data): 59 | 60 | data = np.array(data) 61 | 62 | r = data[:, :, 0] 63 | g = data[:, :, 1] 64 | b = data[:, :, 2] 65 | 66 | output = np.zeros((data.shape[0], data.shape[1])) 67 | for c in range(0,len(label_colors)): 68 | color_mask = (r == label_colors[c][0]) & (g == label_colors[c][1]) & (b == label_colors[c][2]) 69 | output[color_mask] = c 70 | 71 | return output 72 | 73 | 74 | def main(): 75 | 76 | # list folders in checkpoints folder 77 | for config in os.listdir(predictions_folder): 78 | 79 | print(config) 80 | path = os.path.join(predictions_folder, config) 81 | 82 | # we need to find out how many epochs have been computed for this configuration 83 | # this should not be like this, since we expect to have the same number of epochs for 84 | # each configuration, but the experiment crashed and we want to find out how many epochs 85 | # we have 86 | # HOW: take the first image of the validation set, and see which epochs are there 87 | # for each image, the epochs should be exactly the same 88 | image_name = names[0] 89 | epochs = [] 90 | for file in os.listdir(path): 91 | if fnmatch.fnmatch(file, "*"+image_name+"*"): 92 | epochs.append(int(file[-8:-4])) 93 | epochs.sort() 94 | 95 | #epochs = [0, 10] 96 | 97 | accuracy_curves = np.zeros((num_classes, len(epochs))) 98 | precision_curves = np.zeros((num_classes, len(epochs))) 99 | recall_curves = np.zeros((num_classes, len(epochs))) 100 | fscore_curves = np.zeros((num_classes, len(epochs))) 101 | 102 | 103 | for idx_ep, ep in enumerate(epochs): 104 | 105 | #print(" > Epoch " + str(ep)) 106 | 107 | count = np.zeros((num_classes, 1)) 108 | 109 | for image_name in names: 110 | 111 | # Read the labels 112 | label_name_path = os.path.join(labels_folder, image_name + ".png") 113 | label = Image.open(label_name_path).convert("RGB") 114 | label = label.resize((target_size, target_size), Image.NEAREST) 115 | label = ToELFWLabels(label) 116 | 117 | prediction_path = os.path.join(path, image_name+"_fcn-epoch_"+str(ep).zfill(4)+".png") 118 | prediction = Image.open(prediction_path).convert("RGB") 119 | prediction = prediction.resize((target_size, target_size), Image.NEAREST) # this should not be necessary since predictions are already in target_size size 120 | prediction = ToELFWLabels(prediction) 121 | 122 | for c in range(0, num_classes): 123 | 124 | A = (prediction == c) 125 | B = (label == c) 126 | C = (prediction != c) 127 | D = (label != c) 128 | 129 | # True Positive (TP): we predict a label of class cl (positive), and the true label is cl. 130 | TP_mask = np.logical_and(A, B) 131 | TP = np.sum(TP_mask) 132 | # True Negative (TN): we predict a label that it's not 0 (negative), and the true label is not cl. 133 | TN_mask = np.logical_and(C, D) 134 | TN = np.sum(TN_mask) 135 | # False Positive (FP): we predict a label of class cl (positive), but the true label is not cl. 136 | FP_mask = np.logical_and(A, D) 137 | FP = np.sum(FP_mask) 138 | # False Negative (FN): we predict a label that it's not cl (negative), but the true label is cl. 
139 | FN_mask = np.logical_and(C, B) 140 | FN = np.sum(FN_mask) 141 | 142 | if np.sum(np.logical_or(A, B)): 143 | 144 | Accuracy = (TP+TN) / (TP+TN+FP+FN+1E-8) 145 | Precision = (TP) / (TP+FP+1E-8) 146 | Recall = (TP) / (TP+FN+1E-8) 147 | F_Score = (2*Precision*Recall) / (Precision+Recall+1E-8) 148 | 149 | accuracy_curves [c, idx_ep] += Accuracy 150 | precision_curves[c, idx_ep] += Precision 151 | recall_curves [c, idx_ep] += Recall 152 | fscore_curves [c, idx_ep] += F_Score 153 | 154 | count[c] += 1 155 | 156 | for c in range(num_classes): 157 | if count[c]: 158 | accuracy_curves[c, idx_ep] /= float(count[c]) 159 | precision_curves[c, idx_ep] /= float(count[c]) 160 | recall_curves[c, idx_ep] /= float(count[c]) 161 | fscore_curves[c, idx_ep] /= float(count[c]) 162 | 163 | 164 | output_path = os.path.join(output_folder, config) 165 | if not os.path.exists(output_path): 166 | os.mkdir(output_path) 167 | 168 | # Accuracy 169 | fig = plt.figure() 170 | for c in range(num_classes): 171 | plt.plot(epochs, accuracy_curves[c, :], label=label_names[c]) 172 | plt.title('Accuracy') 173 | plt.legend() 174 | plt.grid(axis='y') 175 | plt.savefig(os.path.join(output_path, '00-Accuracy.png')) 176 | 177 | # Precision 178 | fig = plt.figure() 179 | for c in range(num_classes): 180 | plt.plot(epochs, precision_curves[c, :], label=label_names[c]) 181 | plt.title('Precision') 182 | plt.legend() 183 | plt.grid(axis='y') 184 | plt.savefig(os.path.join(output_path, '01-Precision.png')) 185 | 186 | # Recall 187 | fig = plt.figure() 188 | for c in range(num_classes): 189 | plt.plot(epochs, recall_curves[c, :], label=label_names[c]) 190 | plt.title('Recall') 191 | plt.legend() 192 | plt.grid(axis='y') 193 | plt.savefig(os.path.join(output_path, '02-Recall.png')) 194 | 195 | # F-Score 196 | fig = plt.figure() 197 | for c in range(num_classes): 198 | plt.plot(epochs, fscore_curves[c, :], label=label_names[c]) 199 | plt.title('F-Score') 200 | plt.legend() 201 | plt.grid(axis='y') 202 | plt.savefig(os.path.join(output_path, '03-F_Score.png')) 203 | 204 | 205 | # Display max values of f-score 206 | print("") 207 | for c in range(num_classes-1): 208 | curve = fscore_curves[c,:] 209 | idx = np.argmax(curve) 210 | if c==1 or c==2: 211 | print(bcolors.BOLD + label_names[c] + bcolors.ENDC + "\t\tMax F-Score: "+ bcolors.BLUE+str(curve[idx])+ bcolors.ENDC + " at epoch " + str(epochs[idx])) 212 | else: 213 | print(bcolors.BOLD + label_names[c] + bcolors.ENDC + "\tMax F-Score: "+ bcolors.BLUE+str(curve[idx])+ bcolors.ENDC + " at epoch " + str(epochs[idx])) 214 | print("") 215 | 216 | 217 | if __name__ == '__main__': 218 | main() 219 | -------------------------------------------------------------------------------- /scripts/elfw-scribbleMe.py: -------------------------------------------------------------------------------- 1 | # This code fills superpixels by scribbling over the image with a given labeled color. 2 | # It requires all jpg faces storaged in the same folder and the .dat super-pixels in the same LFW format. 3 | # R. Redondo, Eurecat 2019 (c). 
4 | 5 | import numpy as np 6 | import operator 7 | import cv2 8 | import os 9 | import sys 10 | 11 | resize = 3 12 | pointer = (-1,-1) 13 | super_scribbles = [] 14 | isDrawing = False 15 | radius = 10 16 | category = 1 17 | 18 | label_colors = [ 19 | ( 0, 0, 0), 20 | ( 0,255, 0), 21 | ( 0, 0,255), 22 | (255,255, 0), 23 | (255, 0, 0), 24 | (255, 0,255)] 25 | 26 | label_names = [ 27 | "eraser", 28 | "skin", 29 | "hair", 30 | "beard-mustache", 31 | "sunglasses", 32 | "wearable"] 33 | 34 | def onClick(event,x,y,flags,param): 35 | 36 | global isDrawing, mode, radius, category, super_scribbles, pointer 37 | 38 | pointer = (int(x/resize), int(y/resize)) 39 | 40 | if event == cv2.EVENT_LBUTTONDOWN: 41 | 42 | isDrawing = True 43 | 44 | elif event == cv2.EVENT_LBUTTONUP: 45 | 46 | isDrawing = False 47 | 48 | # Scribbles to SP elections 49 | super_scribbles = np.zeros(scribbles.shape) 50 | sp_votes = {} 51 | sp_areas = np.zeros(index+1) 52 | h, w = sp_reindex.shape 53 | 54 | for y in range(0, h): 55 | for x in range(0, w): 56 | 57 | s = sp_reindex[y, x] 58 | sp_areas[s] = sp_areas[s] + 1 59 | 60 | vote_rgb = scribbles[y,x] 61 | if vote_rgb.any(): 62 | if s not in sp_votes: 63 | sp_votes[s] = {} 64 | 65 | vote_rgb = tuple(vote_rgb) 66 | if vote_rgb in sp_votes[s]: 67 | sp_votes[s][vote_rgb] = sp_votes[s][vote_rgb] + 1 68 | else: 69 | sp_votes[s][vote_rgb] = 1 70 | 71 | 72 | for s in sp_votes.keys(): 73 | winner, votes = max(sp_votes[s].items(), key=operator.itemgetter(1)) 74 | super_scribbles[sp_reindex == s] = np.array(winner)# (0,255,0) 75 | 76 | if isDrawing and (event == cv2.EVENT_LBUTTONDOWN or event == cv2.EVENT_MOUSEMOVE): 77 | 78 | cv2.circle(scribbles, pointer, radius, label_colors[category], -1) 79 | 80 | # --------------------------------------------------------------------------------------- 81 | 82 | if len(sys.argv) != 4: 83 | print("Usage: $ elfw-scribbleMe ") 84 | exit(0) 85 | 86 | faces_folder = sys.argv[1] 87 | sp_folder = sys.argv[2] 88 | output_folder = sys.argv[3] 89 | 90 | if not os.path.exists(output_folder): 91 | os.mkdir(output_folder) 92 | 93 | # faces_folder = '../Datasets/lfw-deepfunneled/' 94 | # sp_folder = '../Datasets/lfw-deepfunneled-sp/' 95 | # output_folder = '../Datasets/lfw-deepfunneled-sp-overlay/' 96 | 97 | for face_file in sorted(os.listdir(faces_folder)): 98 | 99 | if not face_file.endswith(".jpg"): 100 | continue 101 | 102 | file_name = os.path.splitext(face_file)[0] 103 | super_scribbles_file = os.path.join(output_folder, file_name + '.png') 104 | if os.path.exists(super_scribbles_file): 105 | continue 106 | 107 | face = cv2.imread(os.path.join(faces_folder, face_file)) 108 | person_name = file_name[:-5] 109 | sp_file = os.path.join(os.path.join(sp_folder, person_name), file_name + '.dat') 110 | 111 | if not os.path.exists( sp_file ): 112 | print('\033[1m' + 'Superpixels not found in ' + sp_file + '\033[0m') 113 | exit(0) 114 | 115 | print('Editing ' + '\033[1m' + file_name + '\033[0m' + "...") 116 | 117 | # Superpixels: watch out, SP do not have univoque numbering 118 | sp = np.fromfile(sp_file, dtype=int, count=-1, sep=' ') 119 | sp = np.array(sp, dtype=np.uint8) 120 | sp = np.reshape(sp, (250, -1)) 121 | h, w = sp.shape 122 | 123 | # Superpixels bounds 124 | bounds = np.zeros(sp.shape) 125 | for y in range(0, h): 126 | for x in range(0, w): 127 | if y > 0: 128 | if sp[y, x] != sp[y-1, x ]: 129 | bounds[y,x] = 255; 130 | continue 131 | if y < h-1: 132 | if sp[y, x] != sp[y+1, x ]: 133 | bounds[y,x] = 255; 134 | continue 135 | if y < h-1 and x > 0: 136 | if 
sp[y, x] != sp[y+1, x-1]: 137 | bounds[y,x] = 255; 138 | continue 139 | if y < h-1 and x < w-1: 140 | if sp[y, x] != sp[y+1, x+1]: 141 | bounds[y,x] = 255; 142 | continue 143 | if y > 0 and x > 0: 144 | if sp[y, x] != sp[y-1, x-1]: 145 | bounds[y,x] = 255; 146 | continue 147 | if y > 0 and x < w-1: 148 | if sp[y, x] != sp[y-1, x+1]: 149 | bounds[y,x] = 255; 150 | continue 151 | if x > 0: 152 | if sp[y, x] != sp[y , x-1]: 153 | bounds[y,x] = 255; 154 | continue 155 | if x < w-1: 156 | if sp[y, x] != sp[y , x+1]: 157 | bounds[y,x] = 255; 158 | continue 159 | 160 | # Erode 161 | kernel = np.ones((2,2),np.uint8) 162 | bounds = cv2.erode(bounds, kernel, iterations = 1) 163 | 164 | # Boundaries visualization 165 | b,g,r = cv2.split(face) 166 | r[bounds > 0] = r[bounds > 0] * 0.2 + 255 * 0.8; 167 | bounds = cv2.merge((b,g,r)) 168 | 169 | ## SP re-indexing: there could be several superpixels for each SP index label 170 | index = 0 171 | sp_reindex = np.zeros(sp.shape, dtype='uint32') 172 | for s in range(0,np.amax(sp)+1): 173 | mask = np.zeros(sp.shape, dtype='uint8') 174 | mask[sp == s] = 255 175 | _, components = cv2.connectedComponents(mask, connectivity=4) 176 | 177 | if np.amax(components): 178 | for c in range(1,np.amax(components)+1): 179 | index = index + 1 180 | sp_reindex[components == c] = index 181 | 182 | # Scribbles 183 | scribbles = np.zeros(face.shape) 184 | super_scribbles = scribbles.copy() 185 | face_canvas = face.copy() 186 | 187 | # Mouse events callback 188 | cv2.namedWindow(file_name) 189 | cv2.setMouseCallback(file_name, onClick) 190 | 191 | # Defaults 192 | radius = 2 193 | category = 1 194 | 195 | while True: 196 | 197 | # Key handlers 198 | k = cv2.waitKey(1) & 0xFF 199 | if k >= 48 and k <= 53: 200 | category = k - 48 201 | elif k == ord('e'): 202 | category = 0 203 | elif k == ord('q'): 204 | radius = min(radius + 2, 16) 205 | elif k == ord('a'): 206 | radius = max(radius - 2, 2) 207 | elif k == 32: 208 | if radius < 10: 209 | radius = 16 210 | else: 211 | radius = 2 212 | elif k == 13: 213 | break 214 | elif k == 27: 215 | exit(0) 216 | 217 | # Compositing 218 | alpha = 0.12 219 | face_canvas = face.copy() 220 | face_canvas[super_scribbles != 0] = face_canvas[super_scribbles != 0] * alpha + super_scribbles[super_scribbles != 0] * (1-alpha) 221 | 222 | alpha = 0.12 223 | bounds_canvas = bounds.copy() 224 | bounds_canvas[scribbles != 0] = bounds_canvas[scribbles != 0] * alpha + scribbles[scribbles != 0] * (1-alpha) 225 | 226 | alpha = 0.5 227 | overlay = bounds_canvas.copy() 228 | cv2.circle(overlay, pointer, radius, label_colors[category], -1) 229 | bounds_canvas = cv2.addWeighted(bounds_canvas, alpha, overlay, 1 - alpha, 0) 230 | 231 | vis = np.concatenate((bounds_canvas, face_canvas), axis=1) 232 | vis = cv2.resize(vis, (vis.shape[1] * resize, vis.shape[0] * resize), cv2.INTER_NEAREST) 233 | 234 | # Info 235 | font_size = 0.6 236 | font_thickness = 2 237 | hstep = 25 238 | info = "Label (0-5,e): " 239 | cv2.putText(vis, info, (10, hstep * 1), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255,255,255)) 240 | info = " " + label_names[category] 241 | cv2.putText(vis, info, (10, hstep * 1), cv2.FONT_HERSHEY_SIMPLEX, font_size, label_colors[category], font_thickness) 242 | info = "Stroke (q-a,space): " + str(radius) 243 | cv2.putText(vis, info, (10, hstep * 2), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255,255,255)) 244 | info = "Save and give me more (enter)" 245 | cv2.putText(vis, info, (10, hstep * 3), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255,255,255)) 246 | info = "Exit (esc)" 
247 | cv2.putText(vis, info, (10, hstep * 4), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255,255,255)) 248 | 249 | cv2.imshow(file_name, vis) 250 | 251 | cv2.destroyWindow(file_name) 252 | 253 | # Save output 254 | cv2.imwrite(super_scribbles_file, super_scribbles) 255 | print("Labels saved in " + super_scribbles_file) 256 | 257 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /list_experiments.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # -M is the model 4 | # options are fcn, deeplab and gcn 5 | # don't use the gcn option. we haven't test it properly 6 | 7 | # -e are the excluded classes 8 | # this will typically be only #6 (mouth-mask) or none 9 | 10 | # -bs is the model batch size 11 | 12 | # -Vs is the validation set ID 13 | # depending on the experiment we will either use a validation set or another 14 | # in particular, if we are testing for (only) sunglasses augmentation 15 | # we have a validation set with half the images with sunglasses from ELFW. etc. 16 | # 0 is for sunglasses 17 | # 1 is for hands 18 | # 2 is a general validation set with random images 19 | 20 | # -S synthetic augmentation ratio 21 | # it has to be positive and is the ratio of training images that are used from the 22 | # augmentation folders (next parameter) 23 | # if there are 90 images in the train set and we set -S 0.1 we are asking for 90*0.1=9 (synthethically) augmented images 24 | # these images are taken from the augmentation folders uniformly, this is, if there is 1 folder, the 9 are taken from it 25 | # if there are 2 folders, we take int(9/2) from 1 and int(9/2) from the other, 26 | # if there are 3 folders, we take int(9/3) from 1, int(9/3) from the second one and int(9/3) from the last one 27 | 28 | # -St is the augmentation types configuration 29 | # 0 is for Sunglasses 30 | # 1 is for Hands 31 | # 2 is for Masks 32 | # we can combine them as we with, for example, -St 0,1,2 or -St 0 or -St 1,2 33 | # be sure you give the numbers in increasing ordre. nothing bad will happen but the -St 1,2 and -St 2,1 experiments are 34 | # technically the same although it won't be handled 35 | 36 | ################################## 37 | # LIST OF EXPERIMENTS 38 | # 39 | # Sunglasses augmentations: 40 | # * St will only be "-St 0" 41 | # * we exclude the mouth-mask class, so always "-e 6" 42 | # * the model is either fcn or deeplab 43 | # * batch size will depend on the GPU capacity. for the fcn is 16, deeplab is a little lighter so 16 will be ok 44 | # * Validation set will be "-Vs 0" 45 | # * Different augmentation ratios "-S x" for x in [0, 0.25, 0.5, 0.75, 1] (for -S 0, -St is none) 46 | # 47 | # Hands augmentations: 48 | # * St will only be "-St 1" 49 | # * we exclude the mouth-mask class, so always "-e 6" 50 | # * the model is either fcn or deeplab 51 | # * batch size will depend on the GPU capacity. for the fcn is 16, deeplab is a little lighter so 16 will be ok 52 | # * Validation set will be "-Vs 1" 53 | # * Different augmentation ratios "-S x" for x in [0, 0.25, 0.5, 0.75, 1] (for -S 0, -St is none) 54 | # 55 | # All types of augmentations: 56 | # * St will be "-St 0,1,2" 57 | # * we have all classes so -e is none: problem here, evaluation will consider mouth-masks although there is none in the validation set 58 | # * the model is either fcn or deeplab 59 | # * batch size will depend on the GPU capacity. 
for the fcn is 16, deeplab is a little lighter so 16 will be ok 60 | # * Validation set will be "-Vs 2" 61 | # * Different augmentation ratios "-S x" for x in [0, 0.25, 0.5, 0.75, 1] (for -S 0, -St is none) 62 | 63 | 64 | # Sunglasses augmentations 65 | # these first 4 exps will have different names 66 | [2.55] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 0 # 67 | [3.21] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 0 -St 0 -S 0.25 # 68 | [3.85] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 0 -St 0 -S 0.5 # 69 | [5.13] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 0 -St 0 -S 1.0 # 70 | 71 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 0 72 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 0 -St 0 -S 0.25 73 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 0 -St 0 -S 0.5 74 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 0 -St 0 -S 1.0 75 | 76 | # Hands augmentations 77 | python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 1 78 | python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 1 -St 1 -S 0.25 79 | python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 1 -St 1 -S 0.5 80 | python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 1 -St 1 -S 1.0 81 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 1 82 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 1 -St 1 -S 0.25 83 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 1 -St 1 -S 0.5 84 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 1 -St 1 -S 1.0 85 | 86 | # All augmentations - mouth-masks included (for webcam validation - results on val set won't be reported!) 87 | # results will only be qualitative so we only train a single model 88 | python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -Vs 2 -St 0,1,2 -S 0.5 89 | 90 | # All augmentations - mouth-masks excluded (for validating that several augmentations also help) 91 | python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 2 92 | python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 2 -St 0,1 -S 0.25 93 | python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 2 -St 0,1 -S 0.5 94 | python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 2 -St 0,1 -S 1.0 95 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 2 96 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 2 -St 0,1 -S 0.25 97 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 2 -St 0,1 -S 0.5 98 | python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 2 -St 0,1 -S 1.0 99 | 100 | 101 | # Sort them all! 
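# Note: the bracketed figures below are approximate per-run training times
# (presumably hours), used to balance the queue across the four GPUs; each block's
# "Total time" line sums them. Worked example of the ratio described above: with 90
# training images, "-St 0,1 -S 0.5" requests 90*0.5=45 synthetic images,
# int(45/2)=22 from the sunglasses folder and 22 from the hands folder.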
102 | 103 | # @ GPU0 104 | [3.85] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 2 -St 0,1 -S 0.5 105 | [3.21] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 1 -St 1 -S 0.25 106 | [5.13] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 0 -St 0 -S 1.0 # 107 | [4.84] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 0 -St 0 -S 0.25 108 | [5.81] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 0 -St 0 -S 0.5 109 | [5.81] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 2 -St 0,1 -S 0.5 110 | #Total time = 3.85+3.21+4.84+5.81+5.81+5.13=28.65 111 | 112 | # @ GPU1 113 | [3.21] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 0 -St 0 -S 0.25 # 114 | [5.13] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 1 -St 1 -S 1.0 115 | [3.85] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 1 -St 1 -S 0.5 116 | [7.74] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 0 -St 0 -S 1.0 117 | [3.87] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 1 118 | [4.84] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 1 -St 1 -S 0.25 119 | #Total time = 5.13+3.85+7.74+3.87+4.84+3.21=28.64 120 | 121 | # @ GPU2 122 | [2.55] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 0 123 | [2.57] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 2 124 | [3.21] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 2 -St 0,1 -S 0.25 125 | [5.13] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 1 126 | [5.81] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 1 -St 1 -S 0.5 127 | [7.74] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 1 -St 1 -S 1.0 128 | [3.87] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 2 129 | # Total time = 2.57+3.21+5.13+5.81+7.74+3.87+2.55=30.88 130 | 131 | # @ GPU3 132 | [3.85] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 0 -St 0 -S 0.5 133 | [5.13] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -e 6 -Vs 2 -St 0,1 -S 1.0 134 | [3.85] python pytorch-segmentation/run_trainer.py -M fcn -bs 16 -Vs 2 -St 0,1,2 -S 0.5 135 | [3.87] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 0 136 | [4.84] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 2 -St 0,1 -S 0.25 137 | [7.74] python pytorch-segmentation/run_trainer.py -M deeplab -bs 16 -e 6 -Vs 2 -St 0,1 -S 1.0 138 | # Total time = 5.13+3.85+3.87+4.84+7.74+3.85=29.28 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | # Mostly borrowed from https://github.com/ZijunDeng/pytorch-semantic-segmentation 2 | import torch.nn.functional as F 3 | from torch import nn 4 | from torchvision import models 5 | 6 | def initialize_weights(*models): 7 | for model in models: 8 | for module in model.modules(): 9 | if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear): 10 | # nn.init.kaiming_normal(module.weight) # initialization used originally in Resnet 11 | nn.init.xavier_uniform_(module.weight) 12 | if module.bias is not None: 13 | module.bias.data.zero_() 14 | elif isinstance(module, nn.BatchNorm2d): 15 | module.weight.data.fill_(1) 16 | module.bias.data.zero_() 17 | 18 | # Much 
borrowed from https://github.com/ycszen/pytorch-ss/blob/master/gcn.py 19 | class _GlobalConvModule(nn.Module): 20 | def __init__(self, in_dim, out_dim, kernel_size): 21 | super(_GlobalConvModule, self).__init__() 22 | pad0 = int( (kernel_size[0] - 1) / 2 ) 23 | pad1 = int( (kernel_size[1] - 1) / 2 ) 24 | # kernel size had better be odd number so as to avoid alignment error 25 | super(_GlobalConvModule, self).__init__() 26 | self.conv_l1 = nn.Conv2d(in_dim, out_dim, kernel_size=(kernel_size[0], 1), padding=(pad0, 0)) 27 | self.conv_l2 = nn.Conv2d(out_dim, out_dim, kernel_size=(1, kernel_size[1]), padding=(0, pad1)) 28 | self.conv_r1 = nn.Conv2d(in_dim, out_dim, kernel_size=(1, kernel_size[1]), padding=(0, pad1)) 29 | self.conv_r2 = nn.Conv2d(out_dim, out_dim, kernel_size=(kernel_size[0], 1), padding=(pad0, 0)) 30 | 31 | def forward(self, x): 32 | x_l = self.conv_l1(x) 33 | x_l = self.conv_l2(x_l) 34 | x_r = self.conv_r1(x) 35 | x_r = self.conv_r2(x_r) 36 | x = x_l + x_r 37 | return x 38 | 39 | 40 | class _BoundaryRefineModule(nn.Module): 41 | def __init__(self, dim): 42 | super(_BoundaryRefineModule, self).__init__() 43 | self.relu = nn.ReLU(inplace=True) 44 | self.conv1 = nn.Conv2d(dim, dim, kernel_size=3, padding=1) 45 | self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, padding=1) 46 | 47 | def forward(self, x): 48 | residual = x 49 | x = self.conv1(x) 50 | x = self.relu(x) 51 | x = self.conv2(x) 52 | out = x + residual 53 | return out 54 | 55 | 56 | class GCN(nn.Module): 57 | def __init__(self, num_classes, num_levels=4): 58 | super(GCN, self).__init__() 59 | 60 | self.num_levels = num_levels 61 | 62 | # resnet = models.resnet152(pretrained=True) 63 | resnet = models.resnet101(pretrained=True) 64 | 65 | # Resnet-GCN not implemented, instead original Resnet layers are used 66 | self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu) 67 | self.layer1 = nn.Sequential(resnet.maxpool, resnet.layer1) 68 | self.layer2 = resnet.layer2 69 | self.layer3 = resnet.layer3 70 | self.layer4 = resnet.layer4 71 | 72 | kernel_size = 7 # set this value according to the smallest resolution which depends upon the image size and the number of scales in the net 73 | self.gcm1 = _GlobalConvModule(2048, num_classes, (kernel_size, kernel_size)) 74 | self.gcm2 = _GlobalConvModule(1024, num_classes, (kernel_size, kernel_size)) 75 | self.gcm3 = _GlobalConvModule(512, num_classes, (kernel_size, kernel_size)) 76 | self.gcm4 = _GlobalConvModule(256, num_classes, (kernel_size, kernel_size)) 77 | 78 | self.brm1 = _BoundaryRefineModule(num_classes) 79 | self.brm2 = _BoundaryRefineModule(num_classes) 80 | self.brm3 = _BoundaryRefineModule(num_classes) 81 | self.brm4 = _BoundaryRefineModule(num_classes) 82 | self.brm5 = _BoundaryRefineModule(num_classes) 83 | self.brm6 = _BoundaryRefineModule(num_classes) 84 | self.brm7 = _BoundaryRefineModule(num_classes) 85 | self.brm8 = _BoundaryRefineModule(num_classes) 86 | self.brm9 = _BoundaryRefineModule(num_classes) 87 | 88 | initialize_weights(self.gcm1, self.gcm2, self.gcm3, self.gcm4, 89 | self.brm1, self.brm2, self.brm3, self.brm4, self.brm5, self.brm6, self.brm7, self.brm8, self.brm9) 90 | 91 | def forward(self, x): 92 | 93 | if self.num_levels == 2: 94 | # if x: 512 95 | fm0 = self.layer0(x) # 256 96 | fm1 = self.layer1(fm0) # 128 97 | fm2 = self.layer2(fm1) # 64 98 | 99 | gcfm1 = self.brm3(self.gcm3(fm2)) # 64 100 | gcfm2 = self.brm4(self.gcm4(fm1)) # 128 101 | 102 | fs1 = self.brm7(F.upsample_bilinear(gcfm1, fm1.size()[2:]) + gcfm2) # 128 103 | fs2 = 
self.brm8(F.upsample_bilinear(fs1, fm0.size()[2:])) # 256 104 | out = self.brm9(F.upsample_bilinear(fs2, x.size()[2:])) # 512 105 | 106 | return out 107 | 108 | elif self.num_levels == 3: 109 | # if x: 512 110 | fm0 = self.layer0(x) # 256 111 | fm1 = self.layer1(fm0) # 128 112 | fm2 = self.layer2(fm1) # 64 113 | fm3 = self.layer3(fm2) # 32 114 | 115 | gcfm1 = self.brm2(self.gcm2(fm3)) # 32 116 | gcfm2 = self.brm3(self.gcm3(fm2)) # 64 117 | gcfm3 = self.brm4(self.gcm4(fm1)) # 128 118 | 119 | fs1 = self.brm6(F.upsample_bilinear(gcfm1, fm2.size()[2:]) + gcfm2) # 64 120 | fs2 = self.brm7(F.upsample_bilinear(fs1, fm1.size()[2:]) + gcfm3) # 128 121 | fs3 = self.brm8(F.upsample_bilinear(fs2, fm0.size()[2:])) # 256 122 | out = self.brm9(F.upsample_bilinear(fs3, x.size()[2:])) # 512 123 | 124 | return out 125 | 126 | else: 127 | # if x: 512 128 | fm0 = self.layer0(x) # 256 129 | fm1 = self.layer1(fm0) # 128 130 | fm2 = self.layer2(fm1) # 64 131 | fm3 = self.layer3(fm2) # 32 132 | fm4 = self.layer4(fm3) # 16 133 | 134 | gcfm1 = self.brm1(self.gcm1(fm4)) # 16 135 | gcfm2 = self.brm2(self.gcm2(fm3)) # 32 136 | gcfm3 = self.brm3(self.gcm3(fm2)) # 64 137 | gcfm4 = self.brm4(self.gcm4(fm1)) # 128 138 | 139 | fs1 = self.brm5(F.upsample_bilinear(gcfm1, fm3.size()[2:]) + gcfm2) # 32 140 | fs2 = self.brm6(F.upsample_bilinear(fs1, fm2.size()[2:]) + gcfm3) # 64 141 | fs3 = self.brm7(F.upsample_bilinear(fs2, fm1.size()[2:]) + gcfm4) # 128 142 | fs4 = self.brm8(F.upsample_bilinear(fs3, fm0.size()[2:])) # 256 143 | out = self.brm9(F.upsample_bilinear(fs4, x.size()[2:])) # 512 144 | 145 | return out 146 | 147 | class ResnetFCN(nn.Module): 148 | 149 | def __init__(self, num_classes): 150 | super(ResnetFCN, self).__init__() 151 | 152 | # Load the model and change the last layer 153 | fcn = models.segmentation.fcn_resnet101(pretrained=True) 154 | 155 | conv_classifier = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1)) 156 | conv_auxiliar = nn.Conv2d(256, num_classes, kernel_size=(1,1), stride=(1,1)) 157 | 158 | nn.init.xavier_uniform_(conv_classifier.weight) 159 | if conv_classifier.bias is not None: 160 | conv_classifier.bias.data.zero_() 161 | 162 | nn.init.xavier_uniform_(conv_auxiliar.weight) 163 | if conv_auxiliar.bias is not None: 164 | conv_auxiliar.bias.data.zero_() 165 | 166 | fcn.classifier[4] = conv_classifier 167 | fcn.aux_classifier[4] = conv_auxiliar 168 | 169 | self.fcn = fcn 170 | 171 | def forward(self, x): 172 | 173 | return self.fcn(x)['out'] 174 | 175 | 176 | class DeepLabV3(nn.Module): 177 | 178 | def __init__(self, num_classes): 179 | super(DeepLabV3, self).__init__() 180 | 181 | # Load the model and change the last layer 182 | net = models.segmentation.deeplabv3_resnet101(pretrained=True) 183 | 184 | conv_classifier = nn.Conv2d(256, num_classes, kernel_size=(1,1), stride=(1,1)) 185 | conv_auxiliar = nn.Conv2d(256, num_classes, kernel_size=(1,1), stride=(1,1)) 186 | 187 | nn.init.xavier_uniform_(conv_classifier.weight) 188 | if conv_classifier.bias is not None: 189 | conv_classifier.bias.data.zero_() 190 | 191 | nn.init.xavier_uniform_(conv_auxiliar.weight) 192 | if conv_auxiliar.bias is not None: 193 | conv_auxiliar.bias.data.zero_() 194 | 195 | net.classifier[4] = conv_classifier 196 | net.aux_classifier[4] = conv_auxiliar 197 | 198 | self.net = net 199 | 200 | def forward(self, x): 201 | 202 | return self.net(x)['out'] 203 | 204 | -------------------------------------------------------------------------------- /scripts/elfw-makeThemWearSunglasses.py: 
-------------------------------------------------------------------------------- 1 | # This code augments the Labeled Faces in the Wild dataset with sunglasses! 2 | # J.Gibert based on code by R.Redondo, Eurecat 2019 (c). 3 | 4 | 5 | import numpy as np 6 | import sys 7 | import cv2 8 | import os 9 | import fnmatch 10 | # import imutils 11 | from random import randint 12 | from random import random 13 | from random import shuffle 14 | 15 | label_colors = [ 16 | ( 0, 0, 0), # black - background 17 | ( 0,255, 0), # green - skin 18 | ( 0, 0,255), # red - hair 19 | (255,255, 0), # light blue - beard-mustache 20 | (255, 0, 0), # blue - sunglasses 21 | (255, 0,255), # pink - wearable 22 | ( 0,255,255)] # yellow - mouth-mask 23 | 24 | label_names = [ 25 | "background", 26 | "skin", 27 | "hair", 28 | "beard-mustache", 29 | "sunglasses", 30 | "wearable", 31 | "mouth-mask"] 32 | 33 | class bcolors: 34 | PURPLE = '\033[95m' 35 | BLUE = '\033[94m' 36 | GREEN = '\033[92m' 37 | YELLOW = '\033[93m' 38 | RED = '\033[91m' 39 | CYAN = '\033[96m' 40 | ENDC = '\033[0m' 41 | BOLD = '\033[1m' 42 | 43 | 44 | # Make him wear it 45 | def objectOverlay(canvas, item, reference_distance, reference_center, labels, item_type): 46 | 47 | obj = item.copy() 48 | 49 | # Item size adjustment 50 | resize_factor = ( reference_distance ) / (obj.shape[1] * 0.25) 51 | new_size = np.array([int(obj.shape[1] * resize_factor), int(obj.shape[0] * resize_factor)]) 52 | new_size = np.array([new_size[0] + new_size[0] % 2, new_size[1] + new_size[1] % 2]) 53 | obj = cv2.resize(obj, tuple(new_size)) 54 | yc, xc = [int(reference_center[1] - 0.5 * obj.shape[0]), int(reference_center[0] - 0.5 * obj.shape[1])] 55 | b, g, r, a = cv2.split(obj) 56 | a3 = cv2.merge((a,a,a)) 57 | obj = cv2.merge((b,g,r)) 58 | 59 | # Margin crops 60 | left_top = np.array([ max(xc,0), max(yc,0)]) 61 | right_bottom = np.array([ min(xc + obj.shape[1],canvas.shape[1]), min(yc + obj.shape[0],canvas.shape[0])]) 62 | left_top_item = np.array([left_top[0]-xc,left_top[1]-yc]) 63 | right_bottom_item = right_bottom - left_top + left_top_item 64 | a3 = a3[left_top_item[1]:right_bottom_item[1], left_top_item[0]:right_bottom_item[0]] 65 | obj = obj[left_top_item[1]:right_bottom_item[1], left_top_item[0]:right_bottom_item[0]] 66 | canvas_crop = canvas[left_top[1]:right_bottom[1], left_top[0]:right_bottom[0],:] 67 | labels_crop = labels[left_top[1]:right_bottom[1], left_top[0]:right_bottom[0],:] 68 | 69 | # Blending 70 | canvas_crop[a3>0] = obj[a3>0] * 0.92 + canvas_crop[a3>0] * 0.08 71 | t = label_names.index(item_type) 72 | lb, lg, lr = cv2.split(labels_crop) 73 | lb[a>0] = label_colors[t][0] 74 | lg[a>0] = label_colors[t][1] 75 | lr[a>0] = label_colors[t][2] 76 | labels[left_top[1]:right_bottom[1], left_top[0]:right_bottom[0],:] = cv2.merge((lb,lg,lr)) 77 | 78 | #---------------------------------------------------------------------------------------------------- 79 | 80 | if len(sys.argv) != 5: 81 | print("Usage: $ elfw-makeThemWearSunglasses <faces_folder> <labels_folder> <wearables_folder> <output_folder>") 82 | exit(0) 83 | 84 | faces_folder = sys.argv[1] 85 | labels_folder = sys.argv[2] 86 | wearables_folder = sys.argv[3] 87 | output_folder = sys.argv[4] 88 | 89 | output_folder_faces = os.path.join(output_folder, 'faces') 90 | output_folder_labels = os.path.join(output_folder, 'labels') 91 | # output_folder_debug = os.path.join(output_folder, 'debug') 92 | 93 | if not os.path.exists(output_folder): 94 | os.mkdir(output_folder) 95 | if not os.path.exists(output_folder_faces): 96 | os.mkdir(output_folder_faces) 97 | if not
os.path.exists(output_folder_labels): 98 | os.mkdir(output_folder_labels) 99 | # if not os.path.exists(output_folder_debug): 100 | # os.mkdir(output_folder_debug) 101 | 102 | # Not sure if the following will work with all opencv installation type 103 | # I'm currently working with a virtual environment in which I have installed opencv 4.1.0 using pip 104 | # The opencv location is the following ('env' is the virtual environment name): 105 | # "/home/jaume.gibert/Code/facesinthewild/env/lib/python3.5/site-packages/cv2/" 106 | haar_folder = os.path.join(os.path.dirname(cv2.__file__), 'data') 107 | haar_face_ddbb = os.path.join(haar_folder, "haarcascade_frontalface_default.xml") 108 | haar_eye_ddbb = os.path.join(haar_folder, "haarcascade_eye.xml") 109 | 110 | print('\n' + bcolors.BOLD + 'Initiating Haar detector from ' + haar_folder + bcolors.ENDC) 111 | 112 | face_cascade = cv2.CascadeClassifier() 113 | if not face_cascade.load(haar_face_ddbb): 114 | print('--(!)Error loading face cascade') 115 | exit(0) 116 | 117 | eye_cascade = cv2.CascadeClassifier() 118 | if not eye_cascade.load(haar_eye_ddbb): 119 | print('--(!)Error loading eye cascade') 120 | exit(0) 121 | 122 | print(bcolors.GREEN + 'DONE!' + bcolors.ENDC) 123 | print("") 124 | 125 | #----------------------------------------------------------------------------------------------------- 126 | # Keep sunglasses around in a list of images 127 | sunglasses = [] 128 | for wearable_file in os.listdir(wearables_folder): 129 | 130 | if not wearable_file.endswith(".png"): 131 | continue 132 | 133 | if fnmatch.fnmatch(wearable_file, '*sunglasses*'): 134 | img = cv2.imread(os.path.join(wearables_folder, wearable_file), cv2.IMREAD_UNCHANGED) 135 | sunglasses.append([img, os.path.splitext(wearable_file)[0]]) 136 | 137 | #----------------------------------------------------------------------------------------------------- 138 | # For each image, look for a face and, if it does not already include sunglasses, paste one on the (detected) eyes 139 | 140 | counter_all_images = 0 141 | counter_no_jpg = 0 142 | counter_with_glasses = 0 143 | counter_no_face = 0 144 | counter_multiple_faces = 0 145 | counter_no_eyes = 0 146 | counter_saved_images = 0 147 | 148 | N = len(os.listdir(faces_folder)) 149 | 150 | for n, face_file in enumerate(os.listdir(faces_folder)): 151 | 152 | counter_all_images += 1 153 | base_name = os.path.splitext(face_file)[0] 154 | 155 | # Print the image number and name 156 | if not n: 157 | sys.stdout.flush() 158 | print("") 159 | sys.stdout.write('\x1b[1A') 160 | sys.stdout.write('\x1b[2K') 161 | print(bcolors.BLUE + "["+ str(n).zfill(4) +"/"+ str(N) +"] " + base_name + bcolors.ENDC) 162 | 163 | if not face_file.endswith(".jpg"): 164 | counter_no_jpg += 1 165 | continue 166 | 167 | # # Use this to debug for a specific image or images... 168 | # if not fnmatch.fnmatch(face_file, '*Amer_al*'): 169 | # continue 170 | 171 | # Before doing anything, we should check whether the original image contains already some 172 | # pixels labeled as sunglasses. 
If so, we will not augment this image 173 | 174 | # Load labels image 175 | labels = cv2.imread(os.path.join(labels_folder, base_name+'.png')) 176 | 177 | # Build up a mask for the sunglasses class 178 | sunglasses_color = label_colors[4] 179 | mask = np.ones((labels.shape[0],labels.shape[1])) 180 | for c in [0,1,2]: 181 | mask_c = np.zeros((labels.shape[0],labels.shape[1])) 182 | index = (labels[:,:,c] == sunglasses_color[c]) 183 | mask_c[index] = 1 184 | mask = mask * mask_c 185 | 186 | if np.sum(mask) > 0: 187 | # print(bcolors.BLUE + "Already has sunglasses: " + base_name + bcolors.ENDC) 188 | # cv2.imwrite(output_folder_debug + base_name + '.ppm', labels) 189 | counter_with_glasses += 1 190 | continue 191 | 192 | # Face image 193 | image = cv2.imread(os.path.join(faces_folder, face_file)) 194 | 195 | # Face pre-processing for detection of face and eyes 196 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 197 | cv2.equalizeHist(gray) 198 | # ih, iw = gray.shape 199 | # center = [iw*0.5, ih*0.5] 200 | 201 | # Detect faces in image 202 | faces = face_cascade.detectMultiScale(gray, 1.1, 6) 203 | if not len(faces): 204 | counter_no_face += 1 205 | continue 206 | #print(bcolors.YELLOW + " -- Number of faces detected: " + str(len(faces)) + bcolors.ENDC) 207 | 208 | # When there are multiple detections, it is hard to tell which is the proper one 209 | # since we have lots of images to augment, we will discard these cases 210 | if len(faces) > 1: 211 | counter_multiple_faces += 1 212 | continue 213 | 214 | # Put sunglasses on the different detected face - actually we only have one face, 215 | # since other cases have been discarded 216 | for face_id, (x,y,w,h) in enumerate(faces): 217 | face_center = [x + w * 0.5, y + h * 0.5] 218 | roi_gray = gray[y:y+h, x:x+w] 219 | roi_color = image[y:y+h, x:x+w] 220 | # cv2.line(image,(0,int(face_center[1])),(250,int(face_center[1])),(255, 0, 0), 1) 221 | # cv2.line(image,(int(face_center[0]),0),(int(face_center[0]),250),(255, 0, 0), 1) 222 | 223 | # Eyes detection on the current face 224 | eyes = eye_cascade.detectMultiScale(roi_gray, 1.05, 5) 225 | right_eye = [] 226 | left_eye = [] 227 | for (ex, ey, ew, eh) in eyes: 228 | eye_center = np.array([x + ex + ew * 0.5, y + ey + eh * 0.5]) 229 | if eye_center[1] < face_center[1]: 230 | # cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2) 231 | if eye_center[0] > face_center[0]: 232 | left_eye = eye_center 233 | else: 234 | right_eye = eye_center 235 | 236 | if not len(left_eye) or not len(right_eye): 237 | counter_no_eyes += 1 238 | continue 239 | 240 | # Eyes are more reliable to estimate face size, even for masks 241 | reference_size = (left_eye[0] - right_eye[0]) * 0.5 242 | middle_eye = (left_eye + right_eye) * 0.5 243 | 244 | # Paste sunglasses on the eyes 245 | # Use only a random number of the available: shuffle the list and take the k first 246 | shuffle(sunglasses) 247 | for i in range(10): 248 | 249 | # create copies so we don't keep pasting items on the same image all the time 250 | im = image.copy() 251 | lb = labels.copy() 252 | 253 | # augmentation id for storing the file 254 | G = sunglasses[i][0] 255 | aug_id = sunglasses[i][1] 256 | #aug_id = str(i).zfill(4) 257 | 258 | # overlay item 259 | objectOverlay(im, G, reference_size, middle_eye, lb, "sunglasses" ) 260 | 261 | # save image and labels 262 | augmented_face_file = os.path.join(output_folder_faces, base_name+'_'+aug_id+'.jpg') 263 | augmented_labels_file = os.path.join(output_folder_labels, 
base_name+'_'+aug_id+'.png') 264 | if not os.path.isfile(augmented_face_file): 265 | cv2.imwrite(augmented_face_file, im) 266 | cv2.imwrite(augmented_labels_file, lb) 267 | counter_saved_images += 1 268 | else: 269 | print(bcolors.RED + "File already exists: " + augmented_face_file + bcolors.ENDC) 270 | 271 | print("\n" + bcolors.RED + "Total number of files .... " + bcolors.ENDC + str(counter_all_images)) 272 | print("\n" + bcolors.BOLD + "No jpg images ............ " + bcolors.ENDC + str(counter_no_jpg)) 273 | print( bcolors.BOLD + "With real sunglasses ..... " + bcolors.ENDC + str(counter_with_glasses)) 274 | print( bcolors.BOLD + "No face detected ......... " + bcolors.ENDC + str(counter_no_face)) 275 | print( bcolors.BOLD + "Several faces detected ... " + bcolors.ENDC + str(counter_multiple_faces)) 276 | print( bcolors.BOLD + "No eyes detected ......... " + bcolors.ENDC + str(counter_no_eyes)) 277 | print( bcolors.BOLD + "Saved images ............. " + bcolors.ENDC + str(counter_saved_images)) 278 | print("\n") 279 | 280 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /scripts/elfw-makeThemWearMasks.py: -------------------------------------------------------------------------------- 1 | # This code augments the Labeled Faces in the Wild dataset with Masks! 2 | # J.Gibert based on code by R.Redondo, Eurecat 2019 (c). 3 | 4 | 5 | import numpy as np 6 | import sys 7 | import cv2 8 | import os 9 | import fnmatch 10 | # import imutils 11 | from random import randint 12 | from random import random 13 | from random import shuffle 14 | 15 | label_colors = [ 16 | ( 0, 0, 0), # black - background 17 | ( 0,255, 0), # green - skin 18 | ( 0, 0,255), # red - hair 19 | (255,255, 0), # light blue - beard-mustache 20 | (255, 0, 0), # blue - sunglasses 21 | (255, 0,255), # pink - wearable 22 | ( 0,255,255)] # yellow - mouth-mask 23 | 24 | label_names = [ 25 | "background", 26 | "skin", 27 | "hair", 28 | "beard-mustache", 29 | "sunglasses", 30 | "wearable", 31 | "mouth-mask"] 32 | 33 | class bcolors: 34 | PURPLE = '\033[95m' 35 | BLUE = '\033[94m' 36 | GREEN = '\033[92m' 37 | YELLOW = '\033[93m' 38 | RED = '\033[91m' 39 | CYAN = '\033[96m' 40 | ENDC = '\033[0m' 41 | BOLD = '\033[1m' 42 | CYAN = '\033[96m' 43 | 44 | # Make him wear it 45 | def objectOverlay(canvas, item, reference_distance, reference_center, labels, item_type): 46 | 47 | obj = item.copy() 48 | 49 | # Item size adjustment 50 | resize_factor = ( reference_distance ) / (obj.shape[1] * 0.25) 51 | new_size = np.array([int(obj.shape[1] * resize_factor), int(obj.shape[0] * resize_factor)]) 52 | new_size = np.array([new_size[0] + new_size[0] % 2, new_size[1] + new_size[1] % 2]) 53 | obj = cv2.resize(obj, tuple(new_size)) 54 | yc, xc = [int(reference_center[1] - 0.5 * obj.shape[0]), int(reference_center[0] - 0.5 * obj.shape[1])] 55 | b, g, r, a = cv2.split(obj) 56 | a3 = cv2.merge((a,a,a)) 57 | obj = cv2.merge((b,g,r)) 58 | 59 | # Margin crops 60 | left_top = np.array([ max(xc,0), max(yc,0)]) 61 | right_bottom = np.array([ min(xc + obj.shape[1],canvas.shape[1]), min(yc + obj.shape[0],canvas.shape[0])]) 62 | left_top_item = np.array([left_top[0]-xc,left_top[1]-yc]) 63 | right_bottom_item = right_bottom - left_top + left_top_item 64 | a3 = a3[left_top_item[1]:right_bottom_item[1], left_top_item[0]:right_bottom_item[0]] 65 | obj = obj[left_top_item[1]:right_bottom_item[1], left_top_item[0]:right_bottom_item[0]] 66 | canvas_crop = canvas[left_top[1]:right_bottom[1], 
left_top[0]:right_bottom[0],:] 67 | labels_crop = labels[left_top[1]:right_bottom[1], left_top[0]:right_bottom[0],:] 68 | 69 | # Blending 70 | canvas_crop[a3>0] = obj[a3>0] * 0.92 + canvas_crop[a3>0] * 0.08 71 | t = label_names.index(item_type) 72 | lb, lg, lr = cv2.split(labels_crop) 73 | lb[a>0] = label_colors[t][0] 74 | lg[a>0] = label_colors[t][1] 75 | lr[a>0] = label_colors[t][2] 76 | labels[left_top[1]:right_bottom[1], left_top[0]:right_bottom[0],:] = cv2.merge((lb,lg,lr)) 77 | 78 | #---------------------------------------------------------------------------------------------------- 79 | 80 | if len(sys.argv) != 5: 81 | print("Usage: $ elfw-makeThemWearMasks <faces_folder> <labels_folder> <wearables_folder> <output_folder>") 82 | exit(0) 83 | 84 | faces_folder = sys.argv[1] 85 | labels_folder = sys.argv[2] 86 | wearables_folder = sys.argv[3] 87 | output_folder = sys.argv[4] 88 | 89 | output_folder_faces = os.path.join(output_folder, 'faces') 90 | output_folder_labels = os.path.join(output_folder, 'labels') 91 | #output_folder_debug = os.path.join(output_folder, 'debug') 92 | 93 | if not os.path.exists(output_folder): 94 | os.mkdir(output_folder) 95 | if not os.path.exists(output_folder_faces): 96 | os.mkdir(output_folder_faces) 97 | if not os.path.exists(output_folder_labels): 98 | os.mkdir(output_folder_labels) 99 | # if not os.path.exists(output_folder_debug): 100 | # os.mkdir(output_folder_debug) 101 | 102 | # Not sure if the following will work with all opencv installation types 103 | # I'm currently working with a virtual environment in which I have installed opencv 4.1.0 using pip 104 | # The opencv location is the following ('env' is the virtual environment name): 105 | # "/home/jaume.gibert/Code/facesinthewild/env/lib/python3.5/site-packages/cv2/" 106 | haar_folder = os.path.join(os.path.dirname(cv2.__file__), 'data') 107 | haar_face_ddbb = os.path.join(haar_folder, "haarcascade_frontalface_default.xml") 108 | haar_eye_ddbb = os.path.join(haar_folder, "haarcascade_eye.xml") 109 | haar_mouth_ddbb = os.path.join(haar_folder, "haarcascade_smile.xml") 110 | 111 | 112 | print('\n' + bcolors.BOLD + 'Initiating Haar detector from ' + haar_folder + bcolors.ENDC) 113 | 114 | face_cascade = cv2.CascadeClassifier() 115 | if not face_cascade.load(haar_face_ddbb): 116 | print('--(!)Error loading face cascade') 117 | exit(0) 118 | 119 | eye_cascade = cv2.CascadeClassifier() 120 | if not eye_cascade.load(haar_eye_ddbb): 121 | print('--(!)Error loading eye cascade') 122 | exit(0) 123 | 124 | mouth_cascade = cv2.CascadeClassifier() 125 | if not mouth_cascade.load(haar_mouth_ddbb): 126 | print('--(!)Error loading mouth cascade') 127 | exit(0) 128 | 129 | print(bcolors.GREEN + 'DONE!'
+ bcolors.ENDC) 130 | print("") 131 | 132 | #----------------------------------------------------------------------------------------------------- 133 | # Keep masks around in a list of images 134 | masks = [] 135 | for wearable_file in os.listdir(wearables_folder): 136 | 137 | if not wearable_file.endswith(".png"): 138 | continue 139 | 140 | if fnmatch.fnmatch(wearable_file, '*mask*'): 141 | img = cv2.imread(os.path.join(wearables_folder, wearable_file), cv2.IMREAD_UNCHANGED) 142 | masks.append([img, os.path.splitext(wearable_file)[0]]) 143 | 144 | 145 | #----------------------------------------------------------------------------------------------------- 146 | # For each image, look for a face and paste a mouth-mask on the (detected) mouth 147 | 148 | counter_all_images = 0 149 | counter_no_jpg = 0 150 | counter_with_glasses = 0 151 | counter_no_face = 0 152 | counter_multiple_faces = 0 153 | counter_no_eyes = 0 154 | counter_no_mouth = 0 155 | counter_saved_images = 0 156 | 157 | N = len(os.listdir(faces_folder)) 158 | 159 | for n, face_file in enumerate(os.listdir(faces_folder)): 160 | 161 | counter_all_images += 1 162 | base_name = os.path.splitext(face_file)[0] 163 | 164 | # Print the image number and name 165 | if not n: 166 | sys.stdout.flush() 167 | print("") 168 | sys.stdout.write('\x1b[1A') 169 | sys.stdout.write('\x1b[2K') 170 | print(bcolors.BLUE + "["+ str(n).zfill(4) +"/"+ str(N) +"] " + base_name + bcolors.ENDC) 171 | 172 | if not face_file.endswith(".jpg"): 173 | counter_no_jpg += 1 174 | continue 175 | 176 | # # Use this to debug for a specific image or images... 177 | # if not fnmatch.fnmatch(face_file, '*Amer_al*'): 178 | # continue 179 | 180 | # Load labels image 181 | labels = cv2.imread(os.path.join(labels_folder, base_name+'.png')) 182 | 183 | # Face image 184 | image = cv2.imread(os.path.join(faces_folder, face_file)) 185 | 186 | # Face pre-processing for detection of face and eyes 187 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 188 | cv2.equalizeHist(gray) 189 | # ih, iw = gray.shape 190 | # center = [iw*0.5, ih*0.5] 191 | 192 | # Detect faces in image 193 | faces = face_cascade.detectMultiScale(gray, 1.1, 6) 194 | if not len(faces): 195 | counter_no_face += 1 196 | continue 197 | #print(bcolors.YELLOW + " -- Number of faces detected: " + str(len(faces)) + bcolors.ENDC) 198 | 199 | # When there are multiple detections, it is hard to tell which is the proper one 200 | # since we have lots of images to augment, we will discard these cases 201 | if len(faces) > 1: 202 | counter_multiple_faces += 1 203 | continue 204 | 205 | # Put sunglasses on the different detected face - actually we only have one face, 206 | # since other cases have been discarded 207 | for face_id, (x,y,w,h) in enumerate(faces): 208 | face_center = [x + w * 0.5, y + h * 0.5] 209 | roi_gray = gray[y:y+h, x:x+w] 210 | roi_color = image[y:y+h, x:x+w] 211 | # cv2.line(image,(0,int(face_center[1])),(250,int(face_center[1])),(255, 0, 0), 1) 212 | # cv2.line(image,(int(face_center[0]),0),(int(face_center[0]),250),(255, 0, 0), 1) 213 | 214 | # Eyes detection on the current face 215 | eyes = eye_cascade.detectMultiScale(roi_gray, 1.05, 5) 216 | right_eye = [] 217 | left_eye = [] 218 | for (ex, ey, ew, eh) in eyes: 219 | eye_center = np.array([x + ex + ew * 0.5, y + ey + eh * 0.5]) 220 | if eye_center[1] < face_center[1]: 221 | # cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2) 222 | if eye_center[0] > face_center[0]: 223 | left_eye = eye_center 224 | else: 225 | right_eye = 
eye_center 226 | 227 | if not len(left_eye) or not len(right_eye): 228 | counter_no_eyes += 1 229 | continue 230 | 231 | # Eyes are more reliable to estimate face size, even for masks 232 | reference_size = (left_eye[0] - right_eye[0]) * 0.5 233 | 234 | # Mouth detection 235 | mouths = mouth_cascade.detectMultiScale(roi_gray, 1.1, 7) 236 | if not len(mouths): 237 | counter_no_mouth += 1 238 | continue 239 | 240 | # Take the first apparently well-located mouth 241 | save_guard = 10 # pixels below face center 242 | for i, (mx, my, mw, mh) in enumerate(mouths): 243 | mouth_center = [x + mx + mw * 0.5, y + my + mh * 0.5] 244 | if mouth_center[1] > face_center[1] + save_guard: 245 | # cv2.rectangle(roi_color, (mx, my), (mx+mw, my+mh), (0, 255, 0), 2) 246 | break 247 | if (i+1) == len(mouths): 248 | counter_no_mouth += 1 249 | continue 250 | 251 | # Paste mask on the mouth 252 | # Use only a random number of the available: shuffle the list and take the k first 253 | shuffle(masks) 254 | for i in range(10): 255 | 256 | # create copies so we don't keep pasting items on the same image all the time 257 | im = image.copy() 258 | lb = labels.copy() 259 | 260 | # augmentation id for storing the file 261 | M = masks[i][0] 262 | aug_id = masks[i][1] 263 | #aug_id = str(i).zfill(4) 264 | 265 | # overlay item 266 | objectOverlay(im, M, reference_size, mouth_center, lb, "mouth-mask" ) 267 | 268 | # save image and labels 269 | augmented_face_file = os.path.join(output_folder_faces, base_name+'_'+aug_id+'.jpg') 270 | augmented_labels_file = os.path.join(output_folder_labels, base_name+'_'+aug_id+'.png') 271 | if not os.path.isfile(augmented_face_file): 272 | cv2.imwrite(augmented_face_file, im) 273 | cv2.imwrite(augmented_labels_file, lb) 274 | counter_saved_images += 1 275 | else: 276 | print(bcolors.RED + "File already exists: " + augmented_face_file + bcolors.ENDC) 277 | 278 | print("\n" + bcolors.RED + "Total number of files .... " + bcolors.ENDC + str(counter_all_images)) 279 | print("\n" + bcolors.BOLD + "No jpg images ............ " + bcolors.ENDC + str(counter_no_jpg)) 280 | print( bcolors.BOLD + "With real sunglasses ..... " + bcolors.ENDC + str(counter_with_glasses)) 281 | print( bcolors.BOLD + "No face detected ......... " + bcolors.ENDC + str(counter_no_face)) 282 | print( bcolors.BOLD + "Several faces detected ... " + bcolors.ENDC + str(counter_multiple_faces)) 283 | print( bcolors.BOLD + "No eyes detected ......... " + bcolors.ENDC + str(counter_no_eyes)) 284 | print( bcolors.BOLD + "No mouth detected ........ " + bcolors.ENDC + str(counter_no_mouth)) 285 | print( bcolors.BOLD + "Saved images ............. " + bcolors.ENDC + str(counter_saved_images)) 286 | print("\n") 287 | 288 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /scripts/elfw-makeItLookCool.py: -------------------------------------------------------------------------------- 1 | # [Deprecated, see augmentation scripts for each augmentation object (hands, sunglasses, and masks)] 2 | # 3 | # This code uses the Viola-Jones face detector to 4 | # augment face images with synthetic sunglasses, hands and color patches. 5 | # 6 | # R. Redondo, Eurecat 2019 (c). 
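# Example invocation (a sketch; the argument names are inferred from the sys.argv parsing below, and the folder paths are illustrative):
#
#   $ python elfw-makeItLookCool.py faces/ labels/ wearables/ hands/ output/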
7 | 8 | import numpy as np 9 | import sys 10 | import cv2 11 | import os 12 | import fnmatch 13 | import imutils 14 | from random import randint 15 | from random import random 16 | 17 | label_colors = [ 18 | ( 0, 0, 0), 19 | ( 0,255, 0), 20 | ( 0, 0,255), 21 | (255,255, 0), 22 | (255, 0, 0), 23 | (255, 0,255), 24 | ( 0,255,255)] 25 | 26 | label_names = [ 27 | "background", 28 | "skin", 29 | "hair", 30 | "beard-mustache", 31 | "sunglasses", 32 | "wearable", 33 | "mouth-mask"] 34 | 35 | # Make him wear it 36 | def objectOverlay(canvas, item, reference_distance, reference_center, labels, item_type): 37 | 38 | obj = item.copy() 39 | 40 | # Item size adjustment 41 | resize_factor = ( reference_distance ) / (obj.shape[1] * 0.25) 42 | new_size = np.array([int(obj.shape[1] * resize_factor), int(obj.shape[0] * resize_factor)]) 43 | new_size = np.array([new_size[0] + new_size[0] % 2, new_size[1] + new_size[1] % 2]) 44 | obj = cv2.resize(obj, tuple(new_size)) 45 | yc, xc = [int(reference_center[1] - 0.5 * obj.shape[0]), int(reference_center[0] - 0.5 * obj.shape[1])] 46 | b, g, r, a = cv2.split(obj) 47 | a3 = cv2.merge((a,a,a)) 48 | obj = cv2.merge((b,g,r)) 49 | 50 | # Margin crops 51 | left_top = np.array([ max(xc,0), max(yc,0)]) 52 | right_bottom = np.array([ min(xc + obj.shape[1],canvas.shape[1]), min(yc + obj.shape[0],canvas.shape[0])]) 53 | left_top_item = np.array([left_top[0]-xc,left_top[1]-yc]) 54 | right_bottom_item = right_bottom - left_top + left_top_item 55 | a3 = a3[left_top_item[1]:right_bottom_item[1], left_top_item[0]:right_bottom_item[0]] 56 | obj = obj[left_top_item[1]:right_bottom_item[1], left_top_item[0]:right_bottom_item[0]] 57 | canvas_crop = canvas[left_top[1]:right_bottom[1], left_top[0]:right_bottom[0],:] 58 | labels_crop = labels[left_top[1]:right_bottom[1], left_top[0]:right_bottom[0],:] 59 | 60 | # Blending 61 | canvas_crop[a3>0] = obj[a3>0] * 0.92 + canvas_crop[a3>0] * 0.08 62 | t = label_names.index(item_type) 63 | lb, lg, lr = cv2.split(labels_crop) 64 | lb[a>0] = label_colors[t][0] 65 | lg[a>0] = label_colors[t][1] 66 | lr[a>0] = label_colors[t][2] 67 | labels[left_top[1]:right_bottom[1], left_top[0]:right_bottom[0],:] = cv2.merge((lb,lg,lr)) 68 | 69 | #---------------------------------------------------------------------------------------------------- 70 | 71 | if len(sys.argv) != 6: 72 | print("Usage: $ elfw-makeItLookCool ") 73 | exit(0) 74 | 75 | faces_folder = sys.argv[1] + '/' 76 | labels_folder = sys.argv[2] + '/' 77 | wearables_folder = sys.argv[3] + '/' 78 | hands_folder = sys.argv[4] + '/' 79 | output_folder = sys.argv[5] + '/' 80 | 81 | # faces_folder = '../Datasets/lfw-deepfunneled/' 82 | # output_folder = '../Datasets/lfw-deepfunneled-wearables/' 83 | 84 | output_folder_faces = output_folder + '/faces/' 85 | output_folder_labels = output_folder + '/labels/' 86 | output_folder_faces_occluded = output_folder + '/faces_occluded/' 87 | output_folder_labels_occluded = output_folder + '/labels_occluded/' 88 | 89 | if not os.path.exists(output_folder): 90 | os.mkdir(output_folder) 91 | 92 | if not os.path.exists(output_folder_faces): 93 | os.mkdir(output_folder_faces) 94 | 95 | if not os.path.exists(output_folder_labels): 96 | os.mkdir(output_folder_labels) 97 | 98 | if not os.path.exists(output_folder_faces_occluded): 99 | os.mkdir(output_folder_faces_occluded) 100 | 101 | if not os.path.exists(output_folder_labels_occluded): 102 | os.mkdir(output_folder_labels_occluded) 103 | 104 | haar_folder = "facedetectors/opencv/haarcascades" 105 | haar_face_ddbb = 
haar_folder + "/haarcascade_frontalface_default.xml" 106 | haar_eye_ddbb = haar_folder + "/haarcascade_eye.xml" 107 | haar_mouth_ddbb = haar_folder + "/haarcascade_mouth.xml" 108 | print('\033[1m' + 'Initiating Haar detector from' + haar_folder + '\033[0m') 109 | face_cascade = cv2.CascadeClassifier(haar_face_ddbb) 110 | eye_cascade = cv2.CascadeClassifier(haar_eye_ddbb) 111 | mouth_cascade = cv2.CascadeClassifier(haar_mouth_ddbb) 112 | 113 | #----------------------------------------------------------------------------------------------------- 114 | # Wearables 115 | 116 | sunglasses = [] 117 | masks = [] 118 | 119 | for wearable_file in os.listdir(wearables_folder): 120 | 121 | if not wearable_file.endswith(".png"): 122 | continue 123 | 124 | img = cv2.imread(wearables_folder + wearable_file, cv2.IMREAD_UNCHANGED) 125 | 126 | if fnmatch.fnmatch(wearable_file, '*sunglasses*'): 127 | sunglasses.append(img) 128 | elif fnmatch.fnmatch(wearable_file, '*mask*'): 129 | masks.append(img) 130 | 131 | 132 | #----------------------------------------------------------------------------------------------------- 133 | # Occluders 134 | 135 | hands = [] 136 | 137 | for hand_file in os.listdir(hands_folder): 138 | 139 | if not hand_file.endswith(".png"): 140 | continue 141 | 142 | img = cv2.imread(hands_folder + hand_file, cv2.IMREAD_UNCHANGED) 143 | hands.append(img) 144 | 145 | #----------------------------------------------------------------------------------------------------- 146 | 147 | for face_file in os.listdir(faces_folder): 148 | 149 | if not face_file.endswith(".jpg"): 150 | continue 151 | 152 | name = os.path.splitext(face_file)[0] 153 | 154 | # Face image 155 | image = cv2.imread(faces_folder + face_file) 156 | # b_channel, g_channel, r_channel = cv2.split(image) 157 | # alpha_channel = np.ones(b_channel.shape, dtype=b_channel.dtype) * 255 158 | # image = cv2.merge((b_channel, g_channel, r_channel, alpha_channel)) 159 | 160 | # Face pre-processing 161 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 162 | cv2.equalizeHist(gray) 163 | ih, iw = gray.shape 164 | center = [iw*0.5,ih*0.5] 165 | 166 | # Face detection 167 | faces = face_cascade.detectMultiScale(gray, 1.1, 6) 168 | if faces is None: 169 | continue 170 | 171 | # Label image 172 | labels_name = os.path.splitext(face_file)[0] 173 | labels = cv2.imread(labels_folder + labels_name + '.ppm') 174 | 175 | # Change default background color 176 | default_background_color = (255,0,0) 177 | mask = np.ones((labels.shape[0],labels.shape[1])) 178 | for c in [0,1,2]: 179 | mask_c = np.zeros((labels.shape[0],labels.shape[1])) 180 | index = labels[:,:,c] == default_background_color[c] 181 | mask_c[index] = 1 182 | mask = mask * mask_c 183 | 184 | for c in [0,1,2]: 185 | labels[:,:,c] = labels[:,:,c] * (1-mask) 186 | 187 | # Put objets on the face 188 | for (x,y,w,h) in faces: 189 | roi_gray = gray[y:y+h, x:x+w] 190 | roi_color = image[y:y+h, x:x+w] 191 | #cv2.line(image,(0,125),(250,125),(255, 0, 0), 1) 192 | 193 | # Eyes 194 | eyes = eye_cascade.detectMultiScale(roi_gray,1.1, 6) 195 | num_eyes = 0 196 | right_eye = 0 197 | left_eye = 0 198 | middle_eye = np.array([0,0]) 199 | 200 | for (ex, ey, ew, eh) in eyes: 201 | eye_center = [x + ex + ew * 0.5, y + ey + eh * 0.5] 202 | if eye_center[1] < center[0]: 203 | # cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2) 204 | middle_eye = middle_eye + eye_center 205 | if eye_center[0] > center[0]: 206 | left_eye = eye_center 207 | else : 208 | right_eye = eye_center 209 | num_eyes = 
num_eyes + 1 210 | 211 | if num_eyes != 2 or not left_eye or not right_eye: 212 | continue 213 | 214 | middle_eye = middle_eye / num_eyes 215 | #cv2.circle(image, (int(left_eye[0]),int(left_eye[1])), 4, (0, 255, 255), 4) 216 | #cv2.circle(image, (int(right_eye[0]),int(right_eye[1])), 4, (0, 255, 255), 4) 217 | #cv2.circle(image, (int(middle_eye[0]),int(middle_eye[1])), 4, (0, 0, 255), 4) 218 | 219 | occluded = random() < 0.5 220 | has_object = False 221 | 222 | # Sunglasses 223 | if not occluded and random() < 0.8: 224 | 225 | i = randint(0, len(sunglasses)-1) 226 | objectOverlay(image, sunglasses[i], left_eye[0] - middle_eye[0], middle_eye, labels, "sunglasses" ) 227 | has_object = True 228 | 229 | # Mouth mask 230 | if not occluded and (random() < 0.2 or not has_object): 231 | 232 | mouths = mouth_cascade.detectMultiScale(roi_gray, 1.3, 7) 233 | 234 | for (mx, my, mw, mh) in mouths: 235 | mouth_center = [x + mx + mw * 0.5, y + my + mh * 0.5] 236 | if mouth_center[1] > center[1]: 237 | #cv2.rectangle(roi_color, (mx, my), (mx + mw, my + mh), (255, 255, 0), 2) 238 | i = randint(0, len(masks)-1) 239 | objectOverlay(image, masks[i], left_eye[0] - middle_eye[0], mouth_center, labels, "mouth-mask") 240 | break 241 | 242 | if occluded: 243 | 244 | # Hands 245 | if random() < 0.5: 246 | 247 | i = randint(0, len(hands)-1) 248 | hand_hsv = cv2.cvtColor(hands[i], cv2.COLOR_BGR2HSV) 249 | hh, hw, hc = hand_hsv.shape 250 | hand_mean_hsv = np.mean(hand_hsv[int(hh*0.4):int(hh*0.6),int(hw*0.4):int(hw*0.6),:], axis=(0,1)) 251 | 252 | fh, fw, fc = roi_color.shape 253 | face_mean = np.mean(roi_color[int(fh*0.25):int(fh*0.75),int(fw*0.25):int(fw*0.75),:], axis=(0,1)) 254 | face_mean_rgb = np.ones((1,1,3)) * face_mean 255 | face_mean_hsv = cv2.cvtColor(np.array(face_mean_rgb, dtype=np.uint8), cv2.COLOR_BGR2HSV) 256 | face_mean_hsv = face_mean_hsv.astype('float32') 257 | value_diff = face_mean_hsv[0,0,2] - hand_mean_hsv[2] 258 | 259 | for y in range(0, hh): 260 | for x in range(0, hw): 261 | hand_hsv[y,x,0] = face_mean_hsv[0,0,0] 262 | hand_hsv[y,x,2] = max(0,min(hand_hsv[y,x,2] + value_diff,255)) 263 | 264 | hand_bgr = cv2.cvtColor(hand_hsv, cv2.COLOR_HSV2BGR) 265 | b, g, r, a = cv2.split(hands[i]) 266 | hand_bgra = cv2.merge((hand_bgr,a)) 267 | hand_bgra = cv2.resize(hand_bgra, tuple(image.shape[:2])) 268 | hand_bgra = imutils.rotate_bound(hand_bgra, randint(0, 360)) 269 | hand_center = (iw * (0.25 + 0.5 * random()), ih * (0.25 + 0.5 * random())) 270 | objectOverlay(image, hand_bgra, left_eye[0] - middle_eye[0], hand_center, labels, "background") 271 | 272 | # Stripe occluder 273 | else: 274 | 275 | occluded = True 276 | stripe_top_left = (int(iw * (0.0 + 0.5 * random())), int(ih * (0.0 + 0.5 * random()))) 277 | stripe_size = (int(iw * (0.25 + 0.33 * random())), int(ih * (0.25 + 0.33 * random()))) 278 | stripe_bottom_left = (stripe_size[0] + stripe_top_left[0], stripe_size[1] + stripe_top_left[1]) 279 | stripe_color = (255 * random(), 255 * random(), 255 * random()) 280 | cv2.rectangle(image, stripe_top_left, stripe_bottom_left, stripe_color, -1) 281 | cv2.rectangle(labels, stripe_top_left, stripe_bottom_left, label_colors[0], -1) 282 | 283 | # Save output 284 | rndstamp = str( int(random() * 1E8) ) 285 | rndstamp = rndstamp.zfill(8) 286 | 287 | if occluded: 288 | augmented_face_file = output_folder_faces_occluded + name + '_' + rndstamp + '.jpg' 289 | labels_file = output_folder_labels_occluded + name + '_' + rndstamp + '.ppm' 290 | else: 291 | augmented_face_file = output_folder_faces + name + '_' + 
rndstamp + '.jpg' 292 | labels_file = output_folder_labels + name + '_' + rndstamp + '.ppm' 293 | 294 | cv2.imwrite(augmented_face_file, image) 295 | cv2.imwrite(labels_file, labels) 296 | 297 | continue 298 | 299 | print("Processed " + face_file) 300 | 301 | # Show output 302 | # cv2.imshow(face_file, image) 303 | # if cv2.waitKey(0) & 0xFF == 27: 304 | # exit(0) 305 | # cv2.destroyWindow(face_file) 306 | 307 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import os, math, time 4 | from torch.autograd import Variable 5 | from torch.optim.lr_scheduler import MultiStepLR 6 | from visualize import LinePlotter 7 | from models import GCN, ResnetFCN, DeepLabV3 8 | from elfw import * 9 | from utils import * 10 | from metrics import * 11 | 12 | # Rafael Redondo, Jaume Gibert - Eurecat (c) 2019 13 | # ------------------------------------------------------------------------- 14 | 15 | # ------------------------------------------------------------------------- 16 | 17 | def TrainVal(trainLoader,valLoader,e,r,d,lr_m,w,m,a,S,K,R,M,l=None): 18 | 19 | # -------------------------------------------------------------------------< 20 | # Hyper parameters 21 | 22 | max_epochs = e 23 | lr = r 24 | lr_decay = d 25 | lr_milestones = lr_m 26 | weight_decay = w 27 | momentum = m 28 | data_augmen = a 29 | synth_augmen_ratio = S 30 | model_type = M 31 | gcn_levels = l 32 | 33 | hyper_str = model_type 34 | if model_type == "gcn": 35 | hyper_str += '-levels_' + str(gcn_levels) 36 | 37 | 38 | hyper_str += "-classes_" + trainLoader.dataset.classes_code + \ 39 | "-valset_" + str(trainLoader.dataset.valset) + \ 40 | "-lr_" + str(lr) + \ 41 | "-lrdecay_" + str(lr_decay) + \ 42 | "-lrmilestones" 43 | 44 | for ms in lr_milestones: 45 | hyper_str += "_" + str(ms) 46 | 47 | hyper_str += "-wdecay_" + str(weight_decay) + \ 48 | "-momentum_" + str(momentum) 49 | 50 | if data_augmen: 51 | hyper_str += "-dataaugment" 52 | 53 | synth_aug_str = '' 54 | if synth_augmen_ratio > 0 and trainLoader.dataset.augmentation_folders_id: 55 | synth_aug_str = "-" + trainLoader.dataset.augmentation_folders_id + "_" + str(synth_augmen_ratio) 56 | hyper_str += synth_aug_str 57 | 58 | resume_str = '' 59 | if R: 60 | resume_str = "-resumed_" + os.path.split(R)[-1] 61 | hyper_str += resume_str 62 | 63 | # ------------------------------------------------------------------------- 64 | 65 | print("Hyper parameters:\n" + \ 66 | " model type............... \033[1m" + str(model_type) + "\033[0m\n"\ 67 | " classes used............. \033[1m" + trainLoader.dataset.classes_code + "\033[0m\n"\ 68 | " validation set .......... \033[1m" + str(trainLoader.dataset.valset) + "\033[0m\n"\ 69 | " max epochs............... \033[1m" + str(max_epochs) + "\033[0m\n"\ 70 | " learning rate............ \033[1m" + str(lr) + "\033[0m\n"\ 71 | " lr decay................. \033[1m" + str(lr_decay) + "\033[0m\n"\ 72 | " lr milestones............ \033[1m" + str(lr_milestones) + "\033[0m\n"\ 73 | " weight_decay............. \033[1m" + str(weight_decay) + "\033[0m\n"\ 74 | " momentum................. \033[1m" + str(momentum) + "\033[0m\n"\ 75 | " data augmentation........ \033[1m" + str(data_augmen) + "\033[0m") 76 | 77 | if synth_augmen_ratio > 0: 78 | print(" synthetic augmentation... \033[1m" + str(synth_augmen_ratio) + "\033[0m") 79 | print(" synthetic folders........ 
\033[1m" + trainLoader.dataset.augmentation_folders_id + "\033[0m") 80 | 81 | if model_type == "gcn": 82 | print(\ 83 | " GCN levels............... \033[1m" + str(gcn_levels) + "\033[0m\n") 84 | if R: 85 | print(\ 86 | " Resumed from............. \033[1m" + str(R) + "\033[0m\n") 87 | 88 | # ------------------------------------------------------------------------- 89 | # Checkpoints storage 90 | 91 | check_mkdir(K) 92 | checkpoints = os.path.join(K, hyper_str) 93 | 94 | if check_mkdir(checkpoints): 95 | for filename in os.listdir(checkpoints): 96 | if filename.endswith('.pth'): 97 | os.remove(os.path.join(checkpoints, filename)) 98 | 99 | # ------------------------------------------------------------------------- 100 | # Classes 101 | 102 | num_classes = trainLoader.dataset.num_classes 103 | label_names = trainLoader.dataset.label_names 104 | 105 | # ------------------------------------------------------------------------- 106 | # Network Model 107 | 108 | if model_type == "fcn": 109 | model = torch.nn.DataParallel(ResnetFCN(num_classes)) 110 | elif model_type == "gcn": 111 | model = torch.nn.DataParallel(GCN(num_classes,gcn_levels)) 112 | elif model_type == "deeplab": 113 | model = torch.nn.DataParallel(DeepLabV3(num_classes)) 114 | else: 115 | print('Model type not found.') 116 | exit(-1) 117 | 118 | if R: 119 | model.load_state_dict(torch.load(R)) 120 | 121 | model.cuda() 122 | 123 | # ------------------------------------------------------------------------- 124 | # Class weights: make sure weights are Float, otherwise the torch's loss will complain 125 | 126 | class_weights = torch.tensor(trainLoader.dataset.get_class_balance_weights()).type(torch.FloatTensor) 127 | 128 | # ------------------------------------------------------------------------- 129 | # Optimization criterion 130 | 131 | criterion = torch.nn.CrossEntropyLoss(class_weights.cuda()) 132 | optimizer = torch.optim.SGD(model.parameters(), 133 | lr=lr, 134 | momentum=momentum, 135 | weight_decay=weight_decay) 136 | 137 | # ------------------------------------------------------------------------- 138 | # Schedulers 139 | 140 | scheduler = MultiStepLR(optimizer, milestones=lr_milestones, gamma=lr_decay) 141 | early_stop = EarlyStop(30, aim='maximum') 142 | 143 | # ------------------------------------------------------------------------- 144 | # Visdom: custom your environment title 145 | 146 | visdom_environment = "ELFW-" + model_type + \ 147 | "-classes_%s" % trainLoader.dataset.classes_code + \ 148 | "-vs_" + str(trainLoader.dataset.valset) + \ 149 | synth_aug_str + resume_str 150 | 151 | plotter = LinePlotter(visdom_environment) 152 | 153 | # ------------------------------------------------------------------------- 154 | 155 | for epoch in range(max_epochs): 156 | 157 | model.train() 158 | console = AverageConsole('Train', len(trainLoader)) 159 | train_loss = AverageMeter() 160 | train_acc = AverageMeter() 161 | 162 | for i, (images, labels) in enumerate(trainLoader): 163 | 164 | console.snap() 165 | 166 | images = Variable(images).cuda() 167 | labels = Variable(labels).cuda() 168 | optimizer.zero_grad() 169 | outputs = model(images) 170 | loss = criterion(outputs, labels) 171 | loss.backward() 172 | optimizer.step() 173 | train_loss.update(loss.data.cpu()) 174 | _, predicted = torch.max(outputs.data, 1) 175 | train_acc.update( 100 * (predicted == labels).sum().item() / np.prod(labels.size()) ) 176 | 177 | console.updateprint(i) 178 | 179 | plotter.plot(epoch, train_loss.avg, 'Loss', 'train') 180 | plotter.plot(epoch, 
train_acc.avg, 'Global Accuracy', 'train') 181 | 182 | # --------------------------------------------------------------------------------- 183 | model.eval() 184 | console = AverageConsole('Eval', len(valLoader)) 185 | val_loss = AverageMeter() 186 | val_acc = AverageMeter() 187 | TP, TN, FP, FN = ZerosTFPN(num_classes) 188 | 189 | with torch.no_grad(): 190 | 191 | for i, (images, labels) in enumerate(valLoader): 192 | 193 | console.snap() 194 | 195 | images = Variable(images).cuda() 196 | labels = Variable(labels).cuda() 197 | outputs = model(images) 198 | loss = criterion(outputs, labels) 199 | # No backward, No optimization 200 | val_loss.update(loss.data.cpu()) 201 | _, predictions = torch.max(outputs.data, 1) 202 | val_acc.update( 100 * (predictions == labels).sum().item() / np.prod(labels.size()) ) 203 | 204 | tp, tn, fp, fn = TrueFalsePositiveNegatives(labels, predictions, num_classes) 205 | TP += tp 206 | TN += tn 207 | FP += fp 208 | FN += fn 209 | console.updateprint(i) 210 | 211 | # Extended metrics 212 | val_pixel_acc = PixelAccuracy(TP, FN) 213 | val_mean_acc, val_class_acc = MeanAccuracy(TP, FN) 214 | val_mean_iu, val_class_iu = MeanIU(TP, FN, FP) 215 | val_freq_iu = FrequencyWeightedIU(TP, FN, FP) 216 | val_mean_f1, val_class_f1 = MeanF1Score(TP, FN, FP) 217 | 218 | plotter.plot(epoch, optimizer.param_groups[0]['lr'], 'Learning Rate', 'Learning Rate') 219 | plotter.plot(epoch, val_loss.avg, 'Loss', 'validation') 220 | plotter.plot(epoch, val_acc.avg, 'Global Accuracy', 'validation') 221 | plotter.plot(epoch, val_pixel_acc, 'Pixel Accuracy', 'validation') 222 | plotter.plot(epoch, val_mean_acc, 'Mean Accuracy', 'validation') 223 | plotter.plot(epoch, val_mean_iu, 'Mean IU', 'validation') 224 | plotter.plot(epoch, val_freq_iu, 'Freq Weighted IU', 'validation') 225 | plotter.plot(epoch, val_mean_f1, 'Mean F1Score', 'validation') 226 | 227 | for c in range(0,num_classes): 228 | plotter.plot(epoch, val_class_acc[c], 'Class Accuracy', label_names[c]) 229 | plotter.plot(epoch, val_class_iu[c], 'Class IU', label_names[c]) 230 | plotter.plot(epoch, val_class_f1[c], 'Class F1Score', label_names[c]) 231 | 232 | 233 | print("Epoch [\033[1m%d\033[0m] Loss: \033[1m%.5f\033[0m, Acc: \033[1m%.2f\033[0m" % (epoch, val_loss.avg, val_acc.avg)) 234 | 235 | # --------------------------------------------------------------------------------- 236 | # LR update 237 | 238 | scheduler.step() 239 | 240 | # --------------------------------------------------------------------------------- 241 | # Saves checkpoints 242 | 243 | if not epoch % 10: 244 | checkpoint_name = os.path.join(checkpoints, model_type + "-epoch_" + str(epoch).zfill(4) + ".pth") 245 | torch.save(model.state_dict(), checkpoint_name) 246 | print("Saved checkpoint at " + checkpoint_name) 247 | 248 | # --------------------------------------------------------------------------------- 249 | # Exit conditions 250 | 251 | # Early stop 252 | if early_stop.step(val_mean_iu): 253 | print("It's been a long time since we do not improve the training. Let's early stop it.") 254 | return 255 | 256 | # Divergence 257 | if math.isnan(train_loss.avg) or math.isnan(val_loss.avg): 258 | print("Loss is out of range o_0. 
Let's stop.") 259 | return 260 | 261 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /elfw.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import sys 4 | import numpy as np 5 | import random 6 | from PIL import Image 7 | import collections 8 | import torch 9 | from torch.utils import data 10 | import torchvision.transforms.functional as F 11 | from torchvision.transforms import Compose, Normalize, ToTensor 12 | from transform import Scale 13 | from utils import bcolors 14 | 15 | # This file contains the ELFW Dataset. 16 | # 17 | # Important variables: 18 | # 19 | # dataset_path -> path to the dataset containing the ELFW faces and labels. 20 | # synthetic_paths -> a list of name-path pairs pointing to synthetic datasets (faces and labels). 21 | # 22 | # Organize your datasets as follows: 23 | # 24 | # dataset/faces -> a folder containing all input faces. 25 | # dataset/labels -> a folder containing all labels paired to the faces. 26 | # 27 | # Also, populate the file named 'dataset_path/elfw_set_{valset}.txt' with a list of names to be used for validation, and therefore excluded from training. 28 | # 29 | # Overfitting with a single image: 30 | # 31 | # 1. Place the image you want to overfit on in the 'faces' folder. 32 | # 2. Do the same with its label image in the 'labels' folder. 33 | # 3. Make a copy of each, rename them, and place them in the same 'faces' and 'labels' folders, respectively. 34 | # 4. Make sure one of the two face file names is listed in 'elfw_set_{valset}.txt'.
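#
# A minimal usage sketch (the DataLoader settings and the synthetic-augmentation values are illustrative assumptions, not prescribed by this file; synth_augmen_types is assumed to index the synthetic_paths entries defined below):
#
#   from torch.utils.data import DataLoader
#   from elfw import ELFWDataSet
#
#   train_set = ELFWDataSet(split='train', valset=0, random_transform=True,
#                           synth_augmen_types=[0, 1, 2], synth_augmen_ratio=0.5)
#   val_set = ELFWDataSet(split='validation', valset=0)
#
#   train_loader = DataLoader(train_set, batch_size=8, shuffle=True, num_workers=4)
#   val_loader = DataLoader(val_set, batch_size=8, shuffle=False, num_workers=4)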
35 | # 36 | # Rafael Redondo & Jaume Gibert - Eurecat (c) 2019 37 | 38 | # Cluster 39 | dataset_path = "/media/ssd2/elfw/elfw_01_basic" 40 | synthetic_paths = [{"name": "Sunglasses", 41 | "path": "/media/ssd2/elfw/elfw_AugmentedGlasses"}, 42 | {"name": "Hands", 43 | "path": "/media/ssd2/elfw/elfw_AugmentedHands"}, 44 | {"name": "Masks", 45 | "path": "/media/ssd2/elfw/elfw_AugmentedMasks"}] 46 | 47 | # # Local 48 | # dataset_path = "/media/jaume.gibert/Data/elfw/elfw_01_basic" 49 | # synthetic_paths = [{"name": "Sunglasses", 50 | # "path": "/media/jaume.gibert/Data/elfw/elfw_AugmentedGlasses"}, 51 | # {"name": "Hands", 52 | # "path": "/media/jaume.gibert/Data/elfw/elfw_AugmentedHands"}, 53 | # {"name": "Masks", 54 | # "path": "/media/jaume.gibert/Data/elfw/elfw_AugmentedMasks"}] 55 | 56 | # # Local 57 | # dataset_path = "/media/jaume.gibert/Data/elfw/debug/train" 58 | # synthetic_paths = [{"name": "Sunglasses", 59 | # "path": "/media/jaume.gibert/Data/elfw/debug/aug0"}, 60 | # {"name": "Hands", 61 | # "path": "/media/jaume.gibert/Data/elfw/debug/aug1"}, 62 | # {"name": "Masks", 63 | # "path": "/media/jaume.gibert/Data/elfw/debug/aug2"}] 64 | 65 | class ELFWDataSet(data.Dataset): 66 | 67 | def __init__(self, 68 | split='train', # Either train or validation 69 | valset=0, # Specifies the number partition for validation images 70 | random_transform=False, # Boolean for random data augmentation 71 | synth_augmen_types=None, # List of indices for folders of images that will be used as class augmentation 72 | synth_augmen_ratio=0, # Percentage (wrt the training images) from augmentation folder that will be included in the train set 73 | compute_class_weights=False, # If True it computes class weights for the whole dataset (original + augmented) 74 | excluded_classes=None): # List of integers specifying the classes that are not gonna be used 75 | 76 | # Dataset Labels: number of categories, names, and associated colors (the very first to be computed) 77 | self.update_classes(excluded=excluded_classes) 78 | 79 | self.root = dataset_path 80 | self.split = split 81 | self.valset = valset 82 | self.files = collections.defaultdict(list) # pairs of image+label names separated in train and validation 83 | self.random_transform = random_transform 84 | self.median_frequencies = np.ones(self.num_classes) 85 | 86 | # Image and label transformations 87 | self.target_size = 256 88 | self.img_transform = Compose([ 89 | Scale((self.target_size, self.target_size), Image.BILINEAR), 90 | ToTensor(), 91 | Normalize([.485, .456, .406], [.229, .224, .225]) # Useful when using pre-trained nets 92 | ]) 93 | 94 | self.source_size = 250 95 | self.label_transform = Compose([ 96 | ToELFWLabel(self.label_colors, self.source_size), 97 | Scale((self.target_size, self.target_size), Image.NEAREST), 98 | ]) 99 | 100 | # Populating the dataset 101 | val_set_name = "elfw_set" 102 | faces_folder = "faces" 103 | labels_folder = "labels" 104 | 105 | val_set = osp.join(self.root, val_set_name + "_%s.txt" % str(self.valset).zfill(2)) 106 | val_file = open(val_set,"r") 107 | val_names = [osp.splitext(file.strip())[0] for file in val_file] 108 | val_file.close() 109 | 110 | face_files = [] 111 | 112 | if self.split == 'train': 113 | face_files = os.listdir(osp.join(self.root, faces_folder)) 114 | elif self.split == 'validation': 115 | face_files = val_names 116 | elif self.split == 'test': 117 | return 118 | else: 119 | print("Error: undefined split type!") 120 | exit(1) 121 | 122 | print(bcolors.YELLOW + "Loading ELFW split \'%s\' from 
%s" % (split, self.root) + bcolors.ENDC) 123 | 124 | for filename in face_files: 125 | name = osp.splitext(filename)[0] 126 | 127 | if self.split == 'train': 128 | if name in val_names: # Skip validation images from training 129 | continue 130 | 131 | img_file = osp.join(self.root, osp.join(faces_folder, "%s.jpg" % name)) 132 | label_file = osp.join(self.root, osp.join(labels_folder, "%s.png" % name)) 133 | 134 | if not osp.exists(label_file): 135 | print(bcolors.BOLD + 'Labels not found in ' + label_file + bcolors.ENDC) 136 | continue 137 | 138 | self.files[self.split].append({ 139 | "img": img_file, 140 | "label": label_file 141 | }) 142 | 143 | # Define the augmentation folders that to be used 144 | if self.split == 'train': 145 | self.augmentation_folders(synth_augmen_types) 146 | 147 | # Add images from the synth_augmen_folder (if requested) 148 | if self.split == 'train' and self.synth_augmen_folders and synth_augmen_ratio > 0: 149 | 150 | n_train_images = len(self.files[self.split]) 151 | n_aug_images_total = int(synth_augmen_ratio * n_train_images) 152 | n_aug_images_part = int(n_aug_images_total / len(self.synth_augmen_folders)) 153 | 154 | for sf in self.synth_augmen_folders: 155 | 156 | synth_augmen_folder = sf['path'] 157 | synth_aug_files = os.listdir(osp.join(synth_augmen_folder, 'faces')) 158 | print((bcolors.BLUE + "Synthetic augmentation: %d out of %d images for %s at %s" + bcolors.ENDC) % \ 159 | (n_aug_images_part, len(synth_aug_files), sf['name'], synth_augmen_folder)) 160 | 161 | # Shuffle all augmentation images and keep adding them until we have as much as n_aug_images 162 | random.shuffle(synth_aug_files) 163 | 164 | c = 0 165 | for aug_filename in synth_aug_files: 166 | 167 | # remove the extension 168 | name = osp.splitext(aug_filename)[0] 169 | 170 | # Check if this image belongs to the validation set also, this is, if it is an image from 171 | # the validation set that has been augmented. In this case, we discard it: 172 | # All augmented images are composed of the original name of the person 173 | # and an augmentatio ID for the different assets. this ID always starts with '_elfw' 174 | # so it's something like name_surname_0003_elfw-sunglasses-12 175 | # we need to know if the name_surname_0003 part is in the val_names list 176 | if name[:name.find("_elfw")] in val_names: 177 | continue 178 | 179 | # Get image and labels names 180 | img_file = osp.join(synth_augmen_folder, 'faces', "%s.jpg" % name) 181 | label_file = osp.join(synth_augmen_folder, 'labels', "%s.png" % name) 182 | 183 | # Check existence of label file - for security 184 | if not osp.exists(label_file): 185 | print(bcolors.RED + 'Labels not found in ' + label_file + bcolors.ENDC) 186 | continue 187 | 188 | # Add pair into the training list 189 | self.files[self.split].append({ 190 | "img": img_file, 191 | "label": label_file 192 | }) 193 | 194 | # Check how many augmentation images have been used so far. If max is reached, then break 195 | c += 1 196 | if (c == n_aug_images_part): 197 | break 198 | 199 | # Shuffle images so they get mixed across different synthetic augmentation folders 200 | random.shuffle(self.files[self.split]) 201 | 202 | if compute_class_weights and self.split == 'train': 203 | print(bcolors.GREEN + "Computing class balancing weights..." 
+ bcolors.ENDC) 204 | self.__compute_class_balance_weights__() 205 | 206 | print(("Loaded " + bcolors.BOLD + "%s" + bcolors.ENDC + " split with " + bcolors.BOLD + "%d" + bcolors.ENDC + " items") 207 | % (self.split, len(self.files[self.split])) ) 208 | 209 | def __len__(self): 210 | return len(self.files[self.split]) 211 | 212 | def __getitem__(self, index): 213 | datafiles = self.files[self.split][index] 214 | 215 | image_file = datafiles["img"] 216 | label_file = datafiles["label"] 217 | 218 | image = Image.open(image_file).convert('RGB') 219 | label = Image.open(label_file).convert("RGB") 220 | 221 | if self.random_transform: 222 | # Flip 223 | if random.random() > 0.5: 224 | image = F.hflip(image) 225 | label = F.hflip(label) 226 | # Shift 227 | shift = [0, 0] 228 | if random.random() > 0.5: 229 | shift[0] = (random.random() - 0.5) * image.size[0] * 0.5 230 | shift[1] = (random.random() - 0.5) * image.size[1] * 0.5 231 | # Resize 232 | scale = 1 233 | if random.random() > 0.5: 234 | scale = random.random() * 0.5 + 0.5 235 | image = F.affine(image, 0, shift, scale, 0) # Fills image with to black color 236 | label = F.affine(label, 0, shift, scale, 0) # Fills with background label=0 237 | 238 | 239 | if self.img_transform is not None: 240 | image = self.img_transform(image) 241 | 242 | if self.label_transform is not None: 243 | label = self.label_transform(label) 244 | 245 | # TODO: why ToTensor() works for images but not for labels? 246 | label = torch.from_numpy(np.array(label, dtype=np.uint8)).long() 247 | 248 | return image, label.long() 249 | 250 | """ 251 | Creates the target label names and associated colors from which the targeted number of classes is calculated. 252 | To exclude one or several classes from training and validation, just feed them as argument in a comma-separated list form, e.g. 0,1,2. 
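    For instance (illustrative values), excluded=[4, 6] - or excluded_classes=[4, 6] at the ELFWDataSet constructor - drops the 'sunglasses' and 'mouth-mask' classes and keeps the remaining five.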
253 | """ 254 | def update_classes(self, excluded=None): 255 | 256 | label_names = [ 257 | "background", 258 | "skin", 259 | "hair", 260 | "beard-mustache", 261 | "sunglasses", 262 | "wearable", 263 | "mouth-mask"] 264 | 265 | label_colors = [ 266 | (0, 0, 0), 267 | (0, 255, 0), 268 | (255, 0, 0), 269 | (0, 255, 255), 270 | (0, 0, 255), 271 | (255, 0, 255), 272 | (255, 255, 0)] 273 | 274 | # Create a string code to keep track of the classes that are used 275 | if excluded: 276 | used_classes = [idx for idx in range(len(label_colors)) if idx not in excluded] 277 | else: 278 | used_classes = range(len(label_colors)) 279 | self.classes_code = '' 280 | for c in used_classes: 281 | self.classes_code += str(c) 282 | 283 | # Keep the colors and names of the used classes 284 | self.label_colors = [color for idx, color in enumerate(label_colors) if idx in used_classes] 285 | self.label_names = [name for idx, name in enumerate(label_names) if idx in used_classes] 286 | 287 | # The final number of classes 288 | self.num_classes = len(self.label_names) 289 | 290 | 291 | def augmentation_folders(self, synth_augmen_types): 292 | 293 | self.synth_augmen_folders = None 294 | self.augmentation_folders_id = None 295 | if synth_augmen_types: 296 | self.synth_augmen_folders = [synthetic_paths[idx] for idx in synth_augmen_types] 297 | self.augmentation_folders_id = '' 298 | for sp in self.synth_augmen_folders: 299 | self.augmentation_folders_id += sp['name'] 300 | 301 | def __ToELFWLabel__(self, data): 302 | 303 | r = data[:, :, 0] 304 | g = data[:, :, 1] 305 | b = data[:, :, 2] 306 | 307 | output = np.zeros((data.shape[0], data.shape[1])) 308 | for c in range(0,self.num_classes): 309 | color_mask = (r == self.label_colors[c][0]) & (g == self.label_colors[c][1]) & (b == self.label_colors[c][2]) 310 | output[color_mask] = c 311 | 312 | return output 313 | 314 | """ 315 | Private method to get and compute class balancing weights 316 | that will be used within the loss function for segmentation 317 | """ 318 | def __compute_class_balance_weights__(self): 319 | 320 | px_frequencies = np.zeros(self.num_classes) 321 | im_frequencies = np.zeros(self.num_classes) 322 | 323 | i = 0 324 | L = len(self.files[self.split]) 325 | for f in self.files[self.split]: 326 | 327 | if not i: 328 | sys.stdout.flush() 329 | print('') 330 | sys.stdout.write('\x1b[1A') 331 | sys.stdout.write('\x1b[2K') 332 | i+=1 333 | print((bcolors.GREEN+" --- Image [%d / %d]"+bcolors.ENDC)%(i, L)) 334 | 335 | file_name = f['label'] 336 | image = Image.open(file_name).convert("RGB") 337 | img = np.array(image) 338 | img = self.__ToELFWLabel__(img) 339 | for l in range(0, self.num_classes): 340 | px = np.sum(img==l) 341 | # label counts if it is present in the image 342 | if px > 0: 343 | px_frequencies[l] += px 344 | im_frequencies[l] += img.size 345 | 346 | sys.stdout.write('\x1b[1A') 347 | sys.stdout.write('\x1b[2K') 348 | print((bcolors.GREEN+" --- DONE!"+bcolors.ENDC)) 349 | 350 | # Mask for indices of appearing classes in the train set 351 | m = (px_frequencies>0) 352 | idx = np.where(m) 353 | 354 | frequencies = np.divide(px_frequencies[m], im_frequencies[m]) 355 | pos_median_frequencies = np.divide(np.median(frequencies), frequencies) 356 | #pos_median_frequencies = np.divide(1, frequencies) 357 | 358 | for l in range(0,len(pos_median_frequencies)): 359 | self.median_frequencies[idx[0][l]] = pos_median_frequencies[l] 360 | 361 | 362 | """ 363 | Getter of the class balancing weights 364 | """ 365 | def get_class_balance_weights(self): 366 | 
return self.median_frequencies.astype(np.float)
367 | 
368 | 
369 | class ToELFWLabel(object):
370 | 
371 |     def __init__(self, label_colors, size):
372 | 
373 |         self.size = size
374 |         self.label_colors = label_colors
375 | 
376 |     def __call__(self, input):
377 | 
378 |         data = np.array(input)
379 |         data = np.reshape(data, (self.size, self.size, 3))
380 |         r = data[:, :, 0]
381 |         g = data[:, :, 1]
382 |         b = data[:, :, 2]
383 | 
384 |         output = np.zeros((self.size, self.size))
385 |         for c in range(0, len(self.label_colors)):
386 |             color_mask = (r == self.label_colors[c][0]) & (g == self.label_colors[c][1]) & (b == self.label_colors[c][2])
387 |             output[color_mask] = c
388 | 
389 |         return Image.fromarray(output)
--------------------------------------------------------------------------------
/scripts/elfw-putYourHandsOnMeWithDlib.py:
--------------------------------------------------------------------------------
1 | # This code inserts hands from one image into another relative to their respective head poses.
2 | #
3 | # Dlib is used to estimate the head poses, stolen from GitHub:
4 | # KwanHua Lee (lincolnhard) Taiwan, lincolnhardabc@gmail.com
5 | #
6 | # The head pose estimation uses a PnP solver to match between
7 | # the detected facial landmarks and a predefined landmarks set (object_pts).
8 | #
9 | # Note that the model landmarks do not necessarily correspond to the
10 | # actual face, and therefore an accurate matching (regression) is not always guaranteed.
11 | #
12 | # Color correction is performed in the lab color space as explained in:
13 | # E. Reinhard et al., 'Color Transfer between Images', IEEE Computer Graphics and Applications, vol. 21, no. 5, pp. 34-41, 2001.
14 | #
15 | # R. Redondo (c) Eurecat 2019
16 | 
17 | # IMPORTANT NOTE: some hands in the Hand Over Faces and Hand2Face datasets show visual artifacts
18 | # after color correction. They have been identified in the script elfw-cleaner.py;
19 | # run it immediately after the hand data augmentation is done.
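#
# A usage sketch (paths are illustrative only; the -i/-d/-e/-t options come from the argparse setup below):
#
#   python elfw-putYourHandsOnMeWithDlib.py \
#       -i /path/to/elfw_01_basic \
#       -d /path/to/hand_over_face \
#       -e /path/to/elfw_AugmentedHands \
#       -t 5
#
# where the import folder must contain 'faces' and 'labels' subfolders, and the dlib
# 68-landmark model is expected at './dlib/shape_predictor_68_face_landmarks.dat'.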
20 | 21 | import os 22 | import sys 23 | import math 24 | import cv2 25 | import dlib 26 | import argparse 27 | import numpy as np 28 | from imutils import face_utils 29 | 30 | K = [6.5308391993466671e+002, 0.0, 3.1950000000000000e+002, 31 | 0.0, 6.5308391993466671e+002, 2.3950000000000000e+002, 32 | 0.0, 0.0, 1.0] 33 | 34 | D = [7.0834633684407095e-002, 6.9140193737175351e-002, 0.0, 0.0, -1.3073460323689292e+000] 35 | 36 | cam_matrix = np.array(K).reshape(3, 3).astype(np.float32) 37 | dist_coeffs = np.array(D).reshape(5, 1).astype(np.float32) 38 | 39 | object_pts = np.float32([[6.825897, 6.760612, 4.402142], 40 | [1.330353, 7.122144, 6.903745], 41 | [-1.330353, 7.122144, 6.903745], 42 | [-6.825897, 6.760612, 4.402142], 43 | [5.311432, 5.485328, 3.987654], 44 | [1.789930, 5.393625, 4.413414], 45 | [-1.789930, 5.393625, 4.413414], 46 | [-5.311432, 5.485328, 3.987654], 47 | [2.005628, 1.409845, 6.165652], 48 | [-2.005628, 1.409845, 6.165652], 49 | [2.774015, -2.080775, 5.048531], 50 | [-2.774015, -2.080775, 5.048531], 51 | [0.000000, -3.116408, 6.097667], 52 | [0.000000, -7.415691, 4.070434]]) 53 | 54 | reprojectsrc = np.float32([[10.0, 10.0, 10.0], 55 | [10.0, 10.0, -10.0], 56 | [10.0, -10.0, -10.0], 57 | [10.0, -10.0, 10.0], 58 | [-10.0, 10.0, 10.0], 59 | [-10.0, 10.0, -10.0], 60 | [-10.0, -10.0, -10.0], 61 | [-10.0, -10.0, 10.0]]) 62 | 63 | line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0], 64 | [4, 5], [5, 6], [6, 7], [7, 4], 65 | [0, 4], [1, 5], [2, 6], [3, 7]] 66 | 67 | label_colors = [ 68 | ( 0, 0, 0), 69 | ( 0,255, 0), 70 | ( 0, 0,255), 71 | (255,255, 0), 72 | (255, 0, 0), 73 | (255, 0,255), 74 | ( 0,255,255), 75 | ( 0, 0, 0)] 76 | 77 | label_tags = [ 78 | "background", 79 | "skin", 80 | "hair", 81 | "beard-mustache", 82 | "sunglasses", 83 | "wearable", 84 | "mouth-mask", 85 | "hands"] 86 | 87 | dlib_detector = [] 88 | dlib_predictor = [] 89 | 90 | def get_pose_from_image(image): 91 | 92 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 93 | face_rects = dlib_detector(gray, 1) 94 | 95 | if not len(face_rects): 96 | return 97 | 98 | shape = dlib_predictor(gray, face_rects[0]) 99 | shape = face_utils.shape_to_np(shape) 100 | 101 | euler_angles, reprojection = get_head_pose(shape) 102 | return euler_angles, reprojection, shape, face_rects[0] 103 | 104 | def get_head_pose(shape): 105 | 106 | image_pts = np.float32([shape[17], shape[21], shape[22], shape[26], shape[36], 107 | shape[39], shape[42], shape[45], shape[31], shape[35], 108 | shape[48], shape[54], shape[57], shape[8]]) 109 | 110 | _, rotation_vec, translation_vec = cv2.solvePnP(object_pts, image_pts, cam_matrix, dist_coeffs) 111 | reprojectdst, _ = cv2.projectPoints(reprojectsrc, rotation_vec, translation_vec, cam_matrix, dist_coeffs) 112 | reprojectdst = tuple(map(tuple, reprojectdst.reshape(8, 2))) 113 | 114 | # Euler angle 115 | rotation_mat, _ = cv2.Rodrigues(rotation_vec) 116 | pose_mat = cv2.hconcat((rotation_mat, translation_vec)) 117 | _, _, _, _, _, _, euler_angle = cv2.decomposeProjectionMatrix(pose_mat) 118 | 119 | return euler_angle, reprojectdst 120 | 121 | def get_solid_angle(poseA, poseB): 122 | 123 | # Degrees to radians 124 | elevationA = math.radians(poseA[0, 0]) 125 | azimuthA = math.radians(poseA[1, 0]) 126 | rotationA = math.radians(poseA[2, 0]) 127 | 128 | elevationB = math.radians(poseB[0, 0]) 129 | azimuthB = math.radians(poseB[1, 0]) 130 | rotationB = math.radians(poseB[2, 0]) 131 | 132 | # Spherical to Cartesian 133 | xA = math.cos(math.radians(azimuthA)) * math.sin(math.radians(elevationA)) 134 | yA = 
math.sin(math.radians(azimuthA)) * math.sin(math.radians(elevationA)) 135 | zA = math.cos(math.radians(elevationA)) 136 | 137 | xB = math.cos(math.radians(azimuthB)) * math.sin(math.radians(elevationB)) 138 | yB = math.sin(math.radians(azimuthB)) * math.sin(math.radians(elevationB)) 139 | zB = math.cos(math.radians(elevationB)) 140 | 141 | # Solid spherical angle 142 | solid_angle = math.acos( xA * xB + yA * yB + zA * zB ) 143 | 144 | # Head rotation angle 145 | solid_rotation = abs(rotationB - rotationA) 146 | 147 | return math.degrees(solid_angle + solid_rotation) 148 | 149 | 150 | def get_angle_diff_L2(poseA, poseB): 151 | 152 | elevation = poseA[0, 0] - poseB[0, 0] 153 | azimuth = poseA[1, 0] - poseB[1, 0] 154 | rotation = poseA[2, 0] - poseB[2, 0] 155 | 156 | # L2 157 | return math.sqrt( elevation * elevation + azimuth * azimuth + rotation * rotation ) 158 | 159 | 160 | def draw_pose(image, reprojection, shape): 161 | 162 | for (x, y) in shape: 163 | cv2.circle(image, (x, y), 1, (0, 255, 0), -1) 164 | 165 | h, w,_ = image.shape 166 | for start, end in line_pairs: 167 | if reprojection[start] >= (0,0) and reprojection[end] >= (0,0) and \ 168 | reprojection[start] < (w,h) and reprojection[end] < (w,h): 169 | cv2.line(image, reprojection[start], reprojection[end], (255, 0, 0)) 170 | 171 | return image 172 | 173 | def handOverlay(canvas, labels, item, canvas_center, item_center, resize_factor, item_type): 174 | 175 | # Arrays 176 | canvas_center = np.array(canvas_center).astype(int) 177 | item_center = np.array(item_center).astype(int) 178 | 179 | # Resize item 180 | item_center = (item_center * resize_factor).astype(int) 181 | new_size = (np.array(item.shape[1::-1]) * resize_factor).astype(int) 182 | item = cv2.resize(item, tuple(new_size)) 183 | 184 | # Coordinates 185 | lt_i = item_center - np.minimum(canvas_center, item_center) 186 | rb_i = item_center + np.minimum(canvas.shape[1::-1] - canvas_center - 1, item.shape[1::-1] - item_center - 1) 187 | 188 | lt_c = np.maximum(canvas_center - item_center, [0,0]) 189 | rb_c = lt_c + rb_i - lt_i 190 | 191 | # Alpha 192 | b, g, r, a = cv2.split(item) 193 | a = a.astype(np.float) / np.max(a) 194 | aaa = cv2.merge((a,a,a)) 195 | 196 | # Crops 197 | a = a[lt_i[1]:rb_i[1], lt_i[0]:rb_i[0]] 198 | aaa = aaa[lt_i[1]:rb_i[1], lt_i[0]:rb_i[0],:3] 199 | item = item[lt_i[1]:rb_i[1], lt_i[0]:rb_i[0],:3] 200 | canvas_crop = canvas[lt_c[1]:rb_c[1], lt_c[0]:rb_c[0],:3] 201 | labels_crop = labels[lt_c[1]:rb_c[1], lt_c[0]:rb_c[0],:3] 202 | 203 | # Blending 204 | canvas[lt_c[1]:rb_c[1], lt_c[0]:rb_c[0],:3] = canvas_crop * (1-aaa) + item * aaa 205 | t = label_tags.index(item_type) 206 | lb, lg, lr = cv2.split(labels_crop) 207 | lb[a>0] = label_colors[t][0] 208 | lg[a>0] = label_colors[t][1] 209 | lr[a>0] = label_colors[t][2] 210 | labels[lt_c[1]:rb_c[1], lt_c[0]:rb_c[0],:] = cv2.merge((lb,lg,lr)) 211 | 212 | # ----------------------------------------------------------------------------------------------- 213 | # From P. Shirley, et al. 'Color transfer between images' IEEE Corn, vol, 21, pp. 34-41, 2001. 
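#
# The full reference is E. Reinhard, M. Ashikhmin, B. Gooch and P. Shirley, 'Color Transfer
# between Images', IEEE Computer Graphics and Applications, vol. 21, no. 5, pp. 34-41, 2001.
#
# A minimal sketch of the per-channel statistic matching performed by color_transfer() below,
# assuming `src_lab` and `tgt_lab` are H x W x 3 arrays already converted with bgr_to_lab()
# (variable names are illustrative):
#
#   src_mean, src_std = src_lab.mean(axis=(0, 1)), src_lab.std(axis=(0, 1))
#   tgt_mean, tgt_std = tgt_lab.mean(axis=(0, 1)), tgt_lab.std(axis=(0, 1))
#   out_lab = (src_lab - src_mean) * (tgt_std / src_std) + tgt_mean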
214 | 215 | def bgr_to_lab(image_bgr, mask=None): 216 | 217 | M_rgb2lms = [[0.3811, 0.5783, 0.0402],[0.1967, 0.7244, 0.0782],[0.0241, 0.1288, 0.8444]] 218 | M_lms2lab_1 = [[1.0,1.0,1.0],[1.0,1.0,-2.0],[1.0,-1.0,0.0]] 219 | M_lms2lab_2 = [[1.0/math.sqrt(3.0),0.0,0.0],[0.0,1.0/math.sqrt(6.0),0.0],[0.0,0.0,1.0/math.sqrt(2.0)]] 220 | M_lms2lab = np.dot(M_lms2lab_2, M_lms2lab_1) 221 | 222 | h, w, c = image_bgr.shape 223 | image_bgr = np.transpose( np.reshape(image_bgr, (h * w, c)) ) 224 | 225 | rgb = np.flip(image_bgr, 0) / 255.0 226 | lms = np.dot(M_rgb2lms, rgb) 227 | lms_log = np.log10(lms + 1E-10) 228 | lab = np.dot(M_lms2lab, lms_log) 229 | 230 | image_lab = np.reshape( np.transpose(lab), (h, w, c) ) 231 | 232 | return image_lab 233 | 234 | def lab_to_bgr(image_lab, mask=None): 235 | 236 | M_lms2rgb = [[4.4679, -3.5873, 0.1193],[-1.2186, 2.3809, -0.1624],[0.0497, -0.2439, 1.2045]] 237 | M_lab2lms_1 = [[1.0,1.0,1.0],[1.0,1.0,-1.0],[1.0,-2.0,0.0]] 238 | M_lab2lms_2 = [[math.sqrt(3.0)/3.0,0.0,0.0],[0.0,math.sqrt(6.0)/6.0,0.0],[0.0,0.0,math.sqrt(2.0)/2.0]] 239 | M_lab2lms = np.dot(M_lab2lms_1, M_lab2lms_2) 240 | 241 | h, w, c = image_lab.shape 242 | image_lab = np.transpose( np.reshape(image_lab, (h * w, c)) ) 243 | 244 | lms_log = np.dot(M_lab2lms, image_lab) 245 | lms = np.power(10, lms_log) 246 | rgb = np.dot(M_lms2rgb, lms) 247 | bgr = np.flip(np.clip(rgb, 0, 1), 0) * 255 248 | 249 | image_bgr = np.reshape( np.transpose(bgr), (h, w, c) ) 250 | 251 | return image_bgr.astype(np.uint8) 252 | 253 | def color_transfer(source_lab, target_lab, source_mask=None): 254 | 255 | if source_mask is not None: 256 | mask_channels = np.dstack((source_mask, source_mask, source_mask)) 257 | source_lab = np.ma.array(source_lab, mask=(mask_channels == 0)) 258 | 259 | source_lab_mean = np.mean(source_lab, axis=(0,1)) 260 | source_lab_std = np.std(source_lab, axis=(0,1)) 261 | 262 | target_lab_mean = np.mean(target_lab, axis=(0,1)) 263 | target_lab_std = np.std(target_lab, axis=(0,1)) 264 | 265 | s = source_lab.shape[:2] 266 | lab_std_factor = target_lab_std / source_lab_std 267 | 268 | source_lab -= np.dstack((np.full(s,source_lab_mean[0]), np.full(s,source_lab_mean[1]), np.full(s,source_lab_mean[2]))) 269 | source_lab *= np.dstack((np.full(s,lab_std_factor[0]), np.full(s,lab_std_factor[1]), np.full(s,lab_std_factor[2]))) 270 | source_lab += np.dstack((np.full(s,target_lab_mean[0]), np.full(s,target_lab_mean[1]), np.full(s,target_lab_mean[2]))) 271 | 272 | return source_lab 273 | 274 | # ----------------------------------------------------------------------------------------------- 275 | def check_mkdir(dir_name): 276 | if not os.path.exists(dir_name): 277 | os.mkdir(dir_name) 278 | return False 279 | else: 280 | return True 281 | 282 | def error(msg): 283 | print(msg) 284 | print('Type -h for help') 285 | sys.exit(0) 286 | 287 | def clean_console_line(): 288 | sys.stdout.write('\x1b[1A') 289 | sys.stdout.write('\x1b[2K') 290 | 291 | def main(): 292 | 293 | # ---------------------------------------------------------------------- 294 | # Arguments 295 | 296 | ap = argparse.ArgumentParser(prog="elfw-putYourHandsOnMeWithDlib") 297 | ap.add_argument("-i", "--import", type=str, help="Import folder with faces and labels to match in /faces and /labels folders, respectively.") 298 | ap.add_argument("-d", "--dataset", type=str, help="Path to the hands dataset (Hand2Face or hand_over_face).") 299 | ap.add_argument("-e", "--export", type=str, help="Export folder to save faces augmented with hands.") 300 | 
ap.add_argument("-t", "--tolerance", type=str, help="Maximum angle threshold to match a pair of faces (positive value in degrees).", default=5) 301 | args = vars(ap.parse_args()) 302 | 303 | angle_tolerance = args['tolerance'] 304 | 305 | import_folder = args['import'] 306 | if not import_folder or not os.path.exists(import_folder): 307 | error('Error: import folder not found') 308 | 309 | import_folder_faces = os.path.join(import_folder,'faces') 310 | import_folder_labels = os.path.join(import_folder,'labels') 311 | if not os.path.exists(import_folder_faces) or not os.path.exists(import_folder_labels): 312 | error('Error: \'/faces\' and \'/labels\' folders not found in the import folder') 313 | 314 | export_folder = args['export'] 315 | if not export_folder: 316 | error('Error: missing export folder') 317 | exit(-1) 318 | 319 | export_folder_faces = os.path.join(export_folder, 'faces') 320 | export_folder_labels = os.path.join(export_folder, 'labels') 321 | check_mkdir(export_folder) 322 | check_mkdir(export_folder_faces) 323 | check_mkdir(export_folder_labels) 324 | 325 | dataset = args['dataset'] 326 | if not os.path.exists(dataset): 327 | error('Error: dataset folder not found') 328 | 329 | # ---------------------------------------------------------------------- 330 | # Dataloader 331 | 332 | face_names = sorted(os.listdir(import_folder_faces)) 333 | label_names = sorted(os.listdir(import_folder_labels)) 334 | 335 | handface_pathnames = [] 336 | handmask_pathnames = [] 337 | if 'hand_over_face' in dataset: 338 | handfaces_folder = os.path.join(dataset, 'images_original_size') 339 | handmasks_folder = os.path.join(dataset, 'masks_highres') 340 | handface_pathnames = [os.path.join(handfaces_folder,f) for f in os.listdir(handfaces_folder)] 341 | handmask_pathnames = [os.path.join(handmasks_folder,m) for m in os.listdir(handmasks_folder)] 342 | dataset_label = 'hof' 343 | elif 'Hand2Face' in dataset: 344 | handfaces_folder = os.path.join(dataset, 'imgs') 345 | handmasks_folder = os.path.join(dataset, 'masks') 346 | handface_pathnames = [os.path.join(handfaces_folder,f) for f in os.listdir(handfaces_folder)] 347 | handmask_pathnames = [os.path.join(handmasks_folder,m) for m in os.listdir(handmasks_folder)] 348 | if 'EmoReact' in dataset: 349 | dataset_label = 'h2f_emo' 350 | elif 'Web' in dataset: 351 | dataset_label = 'h2f_web' 352 | else: 353 | error('Error: unrecognized Hand2Face dataset type') 354 | else: 355 | error('Error: unrecognized dataset type') 356 | 357 | handface_pathnames = sorted(handface_pathnames) 358 | handmask_pathnames = sorted(handmask_pathnames) 359 | 360 | # ---------------------------------------------------------------------- 361 | # Dlib init 362 | 363 | global dlib_detector, dlib_predictor 364 | shape_predictor = './dlib/shape_predictor_68_face_landmarks.dat' 365 | print('\033[1m' + 'Initiating Dlib from ' + shape_predictor + '\033[0m') 366 | global dlib_detector, dlib_predictor 367 | dlib_detector = dlib.get_frontal_face_detector() 368 | dlib_predictor = dlib.shape_predictor(shape_predictor) 369 | 370 | # ---------------------------------------------------------------------- 371 | # Dataset processing 372 | 373 | awkward = 0 374 | unrecognized = 0 375 | hints = np.zeros(len(handface_pathnames)) 376 | 377 | for i in range(len(handface_pathnames)): 378 | 379 | handface_pathname = handface_pathnames[i] 380 | _, handface_name = os.path.split(handface_pathname) 381 | handface_basename, _ = os.path.splitext(handface_name) 382 | print('Processing ' + '\033[33m' + 
handface_pathname + '\033[0m') 383 | 384 | # ---------------------------------------------------------------------- 385 | # Get reference pose 386 | 387 | handface = cv2.imread(handface_pathname) 388 | handface = handface[:, :, :3] # Leave the alpha channel out 389 | try: 390 | handface_angles, reprojection, shape, handface_rect = get_pose_from_image(handface) 391 | except: 392 | unrecognized += 1 393 | print('Face unrecognized') 394 | continue 395 | 396 | if abs(handface_angles[0,0]) > 90 or abs(handface_angles[1,0]) > 90 or abs(handface_angles[2,0]) > 90: 397 | awkward += 1 398 | print('Skipping awkward pose.') 399 | continue 400 | 401 | print('Reference pose (elevation,azimuth,rotation) = (%5.2f,%5.2f,%5.2f)' % (handface_angles[0,0],handface_angles[1,0],handface_angles[2,0])) 402 | # Uncomment the lines below to save the hand-face with pose overlay 403 | # handface = draw_pose(handface, reprojection, shape) 404 | # cv2.imwrite(os.path.join(export_folder, handface_name), handface) 405 | 406 | # ---------------------------------------------------------------------- 407 | # Hand masking 408 | 409 | hand_mask = cv2.imread(handmask_pathnames[i], cv2.IMREAD_UNCHANGED) 410 | if len(hand_mask.shape) > 2: 411 | hand_mask = hand_mask[:, :, -1] # take the last channel 412 | hand_mask = cv2.resize(hand_mask, handface.shape[1::-1], interpolation=cv2.INTER_NEAREST) # make sure dimensions agree 413 | 414 | handface_lab = bgr_to_lab(handface, hand_mask) 415 | 416 | hw, hh = (handface_rect.right() - handface_rect.left(), handface_rect.bottom() - handface_rect.top()) 417 | handface_center = (handface_rect.left() + hw * 0.5, handface_rect.top() + hh * 0.5) 418 | 419 | # ---------------------------------------------------------------------- 420 | # Iterate over faces in the import folder and paste the reference hands on pose-alike faces 421 | 422 | for t, face_name in enumerate(face_names): 423 | 424 | p_str = '[%d/%d]' % (t,len(face_names)) 425 | print(p_str) 426 | 427 | # ---------------------------------------------------------------------- 428 | # Get pose (on the firstly detected face) 429 | 430 | face = cv2.imread(os.path.join(import_folder_faces, face_name)) 431 | face = face[:, :, :3] # Leave the alpha channel out 432 | try: 433 | angles_face, reprojection, shape, face_rect = get_pose_from_image(face) 434 | except: 435 | clean_console_line() 436 | continue 437 | 438 | # ---------------------------------------------------------------------- 439 | # Difference angle between the reference and match poses 440 | 441 | angle_diff = get_angle_diff_L2(handface_angles, angles_face) 442 | clean_console_line() 443 | 444 | # Matches 445 | if angle_diff > angle_tolerance: 446 | continue 447 | print('%s Match (%5.2f,%5.2f,%5.2f) %s' % (p_str,angles_face[0,0],angles_face[1,0],angles_face[2,0], face_name)) 448 | 449 | # ---------------------------------------------------------------------- 450 | # Hand color correction 451 | 452 | face_roi = face[face_rect.top():face_rect.bottom(), face_rect.left():face_rect.right()] 453 | fh, fw, _ = face_roi.shape 454 | target_color_roi = [int(fw * 0.2), int(fh * 0.4), int(fw * 0.8), int(fh * 0.6)] 455 | target_color_roi_lab = bgr_to_lab(face_roi[target_color_roi[1]:target_color_roi[3],target_color_roi[0]:target_color_roi[2]]) 456 | hand_lab = color_transfer(handface_lab, target_color_roi_lab, hand_mask) 457 | hand = lab_to_bgr(hand_lab, hand_mask) 458 | hand_bgra = cv2.merge((hand, hand_mask)) 459 | 460 | # ---------------------------------------------------------------------- 
461 | # Hand overlay 462 | 463 | # image = draw_pose(face, reprojection, shape) 464 | resize_factor = math.sqrt(fw*fw + fh*fh) / math.sqrt(hw*hw + hh*hh) # ratio between the two face diagonals 465 | face_center = [face_rect.left() + fw * 0.5, face_rect.top() + fh * 0.5] 466 | labels = cv2.imread(os.path.join(import_folder_labels, label_names[t])) 467 | handOverlay(face, labels, hand_bgra, face_center, handface_center, resize_factor, "hands") 468 | 469 | # Uncomment the lines below to draw a rectangle where the target color is measured (mean and std) 470 | # lefttop = (face_rect.left() + target_color_roi[0], face_rect.top() + target_color_roi[1]) 471 | # rightbottom = (face_rect.left() + target_color_roi[2], face_rect.top() + target_color_roi[3]) 472 | # cv2.rectangle(face, lefttop, rightbottom, (255,255,255), 2) 473 | 474 | face_basename, extension = os.path.splitext(face_name) 475 | face_filename = face_basename + '-' + dataset_label + '-' + handface_basename + '.jpg' 476 | labels_filename = face_basename + '-' + dataset_label + '-' + handface_basename + '.png' 477 | cv2.imwrite(os.path.join(export_folder_faces, face_filename), face, [int(cv2.IMWRITE_JPEG_QUALITY), 95]) 478 | cv2.imwrite(os.path.join(export_folder_labels, labels_filename), labels) 479 | 480 | hints[i] += 1 481 | 482 | print('Total facehands: %d' % len(handface_pathnames)) 483 | print('Unrecognized facehands: %d' % unrecognized) 484 | print('Awkward facehands: %d' % awkward) 485 | print('Total matches: %d' % np.sum(hints)) 486 | print('Usages:') 487 | print(hints) 488 | print('\nDone!') 489 | 490 | if __name__ == '__main__': 491 | main() --------------------------------------------------------------------------------