├── LICENSE ├── README.md ├── demo ├── demo.gif ├── dinner.mp4 └── report.pdf ├── emotions.py ├── models ├── emotion_model.hdf5 └── haarcascade_frontalface_default.xml └── utils ├── __init__.py ├── __init__.pyc ├── __pycache__ ├── __init__.cpython-36.pyc ├── datasets.cpython-36.pyc ├── inference.cpython-36.pyc └── preprocessor.cpython-36.pyc ├── data_augmentation.py ├── datasets.py ├── datasets.pyc ├── grad_cam.py ├── inference.py ├── inference.pyc ├── preprocessor.py ├── preprocessor.pyc └── visualizer.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Peter Cunha 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Emotion 2 | This software recognizes human faces and their corresponding emotions from a video or webcam feed. Powered by OpenCV and Deep Learning. 3 | 4 | ![Demo](https://github.com/petercunha/Emotion/blob/master/demo/demo.gif?raw=true) 5 | 6 | 7 | ## Installation 8 | 9 | Clone the repository: 10 | ``` 11 | git clone https://github.com/petercunha/Emotion.git 12 | cd Emotion/ 13 | ``` 14 | 15 | Install these dependencies with `pip3 install`: 16 | - tensorflow 17 | - numpy 18 | - scipy 19 | - opencv-python 20 | - pillow 21 | - pandas 22 | - matplotlib 23 | - h5py 24 | - keras 25 | 26 | Once the dependencies are installed, you can run the project: 27 | `python3 emotions.py` 28 | 29 | 30 | ## To train new models for emotion classification 31 | 32 | - Download the fer2013.tar.gz file from [here](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data) 33 | - Move the downloaded file to the datasets directory inside this repository. 34 | - Untar the file: 35 | `tar -xzf fer2013.tar.gz` 36 | - Download train_emotion_classifier.py from oarriaga's repo [here](https://github.com/oarriaga/face_classification/blob/master/src/train_emotion_classifier.py) 37 | - Run the train_emotion_classifier.py file: 38 | `python3 train_emotion_classifier.py` 39 | 40 | 41 | ## Deep Learning Model 42 | 43 | The model used is from this [research paper](https://github.com/oarriaga/face_classification/blob/master/report.pdf) written by Octavio Arriaga, Paul G. Plöger, and Matias Valdenegro.
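As a minimal sketch, the bundled model can also be run on a single still image using the repository's own helpers; `face.jpg` below is only a placeholder path, and the script is assumed to be run from the repository root:

```python
# Hedged sketch: single-image emotion classification with the bundled model.
# 'face.jpg' is a placeholder image path, not a file shipped with the repo.
import cv2
import numpy as np
from keras.models import load_model

from utils.datasets import get_labels
from utils.preprocessor import preprocess_input

face_cascade = cv2.CascadeClassifier('./models/haarcascade_frontalface_default.xml')
classifier = load_model('./models/emotion_model.hdf5')
labels = get_labels('fer2013')              # {0: 'angry', ..., 6: 'neutral'}
target_size = classifier.input_shape[1:3]   # input size read from the model itself

gray = cv2.imread('face.jpg', cv2.IMREAD_GRAYSCALE)
for (x, y, w, h) in face_cascade.detectMultiScale(gray, 1.3, 5):
    face = cv2.resize(gray[y:y + h, x:x + w], target_size)
    face = preprocess_input(face, True)                   # scale pixels to [-1, 1]
    face = np.expand_dims(np.expand_dims(face, 0), -1)    # shape: (1, H, W, 1)
    print(labels[int(np.argmax(classifier.predict(face)))])
```

This mirrors the per-face loop in `emotions.py`, without the video capture, bounding-box offsets, and rolling-mode smoothing.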
44 | 45 | ![Model](https://i.imgur.com/vr9yDaF.png?1) 46 | 47 | 48 | ## Credit 49 | 50 | * Computer vision powered by OpenCV. 51 | * Neural network scaffolding powered by Keras with Tensorflow. 52 | * Convolutional Neural Network (CNN) deep learning architecture is from this [research paper](https://github.com/oarriaga/face_classification/blob/master/report.pdf). 53 | * Pretrained Keras model and much of the OpenCV code provided by GitHub user [oarriaga](https://github.com/oarriaga). 54 | -------------------------------------------------------------------------------- /demo/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/demo/demo.gif -------------------------------------------------------------------------------- /demo/dinner.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/demo/dinner.mp4 -------------------------------------------------------------------------------- /demo/report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/demo/report.pdf -------------------------------------------------------------------------------- /emotions.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from keras.models import load_model 4 | from statistics import mode 5 | from utils.datasets import get_labels 6 | from utils.inference import detect_faces 7 | from utils.inference import draw_text 8 | from utils.inference import draw_bounding_box 9 | from utils.inference import apply_offsets 10 | from utils.inference import load_detection_model 11 | from utils.preprocessor import preprocess_input 12 | 13 | USE_WEBCAM = True # If false, loads video file source 14 | 15 | # parameters for loading data and images 16 | emotion_model_path = './models/emotion_model.hdf5' 17 | emotion_labels = get_labels('fer2013') 18 | 19 | # hyper-parameters for bounding boxes shape 20 | frame_window = 10 21 | emotion_offsets = (20, 40) 22 | 23 | # loading models 24 | face_cascade = cv2.CascadeClassifier('./models/haarcascade_frontalface_default.xml') 25 | emotion_classifier = load_model(emotion_model_path) 26 | 27 | # getting input model shapes for inference 28 | emotion_target_size = emotion_classifier.input_shape[1:3] 29 | 30 | # starting lists for calculating modes 31 | emotion_window = [] 32 | 33 | # starting video streaming 34 | 35 | cv2.namedWindow('window_frame') 36 | video_capture = cv2.VideoCapture(0) 37 | 38 | # Select video or webcam feed 39 | cap = None 40 | if (USE_WEBCAM == True): 41 | cap = cv2.VideoCapture(0) # Webcam source 42 | else: 43 | cap = cv2.VideoCapture('./demo/dinner.mp4') # Video file source 44 | 45 | while cap.isOpened(): # True: 46 | ret, bgr_image = cap.read() 47 | 48 | #bgr_image = video_capture.read()[1] 49 | 50 | gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) 51 | rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB) 52 | 53 | faces = face_cascade.detectMultiScale(gray_image, scaleFactor=1.1, minNeighbors=5, 54 | minSize=(30, 30), flags=cv2.CASCADE_SCALE_IMAGE) 55 | 56 | for face_coordinates in faces: 57 | 58 | x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets) 59 | gray_face = gray_image[y1:y2, x1:x2] 60 
| try: 61 | gray_face = cv2.resize(gray_face, (emotion_target_size)) 62 | except: 63 | continue 64 | 65 | gray_face = preprocess_input(gray_face, True) 66 | gray_face = np.expand_dims(gray_face, 0) 67 | gray_face = np.expand_dims(gray_face, -1) 68 | emotion_prediction = emotion_classifier.predict(gray_face) 69 | emotion_probability = np.max(emotion_prediction) 70 | emotion_label_arg = np.argmax(emotion_prediction) 71 | emotion_text = emotion_labels[emotion_label_arg] 72 | emotion_window.append(emotion_text) 73 | 74 | if len(emotion_window) > frame_window: 75 | emotion_window.pop(0) 76 | try: 77 | emotion_mode = mode(emotion_window) 78 | except: 79 | continue 80 | 81 | if emotion_text == 'angry': 82 | color = emotion_probability * np.asarray((255, 0, 0)) 83 | elif emotion_text == 'sad': 84 | color = emotion_probability * np.asarray((0, 0, 255)) 85 | elif emotion_text == 'happy': 86 | color = emotion_probability * np.asarray((255, 255, 0)) 87 | elif emotion_text == 'surprise': 88 | color = emotion_probability * np.asarray((0, 255, 255)) 89 | else: 90 | color = emotion_probability * np.asarray((0, 255, 0)) 91 | 92 | color = color.astype(int) 93 | color = color.tolist() 94 | 95 | draw_bounding_box(face_coordinates, rgb_image, color) 96 | draw_text(face_coordinates, rgb_image, emotion_mode, 97 | color, 0, -45, 1, 1) 98 | 99 | bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR) 100 | cv2.imshow('window_frame', bgr_image) 101 | if cv2.waitKey(1) & 0xFF == ord('q'): 102 | break 103 | 104 | cap.release() 105 | cv2.destroyAllWindows() 106 | -------------------------------------------------------------------------------- /models/emotion_model.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/models/emotion_model.hdf5 -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/utils/__init__.py -------------------------------------------------------------------------------- /utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/utils/__init__.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/datasets.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/utils/__pycache__/datasets.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/inference.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/utils/__pycache__/inference.cpython-36.pyc 
-------------------------------------------------------------------------------- /utils/__pycache__/preprocessor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/utils/__pycache__/preprocessor.cpython-36.pyc -------------------------------------------------------------------------------- /utils/data_augmentation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import shuffle 3 | from .preprocessor import preprocess_input 4 | from .preprocessor import _imread as imread 5 | from .preprocessor import _imresize as imresize 6 | from .preprocessor import to_categorical 7 | import scipy.ndimage as ndi 8 | import cv2 9 | 10 | class ImageGenerator(object): 11 | """ Image generator with saturation, brightness, lighting, contrast, 12 | horizontal flip and vertical flip transformations. It supports 13 | bounding boxes coordinates. 14 | 15 | TODO: 16 | - Finish support for not using bounding_boxes 17 | - Random crop 18 | - Test other transformations 19 | """ 20 | def __init__(self, ground_truth_data, batch_size, image_size, 21 | train_keys, validation_keys, 22 | ground_truth_transformer=None, 23 | path_prefix=None, 24 | saturation_var=0.5, 25 | brightness_var=0.5, 26 | contrast_var=0.5, 27 | lighting_std=0.5, 28 | horizontal_flip_probability=0.5, 29 | vertical_flip_probability=0.5, 30 | do_random_crop=False, 31 | grayscale=False, 32 | zoom_range=[0.75, 1.25], 33 | translation_factor=.3): 34 | 35 | self.ground_truth_data = ground_truth_data 36 | self.ground_truth_transformer = ground_truth_transformer 37 | self.batch_size = batch_size 38 | self.path_prefix = path_prefix 39 | self.train_keys = train_keys 40 | self.validation_keys = validation_keys 41 | self.image_size = image_size 42 | self.grayscale = grayscale 43 | self.color_jitter = [] 44 | if saturation_var: 45 | self.saturation_var = saturation_var 46 | self.color_jitter.append(self.saturation) 47 | if brightness_var: 48 | self.brightness_var = brightness_var 49 | self.color_jitter.append(self.brightness) 50 | if contrast_var: 51 | self.contrast_var = contrast_var 52 | self.color_jitter.append(self.contrast) 53 | self.lighting_std = lighting_std 54 | self.horizontal_flip_probability = horizontal_flip_probability 55 | self.vertical_flip_probability = vertical_flip_probability 56 | self.do_random_crop = do_random_crop 57 | self.zoom_range = zoom_range 58 | self.translation_factor = translation_factor 59 | 60 | def _do_random_crop(self, image_array): 61 | """IMPORTANT: random crop only works for classification since the 62 | current implementation does no transform bounding boxes""" 63 | height = image_array.shape[0] 64 | width = image_array.shape[1] 65 | x_offset = np.random.uniform(0, self.translation_factor * width) 66 | y_offset = np.random.uniform(0, self.translation_factor * height) 67 | offset = np.array([x_offset, y_offset]) 68 | scale_factor = np.random.uniform(self.zoom_range[0], 69 | self.zoom_range[1]) 70 | crop_matrix = np.array([[scale_factor, 0], 71 | [0, scale_factor]]) 72 | 73 | image_array = np.rollaxis(image_array, axis=-1, start=0) 74 | image_channel = [ndi.interpolation.affine_transform(image_channel, 75 | crop_matrix, offset=offset, order=0, mode='nearest', 76 | cval=0.0) for image_channel in image_array] 77 | 78 | image_array = np.stack(image_channel, axis=0) 79 | image_array = np.rollaxis(image_array, 0, 3) 80 | 
return image_array 81 | 82 | def do_random_rotation(self, image_array): 83 | """IMPORTANT: random rotation only works for classification since the 84 | current implementation does no transform bounding boxes""" 85 | height = image_array.shape[0] 86 | width = image_array.shape[1] 87 | x_offset = np.random.uniform(0, self.translation_factor * width) 88 | y_offset = np.random.uniform(0, self.translation_factor * height) 89 | offset = np.array([x_offset, y_offset]) 90 | scale_factor = np.random.uniform(self.zoom_range[0], 91 | self.zoom_range[1]) 92 | crop_matrix = np.array([[scale_factor, 0], 93 | [0, scale_factor]]) 94 | 95 | image_array = np.rollaxis(image_array, axis=-1, start=0) 96 | image_channel = [ndi.interpolation.affine_transform(image_channel, 97 | crop_matrix, offset=offset, order=0, mode='nearest', 98 | cval=0.0) for image_channel in image_array] 99 | 100 | image_array = np.stack(image_channel, axis=0) 101 | image_array = np.rollaxis(image_array, 0, 3) 102 | return image_array 103 | 104 | def _gray_scale(self, image_array): 105 | return image_array.dot([0.299, 0.587, 0.114]) 106 | 107 | def saturation(self, image_array): 108 | gray_scale = self._gray_scale(image_array) 109 | alpha = 2.0 * np.random.random() * self.brightness_var 110 | alpha = alpha + 1 - self.saturation_var 111 | image_array = alpha * image_array + (1 - alpha) * gray_scale[:, :, None] 112 | return np.clip(image_array, 0, 255) 113 | 114 | def brightness(self, image_array): 115 | alpha = 2 * np.random.random() * self.brightness_var 116 | alpha = alpha + 1 - self.saturation_var 117 | image_array = alpha * image_array 118 | return np.clip(image_array, 0, 255) 119 | 120 | def contrast(self, image_array): 121 | gray_scale = (self._gray_scale(image_array).mean() * 122 | np.ones_like(image_array)) 123 | alpha = 2 * np.random.random() * self.contrast_var 124 | alpha = alpha + 1 - self.contrast_var 125 | image_array = image_array * alpha + (1 - alpha) * gray_scale 126 | return np.clip(image_array, 0, 255) 127 | 128 | def lighting(self, image_array): 129 | covariance_matrix = np.cov(image_array.reshape(-1,3) / 130 | 255.0, rowvar=False) 131 | eigen_values, eigen_vectors = np.linalg.eigh(covariance_matrix) 132 | noise = np.random.randn(3) * self.lighting_std 133 | noise = eigen_vectors.dot(eigen_values * noise) * 255 134 | image_array = image_array + noise 135 | return np.clip(image_array, 0 ,255) 136 | 137 | def horizontal_flip(self, image_array, box_corners=None): 138 | if np.random.random() < self.horizontal_flip_probability: 139 | image_array = image_array[:, ::-1] 140 | if box_corners != None: 141 | box_corners[:, [0, 2]] = 1 - box_corners[:, [2, 0]] 142 | return image_array, box_corners 143 | 144 | def vertical_flip(self, image_array, box_corners=None): 145 | if (np.random.random() < self.vertical_flip_probability): 146 | image_array = image_array[::-1] 147 | if box_corners != None: 148 | box_corners[:, [1, 3]] = 1 - box_corners[:, [3, 1]] 149 | return image_array, box_corners 150 | 151 | def transform(self, image_array, box_corners=None): 152 | shuffle(self.color_jitter) 153 | for jitter in self.color_jitter: 154 | image_array = jitter(image_array) 155 | 156 | if self.lighting_std: 157 | image_array = self.lighting(image_array) 158 | 159 | if self.horizontal_flip_probability > 0: 160 | image_array, box_corners = self.horizontal_flip(image_array, 161 | box_corners) 162 | 163 | if self.vertical_flip_probability > 0: 164 | image_array, box_corners = self.vertical_flip(image_array, 165 | box_corners) 166 | return 
image_array, box_corners 167 | 168 | def preprocess_images(self, image_array): 169 | return preprocess_input(image_array) 170 | 171 | def flow(self, mode='train'): 172 | while True: 173 | if mode =='train': 174 | shuffle(self.train_keys) 175 | keys = self.train_keys 176 | elif mode == 'val' or mode == 'demo': 177 | shuffle(self.validation_keys) 178 | keys = self.validation_keys 179 | else: 180 | raise Exception('invalid mode: %s' % mode) 181 | 182 | inputs = [] 183 | targets = [] 184 | for key in keys: 185 | image_path = self.path_prefix + key 186 | image_array = imread(image_path) 187 | image_array = imresize(image_array, self.image_size) 188 | 189 | num_image_channels = len(image_array.shape) 190 | if num_image_channels != 3: 191 | continue 192 | 193 | ground_truth = self.ground_truth_data[key] 194 | 195 | if self.do_random_crop: 196 | image_array = self._do_random_crop(image_array) 197 | 198 | image_array = image_array.astype('float32') 199 | if mode == 'train' or mode == 'demo': 200 | if self.ground_truth_transformer != None: 201 | image_array, ground_truth = self.transform( 202 | image_array, 203 | ground_truth) 204 | ground_truth = ( 205 | self.ground_truth_transformer.assign_boxes( 206 | ground_truth)) 207 | else: 208 | image_array = self.transform(image_array)[0] 209 | 210 | if self.grayscale: 211 | image_array = cv2.cvtColor(image_array.astype('uint8'), 212 | cv2.COLOR_RGB2GRAY).astype('float32') 213 | image_array = np.expand_dims(image_array, -1) 214 | 215 | inputs.append(image_array) 216 | targets.append(ground_truth) 217 | if len(targets) == self.batch_size: 218 | inputs = np.asarray(inputs) 219 | targets = np.asarray(targets) 220 | # this will not work for boxes 221 | targets = to_categorical(targets) 222 | if mode == 'train' or mode == 'val': 223 | inputs = self.preprocess_images(inputs) 224 | yield self._wrap_in_dictionary(inputs, targets) 225 | if mode == 'demo': 226 | yield self._wrap_in_dictionary(inputs, targets) 227 | inputs = [] 228 | targets = [] 229 | 230 | def _wrap_in_dictionary(self, image_array, targets): 231 | return [{'input_1':image_array}, 232 | {'predictions':targets}] 233 | -------------------------------------------------------------------------------- /utils/datasets.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import pandas as pd 3 | import numpy as np 4 | from random import shuffle 5 | import os 6 | import cv2 7 | 8 | class DataManager(object): 9 | """Class for loading fer2013 emotion classification dataset or 10 | imdb gender classification dataset.""" 11 | def __init__(self, dataset_name='imdb', dataset_path=None, image_size=(48, 48)): 12 | 13 | self.dataset_name = dataset_name 14 | self.dataset_path = dataset_path 15 | self.image_size = image_size 16 | if self.dataset_path != None: 17 | self.dataset_path = dataset_path 18 | elif self.dataset_name == 'imdb': 19 | self.dataset_path = '../datasets/imdb_crop/imdb.mat' 20 | elif self.dataset_name == 'fer2013': 21 | self.dataset_path = '../datasets/fer2013/fer2013.csv' 22 | elif self.dataset_name == 'KDEF': 23 | self.dataset_path = '../datasets/KDEF/' 24 | else: 25 | raise Exception('Incorrect dataset name, please input imdb or fer2013') 26 | 27 | def get_data(self): 28 | if self.dataset_name == 'imdb': 29 | ground_truth_data = self._load_imdb() 30 | elif self.dataset_name == 'fer2013': 31 | ground_truth_data = self._load_fer2013() 32 | elif self.dataset_name == 'KDEF': 33 | ground_truth_data = self._load_KDEF() 34 | return 
ground_truth_data 35 | 36 | def _load_imdb(self): 37 | face_score_treshold = 3 38 | dataset = loadmat(self.dataset_path) 39 | image_names_array = dataset['imdb']['full_path'][0, 0][0] 40 | gender_classes = dataset['imdb']['gender'][0, 0][0] 41 | face_score = dataset['imdb']['face_score'][0, 0][0] 42 | second_face_score = dataset['imdb']['second_face_score'][0, 0][0] 43 | face_score_mask = face_score > face_score_treshold 44 | second_face_score_mask = np.isnan(second_face_score) 45 | unknown_gender_mask = np.logical_not(np.isnan(gender_classes)) 46 | mask = np.logical_and(face_score_mask, second_face_score_mask) 47 | mask = np.logical_and(mask, unknown_gender_mask) 48 | image_names_array = image_names_array[mask] 49 | gender_classes = gender_classes[mask].tolist() 50 | image_names = [] 51 | for image_name_arg in range(image_names_array.shape[0]): 52 | image_name = image_names_array[image_name_arg][0] 53 | image_names.append(image_name) 54 | return dict(zip(image_names, gender_classes)) 55 | 56 | def _load_fer2013(self): 57 | data = pd.read_csv(self.dataset_path) 58 | pixels = data['pixels'].tolist() 59 | width, height = 48, 48 60 | faces = [] 61 | for pixel_sequence in pixels: 62 | face = [int(pixel) for pixel in pixel_sequence.split(' ')] 63 | face = np.asarray(face).reshape(width, height) 64 | face = cv2.resize(face.astype('uint8'), self.image_size) 65 | faces.append(face.astype('float32')) 66 | faces = np.asarray(faces) 67 | faces = np.expand_dims(faces, -1) 68 | emotions = pd.get_dummies(data['emotion']).as_matrix() 69 | return faces, emotions 70 | 71 | def _load_KDEF(self): 72 | class_to_arg = get_class_to_arg(self.dataset_name) 73 | num_classes = len(class_to_arg) 74 | 75 | file_paths = [] 76 | for folder, subfolders, filenames in os.walk(self.dataset_path): 77 | for filename in filenames: 78 | if filename.lower().endswith(('.jpg')): 79 | file_paths.append(os.path.join(folder, filename)) 80 | 81 | num_faces = len(file_paths) 82 | y_size, x_size = self.image_size 83 | faces = np.zeros(shape=(num_faces, y_size, x_size)) 84 | emotions = np.zeros(shape=(num_faces, num_classes)) 85 | for file_arg, file_path in enumerate(file_paths): 86 | image_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) 87 | image_array = cv2.resize(image_array, (y_size, x_size)) 88 | faces[file_arg] = image_array 89 | file_basename = os.path.basename(file_path) 90 | file_emotion = file_basename[4:6] 91 | # there are two file names in the dataset that don't match the given classes 92 | try: 93 | emotion_arg = class_to_arg[file_emotion] 94 | except: 95 | continue 96 | emotions[file_arg, emotion_arg] = 1 97 | faces = np.expand_dims(faces, -1) 98 | return faces, emotions 99 | 100 | def get_labels(dataset_name): 101 | if dataset_name == 'fer2013': 102 | return {0:'angry',1:'disgust',2:'fear',3:'happy', 103 | 4:'sad',5:'surprise',6:'neutral'} 104 | elif dataset_name == 'imdb': 105 | return {0:'woman', 1:'man'} 106 | elif dataset_name == 'KDEF': 107 | return {0:'AN', 1:'DI', 2:'AF', 3:'HA', 4:'SA', 5:'SU', 6:'NE'} 108 | else: 109 | raise Exception('Invalid dataset name') 110 | 111 | def get_class_to_arg(dataset_name='fer2013'): 112 | if dataset_name == 'fer2013': 113 | return {'angry':0, 'disgust':1, 'fear':2, 'happy':3, 'sad':4, 114 | 'surprise':5, 'neutral':6} 115 | elif dataset_name == 'imdb': 116 | return {'woman':0, 'man':1} 117 | elif dataset_name == 'KDEF': 118 | return {'AN':0, 'DI':1, 'AF':2, 'HA':3, 'SA':4, 'SU':5, 'NE':6} 119 | else: 120 | raise Exception('Invalid dataset name') 121 | 122 | def 
split_imdb_data(ground_truth_data, validation_split=.2, do_shuffle=False): 123 | ground_truth_keys = sorted(ground_truth_data.keys()) 124 | if do_shuffle == True: 125 | shuffle(ground_truth_keys) 126 | training_split = 1 - validation_split 127 | num_train = int(training_split * len(ground_truth_keys)) 128 | train_keys = ground_truth_keys[:num_train] 129 | validation_keys = ground_truth_keys[num_train:] 130 | return train_keys, validation_keys 131 | 132 | def split_data(x, y, validation_split=.2): 133 | num_samples = len(x) 134 | num_train_samples = int((1 - validation_split)*num_samples) 135 | train_x = x[:num_train_samples] 136 | train_y = y[:num_train_samples] 137 | val_x = x[num_train_samples:] 138 | val_y = y[num_train_samples:] 139 | train_data = (train_x, train_y) 140 | val_data = (val_x, val_y) 141 | return train_data, val_data 142 | 143 | -------------------------------------------------------------------------------- /utils/datasets.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/utils/datasets.pyc -------------------------------------------------------------------------------- /utils/grad_cam.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import h5py 3 | import keras 4 | import keras.backend as K 5 | from keras.layers.core import Lambda 6 | from keras.models import Sequential 7 | from keras.models import load_model 8 | import numpy as np 9 | import tensorflow as tf 10 | from tensorflow.python.framework import ops 11 | 12 | from .preprocessor import preprocess_input 13 | 14 | 15 | def reset_optimizer_weights(model_filename): 16 | model = h5py.File(model_filename, 'r+') 17 | del model['optimizer_weights'] 18 | model.close() 19 | 20 | 21 | def target_category_loss(x, category_index, num_classes): 22 | return tf.multiply(x, K.one_hot([category_index], num_classes)) 23 | 24 | 25 | def target_category_loss_output_shape(input_shape): 26 | return input_shape 27 | 28 | 29 | def normalize(x): 30 | # utility function to normalize a tensor by its L2 norm 31 | return x / (K.sqrt(K.mean(K.square(x))) + 1e-5) 32 | 33 | 34 | def load_image(image_array): 35 | image_array = np.expand_dims(image_array, axis=0) 36 | image_array = preprocess_input(image_array) 37 | return image_array 38 | 39 | 40 | def register_gradient(): 41 | if "GuidedBackProp" not in ops._gradient_registry._registry: 42 | @ops.RegisterGradient("GuidedBackProp") 43 | def _GuidedBackProp(op, gradient): 44 | dtype = op.inputs[0].dtype 45 | guided_gradient = (gradient * tf.cast(gradient > 0., dtype) * 46 | tf.cast(op.inputs[0] > 0., dtype)) 47 | return guided_gradient 48 | 49 | 50 | def compile_saliency_function(model, activation_layer='conv2d_7'): 51 | input_image = model.input 52 | layer_output = model.get_layer(activation_layer).output 53 | max_output = K.max(layer_output, axis=3) 54 | saliency = K.gradients(K.sum(max_output), input_image)[0] 55 | return K.function([input_image, K.learning_phase()], [saliency]) 56 | 57 | 58 | def modify_backprop(model, name, task): 59 | graph = tf.get_default_graph() 60 | with graph.gradient_override_map({'Relu': name}): 61 | 62 | # get layers that have an activation 63 | activation_layers = [layer for layer in model.layers 64 | if hasattr(layer, 'activation')] 65 | 66 | # replace relu activation 67 | for layer in activation_layers: 68 | if layer.activation == keras.activations.relu: 69 | layer.activation = 
tf.nn.relu 70 | 71 | # re-instanciate a new model 72 | if task == 'gender': 73 | model_path = '../trained_models/gender_models/gender_mini_XCEPTION.21-0.95.hdf5' 74 | elif task == 'emotion': 75 | model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5' 76 | # model_path = '../trained_models/fer2013_mini_XCEPTION.119-0.65.hdf5' 77 | # model_path = '../trained_models/fer2013_big_XCEPTION.54-0.66.hdf5' 78 | new_model = load_model(model_path, compile=False) 79 | return new_model 80 | 81 | 82 | def deprocess_image(x): 83 | """ Same normalization as in: 84 | https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py 85 | """ 86 | if np.ndim(x) > 3: 87 | x = np.squeeze(x) 88 | # normalize tensor: center on 0., ensure std is 0.1 89 | x = x - x.mean() 90 | x = x / (x.std() + 1e-5) 91 | x = x * 0.1 92 | 93 | # clip to [0, 1] 94 | x = x + 0.5 95 | x = np.clip(x, 0, 1) 96 | 97 | # convert to RGB array 98 | x = x * 255 99 | if K.image_dim_ordering() == 'th': 100 | x = x.transpose((1, 2, 0)) 101 | x = np.clip(x, 0, 255).astype('uint8') 102 | return x 103 | 104 | def compile_gradient_function(input_model, category_index, layer_name): 105 | model = Sequential() 106 | model.add(input_model) 107 | 108 | num_classes = model.output_shape[1] 109 | target_layer = lambda x: target_category_loss(x, category_index, num_classes) 110 | model.add(Lambda(target_layer, 111 | output_shape = target_category_loss_output_shape)) 112 | 113 | loss = K.sum(model.layers[-1].output) 114 | conv_output = model.layers[0].get_layer(layer_name).output 115 | gradients = normalize(K.gradients(loss, conv_output)[0]) 116 | gradient_function = K.function([model.layers[0].input, K.learning_phase()], 117 | [conv_output, gradients]) 118 | return gradient_function 119 | 120 | def calculate_gradient_weighted_CAM(gradient_function, image): 121 | output, evaluated_gradients = gradient_function([image, False]) 122 | output, evaluated_gradients = output[0, :], evaluated_gradients[0, :, :, :] 123 | weights = np.mean(evaluated_gradients, axis = (0, 1)) 124 | CAM = np.ones(output.shape[0 : 2], dtype=np.float32) 125 | for weight_arg, weight in enumerate(weights): 126 | CAM = CAM + (weight * output[:, :, weight_arg]) 127 | CAM = cv2.resize(CAM, (64, 64)) 128 | CAM = np.maximum(CAM, 0) 129 | heatmap = CAM / np.max(CAM) 130 | 131 | #Return to BGR [0..255] from the preprocessed image 132 | image = image[0, :] 133 | image = image - np.min(image) 134 | image = np.minimum(image, 255) 135 | 136 | CAM = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET) 137 | CAM = np.float32(CAM) + np.float32(image) 138 | CAM = 255 * CAM / np.max(CAM) 139 | return np.uint8(CAM), heatmap 140 | 141 | def calculate_guided_gradient_CAM(preprocessed_input, gradient_function, saliency_function): 142 | CAM, heatmap = calculate_gradient_weighted_CAM(gradient_function, preprocessed_input) 143 | saliency = saliency_function([preprocessed_input, 0]) 144 | gradCAM = saliency[0] * heatmap[..., np.newaxis] 145 | #return deprocess_image(gradCAM) 146 | return deprocess_image(saliency[0]) 147 | #return saliency[0] 148 | 149 | def calculate_guided_gradient_CAM_v2(preprocessed_input, gradient_function, 150 | saliency_function, target_size=(128, 128)): 151 | CAM, heatmap = calculate_gradient_weighted_CAM(gradient_function, preprocessed_input) 152 | heatmap = np.squeeze(heatmap) 153 | heatmap = cv2.resize(heatmap.astype('uint8'), target_size) 154 | saliency = saliency_function([preprocessed_input, 0]) 155 | saliency = 
np.squeeze(saliency[0]) 156 | saliency = cv2.resize(saliency.astype('uint8'), target_size) 157 | gradCAM = saliency * heatmap 158 | gradCAM = deprocess_image(gradCAM) 159 | return np.expand_dims(gradCAM, -1) 160 | 161 | 162 | if __name__ == '__main__': 163 | import pickle 164 | faces = pickle.load(open('faces.pkl','rb')) 165 | face = faces[0] 166 | model_filename = '../../trained_models/emotion_models/mini_XCEPTION.523-0.65.hdf5' 167 | #reset_optimizer_weights(model_filename) 168 | model = load_model(model_filename) 169 | 170 | preprocessed_input = load_image(face) 171 | predictions = model.predict(preprocessed_input) 172 | predicted_class = np.argmax(predictions) 173 | gradient_function = compile_gradient_function(model, predicted_class, 'conv2d_6') 174 | register_gradient() 175 | guided_model = modify_backprop(model, 'GuidedBackProp') 176 | saliency_function = compile_saliency_function(guided_model) 177 | guided_gradCAM = calculate_guided_gradient_CAM(preprocessed_input, 178 | gradient_function, saliency_function) 179 | 180 | cv2.imwrite('guided_gradCAM.jpg', guided_gradCAM) 181 | 182 | 183 | -------------------------------------------------------------------------------- /utils/inference.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from keras.preprocessing import image 5 | 6 | def load_image(image_path, grayscale=False, target_size=None): 7 | pil_image = image.load_img(image_path, grayscale, target_size) 8 | return image.img_to_array(pil_image) 9 | 10 | def load_detection_model(model_path): 11 | detection_model = cv2.CascadeClassifier(model_path) 12 | return detection_model 13 | 14 | def detect_faces(detection_model, gray_image_array): 15 | return detection_model.detectMultiScale(gray_image_array, 1.3, 5) 16 | 17 | def draw_bounding_box(face_coordinates, image_array, color): 18 | x, y, w, h = face_coordinates 19 | cv2.rectangle(image_array, (x, y), (x + w, y + h), color, 2) 20 | 21 | def apply_offsets(face_coordinates, offsets): 22 | x, y, width, height = face_coordinates 23 | x_off, y_off = offsets 24 | return (x - x_off, x + width + x_off, y - y_off, y + height + y_off) 25 | 26 | def draw_text(coordinates, image_array, text, color, x_offset=0, y_offset=0, 27 | font_scale=2, thickness=2): 28 | x, y = coordinates[:2] 29 | cv2.putText(image_array, text, (x + x_offset, y + y_offset), 30 | cv2.FONT_HERSHEY_SIMPLEX, 31 | font_scale, color, thickness, cv2.LINE_AA) 32 | 33 | def get_colors(num_classes): 34 | colors = plt.cm.hsv(np.linspace(0, 1, num_classes)).tolist() 35 | colors = np.asarray(colors) * 255 36 | return colors 37 | 38 | -------------------------------------------------------------------------------- /utils/inference.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/utils/inference.pyc -------------------------------------------------------------------------------- /utils/preprocessor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.misc import imread, imresize 3 | 4 | 5 | def preprocess_input(x, v2=True): 6 | x = x.astype('float32') 7 | x = x / 255.0 8 | if v2: 9 | x = x - 0.5 10 | x = x * 2.0 11 | return x 12 | 13 | def _imread(image_name): 14 | return imread(image_name) 15 | 16 | def _imresize(image_array, size): 17 | return imresize(image_array, size) 18 | 19 
| def to_categorical(integer_classes, num_classes=2): 20 | integer_classes = np.asarray(integer_classes, dtype='int') 21 | num_samples = integer_classes.shape[0] 22 | categorical = np.zeros((num_samples, num_classes)) 23 | categorical[np.arange(num_samples), integer_classes] = 1 24 | return categorical 25 | 26 | -------------------------------------------------------------------------------- /utils/preprocessor.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petercunha/Emotion/ca64ddc6feade328f36a0cb606b7aaf5b479345d/utils/preprocessor.pyc -------------------------------------------------------------------------------- /utils/visualizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.cm as cm 3 | from mpl_toolkits.axes_grid1 import make_axes_locatable 4 | import matplotlib.pyplot as plt 5 | import numpy.ma as ma 6 | import cv2 7 | from .inference import draw_text 8 | 9 | def make_mosaic(images, num_rows, num_cols, border=1, class_names=None): 10 | num_images = len(images) 11 | image_shape = images.shape[1:] 12 | mosaic = ma.masked_all((num_rows * image_shape[0] + (num_rows - 1) * border, 13 | num_cols * image_shape[1] + (num_cols - 1) * border), 14 | dtype=np.float32) 15 | paddedh = image_shape[0] + border 16 | paddedw = image_shape[1] + border 17 | for image_arg in range(num_images): 18 | row = int(np.floor(image_arg / num_cols)) 19 | col = image_arg % num_cols 20 | image = np.squeeze(images[image_arg]) 21 | image_shape = image.shape 22 | mosaic[row * paddedh:row * paddedh + image_shape[0], 23 | col * paddedw:col * paddedw + image_shape[1]] = image 24 | return mosaic 25 | 26 | def make_mosaic_v2(images, num_mosaic_rows=None, 27 | num_mosaic_cols=None, border=1): 28 | images = np.squeeze(images) 29 | num_images, image_pixels_rows, image_pixels_cols = images.shape 30 | if num_mosaic_rows is None and num_mosaic_cols is None: 31 | box_size = int(np.ceil(np.sqrt(num_images))) 32 | num_mosaic_rows = num_mosaic_cols = box_size 33 | num_mosaic_pixel_rows = num_mosaic_rows * (image_pixels_rows + border) 34 | num_mosaic_pixel_cols = num_mosaic_cols * (image_pixels_cols + border) 35 | mosaic = np.empty(shape=(num_mosaic_pixel_rows, num_mosaic_pixel_cols)) 36 | mosaic_col_arg = 0 37 | mosaic_row_arg = 0 38 | for image_arg in range(num_images): 39 | if image_arg % num_mosaic_cols == 0 and image_arg != 0: 40 | mosaic_col_arg = mosaic_col_arg + 1 41 | mosaic_row_arg = 0 42 | x0 = image_pixels_cols * (mosaic_row_arg) 43 | x1 = image_pixels_cols * (mosaic_row_arg + 1) 44 | y0 = image_pixels_rows * (mosaic_col_arg) 45 | y1 = image_pixels_rows * (mosaic_col_arg + 1) 46 | image = images[image_arg] 47 | mosaic[y0:y1, x0:x1] = image 48 | mosaic_row_arg = mosaic_row_arg + 1 49 | return mosaic 50 | 51 | def pretty_imshow(axis, data, vmin=None, vmax=None, cmap=None): 52 | if cmap is None: 53 | cmap = cm.jet 54 | if vmin is None: 55 | vmin = data.min() 56 | if vmax is None: 57 | vmax = data.max() 58 | cax = None 59 | divider = make_axes_locatable(axis) 60 | cax = divider.append_axes('right', size='5%', pad=0.05) 61 | image = axis.imshow(data, vmin=vmin, vmax=vmax, 62 | interpolation='nearest', cmap=cmap) 63 | plt.colorbar(image, cax=cax) 64 | 65 | def normal_imshow(axis, data, vmin=None, vmax=None, 66 | cmap=None, axis_off=True): 67 | if cmap is None: 68 | cmap = cm.jet 69 | if vmin is None: 70 | vmin = data.min() 71 | if vmax is None: 72 | vmax = data.max() 73 | 
image = axis.imshow(data, vmin=vmin, vmax=vmax, 74 | interpolation='nearest', cmap=cmap) 75 | if axis_off: 76 | plt.axis('off') 77 | return image 78 | 79 | def display_image(face, class_vector=None, 80 | class_decoder=None, pretty=False): 81 | if class_vector is not None and class_decoder is None: 82 | raise Exception('Provide class decoder') 83 | face = np.squeeze(face) 84 | color_map = None 85 | if len(face.shape) < 3: 86 | color_map = 'gray' 87 | plt.figure() 88 | if class_vector is not None: 89 | class_arg = np.argmax(class_vector) 90 | class_name = class_decoder[class_arg] 91 | plt.title(class_name) 92 | if pretty: 93 | pretty_imshow(plt.gca(), face, cmap=color_map) 94 | else: 95 | plt.imshow(face, color_map) 96 | 97 | def draw_mosaic(data, num_rows, num_cols, class_vectors=None, 98 | class_decoder=None, cmap='gray'): 99 | 100 | if class_vectors is not None and class_decoder is None: 101 | raise Exception('Provide class decoder') 102 | 103 | figure, axis_array = plt.subplots(num_rows, num_cols) 104 | figure.set_size_inches(8, 8, forward=True) 105 | titles = [] 106 | if class_vectors is not None: 107 | for vector_arg in range(len(class_vectors)): 108 | class_arg = np.argmax(class_vectors[vector_arg]) 109 | class_name = class_decoder[class_arg] 110 | titles.append(class_name) 111 | 112 | image_arg = 0 113 | for row_arg in range(num_rows): 114 | for col_arg in range(num_cols): 115 | image = data[image_arg] 116 | image = np.squeeze(image) 117 | axis_array[row_arg, col_arg].axis('off') 118 | axis_array[row_arg, col_arg].imshow(image, cmap=cmap) 119 | axis_array[row_arg, col_arg].set_title(titles[image_arg]) 120 | image_arg = image_arg + 1 121 | plt.tight_layout() 122 | 123 | if __name__ == '__main__': 124 | #from utils.data_manager import DataManager 125 | from utils.utils import get_labels 126 | from keras.models import load_model 127 | import pickle 128 | 129 | #dataset_name = 'fer2013' 130 | #model_path = '../trained_models/emotion_models/simple_CNN.985-0.66.hdf5' 131 | dataset_name = 'fer2013' 132 | class_decoder = get_labels(dataset_name) 133 | #data_manager = DataManager(dataset_name) 134 | #faces, emotions = data_manager.get_data() 135 | faces = pickle.load(open('faces.pkl', 'rb')) 136 | emotions = pickle.load(open('emotions.pkl', 'rb')) 137 | pretty_imshow(plt.gca(), make_mosaic(faces[:4], 2, 2), cmap='gray') 138 | plt.show() 139 | 140 | """ 141 | image_arg = 0 142 | face = faces[image_arg:image_arg + 1] 143 | emotion = emotions[image_arg:image_arg + 1] 144 | display_image(face, emotion, class_decoder) 145 | plt.show() 146 | 147 | normal_imshow(plt.gca(), make_mosaic(faces[:4], 3, 3), cmap='gray') 148 | plt.show() 149 | 150 | draw_mosaic(faces, 2, 2, emotions, class_decoder) 151 | plt.show() 152 | 153 | """ 154 | model = load_model('../trained_models/emotion_models/simple_CNN.985-0.66.hdf5') 155 | conv1_weights = model.layers[2].get_weights() 156 | kernel_conv1_weights = conv1_weights[0] 157 | kernel_conv1_weights = np.squeeze(kernel_conv1_weights) 158 | kernel_conv1_weights = np.rollaxis(kernel_conv1_weights, 2, 0) 159 | kernel_conv1_weights = np.expand_dims(kernel_conv1_weights, -1) 160 | num_kernels = kernel_conv1_weights.shape[0] 161 | box_size = int(np.ceil(np.sqrt(num_kernels))) 162 | print('Box size:', box_size) 163 | 164 | print('Kernel shape', kernel_conv1_weights.shape) 165 | plt.figure(figsize=(15, 15)) 166 | plt.title('conv1 weights') 167 | pretty_imshow(plt.gca(), 168 | make_mosaic(kernel_conv1_weights, box_size, box_size), 169 | cmap=cm.binary) 170 | plt.show() 171 | 
--------------------------------------------------------------------------------