├── .gitignore ├── CKPlus_convert_to_Jaffe_format.py ├── README ├── caffe_functions.py ├── datasets └── Emojis │ ├── Angry.png │ ├── Disgust.png │ ├── Fear.png │ ├── Happy.png │ ├── Neutral.png │ ├── Sad.png │ └── Surprise.png ├── doc └── HappyNet.pdf ├── execute_0_create_file_list ├── execute_1_create_lmdb_database ├── execute_2_create_mean_image ├── execute_3_train_custom_model ├── execute_4_cleanup_training_data ├── gather_training_data.py ├── haarcascade_frontalface_default.xml ├── models └── Custom_Model │ ├── deploy.prototxt │ ├── loss_history.txt │ ├── solver.prototxt │ └── train.prototxt ├── opencv_functions.py ├── process_dataset.py ├── process_image.py └── utility_functions.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore files ending with ~ (temporary files created by text editors) 2 | *~ 3 | *.swp 4 | 5 | # Ignore files ending in .o or .a (object and archive files) 6 | *.[oa] 7 | 8 | # Ignore compiled python files 9 | *.pyc 10 | 11 | # Ignore all datasets, except emojis 12 | datasets/* 13 | !datasets/Emojis/ 14 | 15 | # Ignore all models except CustomModel 16 | models/* 17 | !models/Custom_Model/ 18 | 19 | # Ignore Caffe Models 20 | *.caffemodel 21 | 22 | # Ignore any spurious image, text, or document files 23 | # # Does not ignore .png so that emojis will be included 24 | *.jpg 25 | *.jpeg 26 | *.tiff 27 | *.doc 28 | *.docx 29 | 30 | 31 | -------------------------------------------------------------------------------- /CKPlus_convert_to_Jaffe_format.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ############################################################################### 4 | # 5 | # This is a quick script to convert all of the files in the open-source 6 | # Cohn-Kanade Plus (CK+) emotions dataset to use the same naming convention 7 | # as the Japanese Female Facial Expressions (JAFFE) dataset 8 | # 9 | # This then allows the two datasets to be merged 10 | # 11 | # Note that CK+ includes transitional images (i.e. a face halfway between neutral 12 | # and strong emotion). Only neutral faces and strong emotions are included. No 13 | # transitional images are included. 
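# For example, a cropped image named some_face.png that is labeled Happy ('HA')
# is renamed to some_face.HA.png by the loop at the bottom of this script
# (illustrative filename).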
14 | # 15 | # Date modified: March 2016 16 | # 17 | # Authors: Dan Duncan 18 | # Gautam Shine 19 | # 20 | ############################################################################### 21 | 22 | import os, shutil, sys, time, re, glob 23 | import numpy as np 24 | import matplotlib.pyplot as plt 25 | import cv2 as cv 26 | from PIL import Image 27 | import caffe 28 | 29 | from caffe_functions import * 30 | from opencv_functions import * 31 | from utility_functions import * 32 | 33 | ### USER-SPECIFIED VARIABLES: ### 34 | 35 | # List your dataset root directories here: 36 | dirCKPlus = 'datasets/CK_Plus' 37 | 38 | # Select which dataset to use (case insensitive): 39 | dataset = 'ckplus' 40 | 41 | # Flags: 42 | cropFlag = True # False disables image cropping 43 | 44 | ### START SCRIPT: ### 45 | 46 | # Set up inputs 47 | dir = dirCKPlus 48 | color = False 49 | single_face = True 50 | 51 | # Clean up and discard anything from the last run 52 | dirCrop = dir + '/cropped' 53 | rmdir(dirCrop) 54 | 55 | # Master list of categories for the EmotiW network 56 | categories = [ 'Angry' , 'Disgust' , 'Fear' , 'Happy' , 'Neutral' , 'Sad' , 'Surprise'] 57 | suffixes = ['AN', 'DI', 'FE', 'HA', 'NE', 'SA', 'SU'] 58 | 59 | # Load dataset image list 60 | input_list, labels = importDataset(dir, dataset, categories) 61 | 62 | # Perform detection and cropping if desired (and it should be desired) 63 | mkdir(dirCrop) 64 | input_list = faceCrop(dirCrop, input_list, color, single_face) 65 | 66 | # Debug printouts 67 | # print input_list 68 | # print labels 69 | 70 | # Rename all files to JAFFE format 71 | for i in range(len(input_list)): 72 | # Get file info 73 | filename = input_list[i] 74 | lab = labels[i] 75 | labText = suffixes[lab] 76 | 77 | # Generate new filename (split on the last '.' so paths containing dots are safe) 78 | fn = filename.rsplit('.',1) 79 | out = fn[0] + '.' + labText + '.' + fn[1] 80 | 81 | # Rename file 82 | os.rename(filename,out) 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Source code for HappyNet 2 | Real-Time Human Emotion Recognition with Convolutional Neural Nets 3 | Demo video: https://youtu.be/MDHtzOdnSgA 4 | 5 | Authors: Dan Duncan 6 | Gautam Shine 7 | Chris English 8 | 9 | Stanford CS231n - March 2016 10 | 11 | About HappyNet: 12 | HappyNet takes pictures or webcam video as input. It detects all faces in each frame, then 13 | classifies which emotion each face is expressing. 14 | 15 | HappyNet then replaces each face with an emoji corresponding to that emotion. 16 | 17 | Recognized emotions: 18 | Neutral 19 | Happy 20 | Sad 21 | Angry 22 | Surprise 23 | Fear 24 | 25 | For legacy reasons, HappyNet also tries to classify a seventh emotion: disgust. We usually combine 26 | disgust and angry, since HappyNet couldn't tell these two apart. With more training, it might learn to distinguish them.
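For example, the merge can be applied to the network's integer prediction after classification (an illustrative snippet, not one of the scripts in this repo):

    categories = ['Angry','Disgust','Fear','Happy','Neutral','Sad','Surprise']
    label = prediction.argmax()
    if label == categories.index('Disgust'):
        label = categories.index('Angry')   # fold Disgust into Angry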
27 | 28 | 29 | Training accuracy was 91% and test accuracy was 57%, with the following requirements: 30 | - User's facial expression must be strong / exaggerated 31 | - Lighting must be good (no shadows on face) 32 | - Camera is at eye level or slightly above eye level 33 | 34 | The scripts in this repository allow you to: 35 | - Run HappyNet 36 | - Retrain HappyNet on new data 37 | - Automatically generate your own training set 38 | 39 | 40 | Credits: 41 | 42 | This was built using the Caffe and OpenCV libraries 43 | 44 | We used the Emotions in the Wild (EitW) Caffe model as our starting point 45 | - This model is available on Caffe Model Zoo 46 | - EitW is itself based on VGG_S Net 47 | 48 | We retrained EitW on new data: 49 | Cohn-Kanade Plus (CK+) dataset (available for online download) 50 | Japanese Female Facial Expressions (JAFFE) dataset (also available online) 51 | Our own data, generated using our 'gather_training_data' script 52 | 53 | Requirements: 54 | > 2 GB of memory 55 | Caffe and OpenCV installed 56 | Webcam 57 | Note: The webcam currently does not work on virtual machines. 58 | Use a native Mac or Linux system; don't try a virtual machine running on Windows. 59 | 60 | If you want to run on GPU: 61 | 2 GB or more VRAM 62 | CUDA and cuDNN libraries installed 63 | Caffe must be compiled with CUDA and cuDNN support enabled 64 | 65 | 66 | 67 | Description of files: 68 | 69 | Main scripts: 70 | gather_training_data.py - Use this to generate a custom training set 71 | process_dataset.py - Read in an entire training set and calculate accuracy over the set 72 | process_image.py - Read in a single image, add the correct emoji, and write to file 73 | video_generate.py - Run HappyNet in real time and save output to video 74 | video_test.py - Run HappyNet in real time; does not save to video 75 | 76 | Scripts for retraining the network on new data: 77 | 78 | These 5 scripts are to be run in numerical order 79 | Note that you'll need to modify them with your own paths 80 | 81 | Generate a Caffe-compatible database of input images: 82 | execute_0_create_file_list 83 | execute_1_create_lmdb_database 84 | 85 | Generate a mean image (mean.binaryproto file) from the input dataset: 86 | execute_2_create_mean_image 87 | 88 | Retrain an existing Caffe model with the new inputs: 89 | execute_3_train_custom_model 90 | 91 | * Note: this needs to be modified if you are running on a GPU. Our network was 92 | VGG_S net, which requires 2 GB of GPU memory, so we ran on the CPU. 93 | The modification is just an extra flag to the caffe train command, e.g. "-gpu 0" 94 | 95 | Delete all unnecessary files: 96 | execute_4_cleanup_training_data 97 | 98 | This deletes the output files from scripts 0, 1, and 2. 99 | Run this when you are getting ready to start over from file 0. 100 | Don't run it until then though - you might want to reuse the info in those files! 101 | 102 | Utility functions: 103 | caffe_functions.py - anything dealing primarily with Caffe 104 | opencv_functions.py - anything dealing primarily with OpenCV 105 | utility_functions.py - general functions, mostly related to file I/O 106 | 107 | Datasets: 108 | Only the emojis we used are included. 109 | Cohn-Kanade Plus (CK+) and Japanese Female Facial Expressions (JAFFE) can be downloaded online. 110 | 111 | Models: 112 | deploy.prototxt - Architecture of our model (this file should not need to be changed) 113 | solver.prototxt - This configures the retraining process. 114 | train.prototxt - This configures the architecture during training.
115 | Mainly used to add layer-specific learning rates. 116 | loss_history.txt - Log file from our last retraining on our dataset 117 | 118 | NOT INCLUDED: 119 | EmotiW_VGG_S.caffemodel - 120 | This is the file with all the weights. It is 500 MB, which is too large to include in this repository. 121 | However, you can download the Emotions in the Wild model from Caffe Model Zoo. 122 | Retrain it on new data for a day or two, and you can get numbers similar to 123 | our model. 124 | We trained on: 125 | Homebrewed dataset of 2000 images of 5 people making all 6 emotions 126 | All data was generated with the script gather_training_data.py 127 | We trained for: 128 | About 24 hours 129 | A bigger dataset could be collected in a couple of hours and would 130 | likely greatly improve performance. 131 | 132 | 133 | -------------------------------------------------------------------------------- /caffe_functions.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Caffe VGG_S net emotion classification 3 | # 4 | # This file contains utility functions for interactions with the Caffe 5 | # deep learning framework. 6 | # 7 | # 8 | # Date modified: March 2016 9 | # 10 | # Authors: Dan Duncan 11 | # Gautam Shine 12 | # 13 | ############################################################################### 14 | 15 | import os, shutil, sys, time, re, glob 16 | import numpy as np 17 | import matplotlib.pyplot as plt 18 | import cv2 as cv 19 | from PIL import Image 20 | import caffe 21 | 22 | from utility_functions import * 23 | 24 | # Load mean caffe image 25 | def loadMeanCaffeImage(img="mean_training_image.binaryproto",curDir="datasets/"): 26 | mean_filename = os.path.join(curDir,img) 27 | proto_data = open(mean_filename, "rb").read() 28 | a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data) 29 | mean = caffe.io.blobproto_to_array(a)[0] 30 | return mean 31 | 32 | # Display an image (input is a numpy array) 33 | def showimage(img): 34 | if img.ndim == 3: 35 | img = img[:, :, ::-1] 36 | plt.set_cmap('jet') 37 | plt.imshow(img,vmin=0, vmax=0.2) 38 | 39 | # Display network activations 40 | def vis_square(data, padsize=1, padval=0): 41 | data -= data.min() 42 | data /= data.max() 43 | 44 | # Force the number of filters to be square 45 | n = int(np.ceil(np.sqrt(data.shape[0]))) 46 | padding = ((0, n ** 2 - data.shape[0]), (0, padsize), (0, padsize)) + ((0, 0),) * (data.ndim - 3) 47 | data = np.pad(data, padding, mode='constant', constant_values=(padval, padval)) 48 | 49 | # Tile the filters into an image 50 | data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1))) 51 | data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:]) 52 | 53 | showimage(data) 54 | 55 | # Plot the last image and the conv1 layer's weights and responses 56 | def plot_layer(input_image, VGG_S_Net, layer): 57 | plt.figure(1) 58 | _ = plt.imshow(input_image) 59 | 60 | plt.figure(2) 61 | filters = VGG_S_Net.params[layer][0].data 62 | vis_square(filters.transpose(0, 2, 3, 1)) 63 | 64 | plt.figure(3) 65 | feat = VGG_S_Net.blobs[layer].data[0] 66 | vis_square(feat) 67 | 68 | plt.show(block=False) 69 | 70 | # RGB dimension swap + resize 71 | # Depending on how an image was imported, sometimes it will be XxYxRGB and 72 | # other times it will be RGBxXxY. 73 | # This function takes either format as input and always returns RGBxXxY.
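# For example (illustrative shapes): a 480x640x3 array returned by
# caffe.io.load_image is resized and rearranged by mod_dim into 3x256x256.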
74 | def mod_dim(img, x=256, y=256, c=3): 75 | # Resize only if necessary: 76 | if not np.array_equal(img.shape,[c,x,y]): 77 | resized = caffe.io.resize_image(img, (x,y,c)) # (256, 256, 3) 78 | rearranged = np.swapaxes(np.swapaxes(resized, 1, 2), 0, 1) # (3,256,256) 79 | else: 80 | rearranged = img 81 | 82 | return rearranged 83 | 84 | # Calculate mean image over list of image filenames 85 | # Can also return the mean image if it is already saved as "mean.binaryproto" 86 | def compute_mean(input_list, plot_mean=False): 87 | # If no data supplied, use mean supplied with pretrained model 88 | if len(input_list) == 0: 89 | net_root = '.' 90 | net_dir = 'VGG_S_rgb' 91 | mean_filename=os.path.join(net_root, net_dir, 'mean.binaryproto') 92 | proto_data = open(mean_filename, "rb").read() 93 | a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data) 94 | mean = caffe.io.blobproto_to_array(a)[0] 95 | else: 96 | x,y,c = 256,256,3 97 | mean = np.zeros((c, x, y)) 98 | for img_file in input_list: 99 | img = caffe.io.load_image(img_file) 100 | img = mod_dim(img, x, y, c) 101 | mean += img 102 | mean /= len(input_list) 103 | 104 | # Plot the mean image if desired: 105 | if plot_mean: 106 | plt.imshow(np.swapaxes(np.swapaxes(mean, 0, 1), 1, 2)) 107 | plt.show() 108 | return mean 109 | 110 | # Given filename for mean image, and the directory containing a network file 111 | # Construct and return a Caffe network object 112 | # Note: For legacy reasons, this assumes your model is stored in: 113 | # ./models/[net_dir]/EmotiW_VGG_S.caffemodel 114 | # where net_dir is supplied by the user 115 | def make_net(mean=None, net_dir='VGG_S_rgb'): 116 | # net_dir specifies a root directory containing a *.caffemodel file 117 | # Options in our setup are: VGG_S_[rgb / lbp / cyclic_lbp / cyclic_lbp_5 / cyclic_lbp_10] 118 | 119 | # This should hopefully already be in your system path, but just to be sure: 120 | caffe_root = '/home/Users/Dan/Development/caffe/' 121 | sys.path.insert(0, caffe_root + 'python') 122 | 123 | # Configure matplotlib 124 | plt.rcParams['figure.figsize'] = (10, 10) 125 | plt.rcParams['image.interpolation'] = 'nearest' 126 | plt.rcParams['image.cmap'] = 'gray' 127 | 128 | # Generate paths to the various model files 129 | net_root = 'models' 130 | net_pretrained = os.path.join(net_root, net_dir, 'EmotiW_VGG_S.caffemodel') 131 | net_model_file = os.path.join(net_root, net_dir, 'deploy.prototxt') 132 | 133 | # Construct Caffe network object 134 | VGG_S_Net = caffe.Classifier(net_model_file, net_pretrained, 135 | mean=mean, 136 | channel_swap=(2,1,0), 137 | raw_scale=255, 138 | image_dims=(256, 256)) 139 | return VGG_S_Net 140 | 141 | # Load a minibatch of images 142 | # Inputs: List of image filenames, 143 | # Color boolean (true if images are in color), 144 | # List of labels corresponding to each image, 145 | # Index of first image to load 146 | # Number of images to load 147 | # Output: List of image numpy arrays of size Num x (W x H x 3) 148 | # List of labels for just the images in the batch 149 | def load_minibatch(input_list, color, labels, start,num): 150 | # Enforce minimum on start 151 | start = max(0,start) 152 | 153 | # Enforce maximum on end 154 | end = start + num 155 | end = min(len(input_list), end) 156 | 157 | # Isolate files 158 | files = input_list[start:end] 159 | 160 | images = [] 161 | for file in files: 162 | img = caffe.io.load_image(file, color) 163 | 164 | # Handle incorrect image dims for uncropped images 165 | # TODO: Get uncropped images to import correctly 166 | if 
img.shape[0] == 3 or img.shape[0] == 1: 167 | img = np.swapaxes(np.swapaxes(img, 0, 1), 1, 2) 168 | 169 | # BUG FIX: Is this ok? 170 | # color=True gets the correct desired dimension of WxHx3 171 | # But color=False gets images of WxHx1. Need WxHx3 or will get "Index out of bounds" exception 172 | # Fix by concatenating three copies of the image 173 | if img.shape[2] == 1: 174 | img = cv.merge([img,img,img]) 175 | 176 | # Add image array to batch 177 | images.append(img) 178 | 179 | labelsReduced = labels[start:end] 180 | return images, labelsReduced 181 | 182 | # Big function: 183 | # Classify all images in a list of image file names 184 | # Using the inputs, constructs a network, imports images either individually or in minibatches, 185 | # gets the network classification, and builds up the confusion matrix. 186 | # No return value, but it can plot the confusion matrix at the end 187 | def classify_emotions(input_list, color, categories, labels, plot_neurons, plot_confusion,useMean=True): 188 | # Compute mean 189 | #mean = compute_mean(input_list) 190 | if useMean: 191 | mean = loadMeanCaffeImage() 192 | else: 193 | mean = None 194 | 195 | 196 | # Create VGG_S net with mean 197 | VGG_S_Net = make_net(mean,net_dir='Custom_Model') 198 | 199 | # Classify images in directory 200 | conf_mat = [] # tuples to be passed to confusion matrix generator 201 | 202 | numImages = len(input_list) 203 | 204 | # Due to network architecture, using minibatches does not speed anything up 205 | # (at least for datasets of up to 3000 images) 206 | miniBatch = False 207 | if miniBatch: 208 | i = 0 209 | batchSize = 500 210 | 211 | metrics = [] # Will hold tuples of timing metrics for all batches 212 | 213 | totalLoad, totalPredict = 0, 0 214 | 215 | while i < numImages: 216 | 217 | t = time.time() 218 | images,labelsReduced = load_minibatch(input_list, color, labels, i, batchSize) 219 | loadTime = time.time() - t 220 | totalLoad += loadTime 221 | print 'Batch of ' + str(len(images)) + ' images.' 222 | 223 | # images is a list of input images 224 | # Input images should be WxHx3, e.g. 
490x640x3 225 | t = time.time() 226 | prediction = VGG_S_Net.predict(images, oversample=False) 227 | predictTime = time.time() - t 228 | totalPredict += predictTime 229 | 230 | for j in range(len(prediction)): 231 | pred = prediction[j] 232 | lab = labelsReduced[j] 233 | 234 | # Append (label, prediction) tuple to confusion matrix list 235 | conf_mat.append((lab, pred.argmax())) 236 | 237 | # Print results as Filename: Prediction 238 | #print(input_list[i+j].split('/')[-1]+': '+categories[prediction.argmax()]) 239 | 240 | metrics.append((len(images),loadTime,predictTime)) 241 | i += batchSize 242 | 243 | # Print all timing metrics 244 | print "\nTiming data for classify_emotions() (minibatch mode):" 245 | for i in range(len(metrics)): 246 | bs, ltime, ptime = metrics[i] 247 | print "Batch " + str(i) + " (" + str(bs) + " images):\tLoad: " + str(ltime) + "s\t Predict: " + str(ptime) + "s" 248 | print "\nTotal images: " + str(len(input_list)) 249 | print "Total time loading: " + str(totalLoad) + "\t(" + str(float(totalLoad)/len(input_list)) + "s / image)" 250 | print "Total time predicting: " + str(totalPredict) + "\t(" + str(float(totalPredict)/len(input_list)) + "s / image)" 251 | print " " 252 | 253 | else: 254 | loadTime, predictTime = 0, 0 255 | 256 | for i in range(numImages): 257 | img_file = input_list[i] 258 | label = labels[i] 259 | 260 | print('File name: ', img_file) 261 | t = time.time() 262 | input_image = caffe.io.load_image(img_file) 263 | loadTime += time.time() - t 264 | 265 | # Handle incorrect image dims for uncropped images 266 | # TODO: Get uncropped images to import correctly 267 | if input_image.shape[0] == 3: 268 | input_image = np.swapaxes(np.swapaxes(input_image, 0, 1), 1, 2) 269 | 270 | # Input image should be WxHxK, e.g. 490x640x3 271 | t = time.time() 272 | prediction = VGG_S_Net.predict([input_image], oversample=False) 273 | predictTime += time.time() - t 274 | 275 | # Append (label, prediction) tuple to confusion matrix list 276 | conf_mat.append((label, prediction.argmax())) 277 | 278 | # Print results as Filename: Prediction 279 | print(img_file.split('/')[-1]+': '+categories[prediction.argmax()]) 280 | 281 | # Print timing metrics: 282 | print "\nTiming data for classify_emotions() (serial mode):" 283 | print "Load time: " + str(loadTime) + "s\t(" + str(loadTime/numImages) + "s / image)" 284 | print "Predict time: " + str(predictTime) + "s\t(" + str(predictTime/numImages) + "s / image)" 285 | print " " 286 | 287 | if plot_neurons: 288 | layer = 'conv1' 289 | plot_layer(input_image, VGG_S_Net, layer) 290 | 291 | # Generate the confusion matrix and calculate accuracy 292 | confusion_matrix(conf_mat, categories, plot_confusion) 293 | 294 |
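# Illustrative call, using the argument names defined above (not a script entry point):
#   classify_emotions(input_list, color, categories, labels,
#                     plot_neurons=False, plot_confusion=True)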
295 | # Classify all faces in a single video frame 296 | # Return a list of integer labels, one per face 297 | def classify_video_frame(frame, faces, VGG_S_Net, categories=None): 298 | # Convert to float format 299 | # Video frames are normally imported as uint8 300 | frame = frame.astype(np.float32) 301 | frame /= 255.0 302 | 303 | labels = [] 304 | 305 | for x,y,w,h in faces: 306 | img = frame[y:y+h,x:x+w,:] 307 | 308 | # Input image should be WxHxK, e.g. 490x640x3 309 | prediction = VGG_S_Net.predict([img], oversample=False) 310 | 311 | 312 | labels.append(prediction.argmax()) 313 | 314 | return labels 315 | 316 | -------------------------------------------------------------------------------- /datasets/Emojis/Angry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danduncan/HappyNet/54a7966c90640a053c213206d2eebf6ca720cfb5/datasets/Emojis/Angry.png -------------------------------------------------------------------------------- /datasets/Emojis/Disgust.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danduncan/HappyNet/54a7966c90640a053c213206d2eebf6ca720cfb5/datasets/Emojis/Disgust.png -------------------------------------------------------------------------------- /datasets/Emojis/Fear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danduncan/HappyNet/54a7966c90640a053c213206d2eebf6ca720cfb5/datasets/Emojis/Fear.png -------------------------------------------------------------------------------- /datasets/Emojis/Happy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danduncan/HappyNet/54a7966c90640a053c213206d2eebf6ca720cfb5/datasets/Emojis/Happy.png -------------------------------------------------------------------------------- /datasets/Emojis/Neutral.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danduncan/HappyNet/54a7966c90640a053c213206d2eebf6ca720cfb5/datasets/Emojis/Neutral.png -------------------------------------------------------------------------------- /datasets/Emojis/Sad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danduncan/HappyNet/54a7966c90640a053c213206d2eebf6ca720cfb5/datasets/Emojis/Sad.png -------------------------------------------------------------------------------- /datasets/Emojis/Surprise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danduncan/HappyNet/54a7966c90640a053c213206d2eebf6ca720cfb5/datasets/Emojis/Surprise.png -------------------------------------------------------------------------------- /doc/HappyNet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danduncan/HappyNet/54a7966c90640a053c213206d2eebf6ca720cfb5/doc/HappyNet.pdf -------------------------------------------------------------------------------- /execute_0_create_file_list: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Assembles file paths and ground truth labels for formatted data 4 | # The output is a formatted list file, which is then used by the 5 | # network to import and label all images. 6 | # 7 | # Dataset must conform to a particular filename convention: 8 | # *.HA*.tiff (or .png) 9 | # where the two letters after the first '.' encode the emotion label 10 | # (e.g. 0000123.AN.0.png would be labeled 'angry') 11 | # The other two-letter codes are in the code below 12 | #
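# Example lines written to datasets/training_list.txt (illustrative filenames;
# each label is the index into categoriesEitW below, so HA -> 3, SA -> 5):
#   0000123.HA.0.png 3
#   0000124.SA.2.png 5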
13 | # This particular naming convention is borrowed from the Jaffe open-source dataset: 14 | # Jaffe = Japanese Female Facial Expressions (free download on their website) 15 | # Author: Gautam Shine 16 | 17 | # To assemble data into LMDB: 18 | # Run this script (execute_0_create_file_list), then: 19 | # [caffe root]/build/tools/convert_imageset --resize_height=[h] --resize_width=[w] [dataset root] [image paths/labels file] [lmdb name] 20 | # Example: 21 | # /home/gshine/Documents/caffe/build/tools/convert_imageset --resize_height=224 --resize_width=224 datasets/jaffe/ jaffe_list.txt jaffe_train_lmdb 22 | # /Users/Dan/Development/caffe/build/tools/convert_imageset --resize_height=224 --resize_width=224 datasets/training_images/ datasets/training_list.txt datasets/training_set_lmdb 23 | 24 | # To train the net: 25 | # [caffe root]/build/tools/caffe train -solver [solver file] -weights [model file] 26 | # Example: 27 | # /home/gshine/Documents/caffe/build/tools/caffe train -solver models/VGG_S_rgb/solver.prototxt -weights models/VGG_S_rgb/EmotiW_VGG_S.caffemodel 28 | # /Users/Dan/Development/caffe/build/tools/caffe train -solver models/Custom_Model/solver.prototxt -weights models/Custom_Model/EmotiW_VGG_S.caffemodel 29 | 30 | import os, glob 31 | 32 | categoriesEitW = [ 'Angry' , 'Disgust' , 'Fear' , 'Happy' , 'Neutral' , 'Sad' , 'Surprise'] 33 | 34 | jaffe_categories_map = { 35 | 'HA': categoriesEitW.index('Happy'), 36 | 'SA': categoriesEitW.index('Sad'), 37 | 'NE': categoriesEitW.index('Neutral'), 38 | 'AN': categoriesEitW.index('Angry'), 39 | 'FE': categoriesEitW.index('Fear'), 40 | 'DI': categoriesEitW.index('Disgust'), 41 | 'SU': categoriesEitW.index('Surprise') 42 | } 43 | 44 | def get_label(fname): 45 | label = fname.split('.')[1][0:2] 46 | return jaffe_categories_map[label] 47 | 48 | # File and label list to input to caffe 49 | f = open('datasets/training_list.txt', 'w') 50 | 51 | # List of images to train on 52 | # Include png for homemade images, and tiff for jaffe images 53 | dir = 'datasets/training_images/' 54 | imgList = glob.glob(dir+'*.png') + glob.glob(dir+'*.tiff') 55 | 56 | 57 | for img in imgList: 58 | if os.path.isdir(img): 59 | continue 60 | label = get_label(img) 61 | fname = os.path.basename(img) # filename only; the dataset root is passed to convert_imageset 62 | f.write(fname + ' ' + str(label) + '\n') 63 | 64 | f.close() 65 | -------------------------------------------------------------------------------- /execute_1_create_lmdb_database: -------------------------------------------------------------------------------- 1 | /Users/Dan/Development/caffe/build/tools/convert_imageset --resize_height=224 --resize_width=224 datasets/training_images/ datasets/training_list.txt datasets/training_set_lmdb 2 | 3 | -------------------------------------------------------------------------------- /execute_2_create_mean_image: -------------------------------------------------------------------------------- 1 | /Users/Dan/Development/caffe/build/tools/compute_image_mean datasets/training_set_lmdb/ datasets/mean_training_image.binaryproto 2 | -------------------------------------------------------------------------------- /execute_3_train_custom_model: -------------------------------------------------------------------------------- 1 | time /Users/Dan/Development/caffe/build/tools/caffe train -solver models/Custom_Model/solver.prototxt -weights models/Custom_Model/EmotiW_VGG_S.caffemodel | tee caffe_loss_history.txt
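# Note: to train on a GPU instead of the CPU, append a device flag such as "-gpu 0"
# to the command above (see the README).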
2 | 3 | -------------------------------------------------------------------------------- /execute_4_cleanup_training_data: -------------------------------------------------------------------------------- 1 | rm datasets/mean_training_image.binaryproto 2 | rm -r datasets/training_set_lmdb 3 | rm datasets/training_list.txt 4 | -------------------------------------------------------------------------------- /gather_training_data.py: -------------------------------------------------------------------------------- 1 | ############################################################################################# 2 | # 3 | # This is a program used to generate your own custom dataset of labeled emotions. 4 | # It uses the webcam and prompts the user to act out an emotion. 5 | # When the user is ready, they press ENTER, and the webcam saves a snapshot of their expression. 6 | # All files are saved using the Japanese Female Facial Expressions (JAFFE) dataset naming 7 | # convention. 8 | # 9 | # Instructions: 10 | # - Execute as a Python script 11 | # - If working properly, a window will pop up with a feed from your webcam 12 | # Note: This does not appear to work on virtual machines. It was used on a Macbook. 13 | # - Follow the prompts on the shell screen 14 | # - When pressing a button, make sure the video window is selected, not the text screen. 15 | # The video window is the one taking your text inputs 16 | # 17 | # Possible text inputs: 18 | # ENTER - Save image 19 | # SPACE - Skip to next emotion without saving 20 | # ESC - Quit the program 21 | # 22 | # 23 | # Date modified: March 2016 24 | # 25 | # Authors: Dan Duncan 26 | # Gautam Shine 27 | # 28 | ############################################################################################# 29 | 30 | import os, shutil, sys, time, re, glob 31 | import numpy as np 32 | import matplotlib.pyplot as plt 33 | import cv2 as cv 34 | 35 | import caffe 36 | import contextlib, cStringIO 37 | import random 38 | 39 | from caffe_functions import * 40 | from opencv_functions import * 41 | from utility_functions import * 42 | from PIL import Image 43 | 44 | ############################################################################################# 45 | # 46 | # HELPER FUNCTIONS 47 | # 48 | ############################################################################################# 49 | 50 | # Filename format looks like: 51 | # training_data/0000000.HA.0.png 52 | # outDir is self-explanatory 53 | # counter gets converted into a string of length 7, with leading 0's 54 | # label is the two-character emotion label 55 | # subCount is a number label for jittered images 56 | # extension is the filetype 57 | def getFilename(counter,subCount=0,outDir=None,strLength=7,label='HA',extension='.png'): 58 | if outDir is None: 59 | outDir = "" 60 | else: 61 | outDir += '/' 62 | 63 | if subCount is None: 64 | subCount = "" 65 | else: 66 | subCount = "." + str(subCount) 67 | 68 | numStr = str(counter).zfill(strLength) 69 | 70 | return outDir + numStr + "." + label + subCount + extension 71 |
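# e.g. getFilename(12, subCount=3, outDir='datasets/training_images', label='HA')
# returns 'datasets/training_images/0000012.HA.3.png' (illustrative arguments)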
72 | # Suppress print statements within a function call 73 | # Just call: 74 | # with nostdout(): 75 | # yourfunction(); 76 | @contextlib.contextmanager 77 | def nostdout(): 78 | save_stdout = sys.stdout 79 | sys.stdout = cStringIO.StringIO() 80 | yield 81 | sys.stdout = save_stdout 82 | 83 | # Get a random emotion label 84 | def getRandomLabel(pickFrom=None): 85 | if pickFrom is None: 86 | pickFrom = [0,1,2,3,4,5,6] 87 | 88 | return random.choice(pickFrom) 89 | 90 | 91 | # Crop image and save to file 92 | def saveSingleImage(frame,file): 93 | # Save cropped image. Can also rescale cropbox 94 | im = Image.fromarray(toggleRGB(frame)) 95 | im.save(file) 96 | 97 | # Crop and save image, including adding jitter 98 | def saveAcceptedImage(frame,faces,counter,outDir=None,strLength=7,label='HA',extension='.png',jitter=False): 99 | 100 | 101 | if jitter: 102 | frames = jitterImage(frame,faces) 103 | else: 104 | frames = [imgCrop(frame,faces[0])] 105 | 106 | subCount = 0 107 | for frame in frames: 108 | filename = getFilename(counter,outDir=outDir,subCount=subCount,strLength=strLength,label=label,extension=extension) 109 | saveSingleImage(frame,filename) 110 | subCount += 1 111 | 112 | 113 | # Jitter an image 114 | # Returns several jittered versions of the input image 115 | def jitterImage(frame,faces): 116 | # Define constants 117 | numShiftMax = 4; # Number of shifted images to produce 118 | numColorMax = 6; # Number of color-shifted images to produce 119 | maxShift = 0.1 # Maximum displacement, as a fraction of the cropbox size 120 | maxColorShift = 30; # Raw pixel shift 121 | 122 | # Frame width and height 123 | fw = frame.shape[1] 124 | fh = frame.shape[0] 125 | 126 | x,y,w,h = faces[0] 127 | 128 | frames = []; # Will hold output jittered images 129 | 130 | # Return original unjittered image 131 | frames.append(frame[y:y+h,x:x+w]) 132 | 133 | # Shift image by up to 10% of cropbox size in each direction 134 | shiftCount = 0 135 | while shiftCount < numShiftMax: 136 | # Generate shifts: -0.1 < shift < .1 137 | xshift = np.random.uniform(0.0,maxShift*2) - maxShift 138 | yshift = np.random.uniform(0.0,maxShift*2) - maxShift 139 | 140 | # Apply shifts 141 | xt = x + int(xshift*w) 142 | yt = y + int(yshift*h) 143 | 144 | # Verify shifts are within limits 145 | if xt >= 0 and yt >= 0 and xt+w < fw and yt+h < fh: 146 | # New values are ok 147 | frames.append(frame[yt:yt+h,xt:xt+w]) 148 | shiftCount += 1 149 | 150 | # Brighten or darken image uniformly 151 | # Raw pixel values are 0 to 255 152 | for i in range(numColorMax): 153 | shift = random.randint(-maxColorShift,maxColorShift) 154 | ftmp = frame.astype(np.int) + shift 155 | 156 | # Make sure ftmp stays within 0 and 255 157 | ftmp[ftmp < 0] = 0 158 | ftmp[ftmp > 255] = 255 159 | 160 | # Add new image to output (cropped to the unshifted box) 161 | ftmp = ftmp.astype(np.uint8) 162 | frames.append(ftmp[y:y+h,x:x+w]) 163 | 164 | return frames 165 | 166 | 167 | 168 | 169 | ################################################################################################ 170 | # 171 | # START SCRIPT 172 | # 173 | ################################################################################################# 174 | 175 | # Pick mode (train or validate) 176 | validationMode = False 177 | 178 | # Pick output size in pixels of all cropped images (images are all square) 179 | imgSize = 200; 180 | boxScale = 1.2 # Size of crop boxes (relative to original filter size) 181 | jitter = True; # Jitter accepted images?
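# Note: with jitter enabled, each accepted capture is saved as
# 1 + numShiftMax + numColorMax = 11 cropped images (see jitterImage above).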
182 | 183 | # Initialize all labels 184 | categories = [ 'Angry' , 'Disgust' , 'Fear' , 'Happy' , 'Neutral' , 'Sad' , 'Surprise'] 185 | suffixes = [ 'AN', 'DI', 'FE', 'HA', 'NE', 'SA', 'SU'] 186 | pickFrom = [ 2, 4, 5 ] # Only prompt user for emotions in this list 187 | 188 | if validationMode: 189 | jitter = False 190 | outDir = 'datasets/validation_images' 191 | else: 192 | jitter = True 193 | outDir = 'datasets/training_images' 194 | 195 | counter = 300 # Used to increment filenames 196 | 197 | # Output filename configuration: 198 | strLength = 7; # Length of output filename number string 199 | extension = '.png' # Output file type 200 | 201 | # Check that outDir and counter are properly initialized 202 | print "\n" 203 | if not os.path.exists(outDir): 204 | print "Output directory does not exist. Making directory" 205 | os.mkdir(outDir) 206 | else: 207 | print "Output directory already exists" 208 | 209 | numCheck = 1000; # Number of filenames to check before giving up 210 | num = 0; 211 | while True: 212 | strCheck = getFilename(counter,outDir=outDir,label="*") 213 | print "Checking: " + strCheck 214 | if glob.glob(strCheck): 215 | print "\tError: File exists. Incrementing counter" 216 | counter += 1 217 | num += 1 218 | else: 219 | print "First valid file is: " + strCheck 220 | break 221 | 222 | if num > numCheck: 223 | print "ERROR: No available filename up to " + strCheck + " could be found." 224 | sys.exit(0) 225 | 226 | 227 | # Set up face detection 228 | faceCascades = load_cascades() 229 | 230 | # Set up display window 231 | cv.namedWindow("preview") 232 | 233 | # Open input video stream 234 | vc = cv.VideoCapture(0) 235 | 236 | # Check that video stream is running 237 | if vc.isOpened(): # try to get the first frame 238 | rval, frame = vc.read() 239 | #frame = frame.astype(np.float32) 240 | else: 241 | rval = False 242 | 243 | 244 | print "\n" 245 | nextEmotion = True 246 | while rval: 247 | 248 | if nextEmotion: # Generate next emotion 249 | nextEmotion = False 250 | 251 | # Generate a random integer label 252 | intLabel = getRandomLabel(pickFrom) 253 | 254 | # Get emotion string and file suffix 255 | emotion = categories[intLabel] 256 | suf = suffixes[intLabel] 257 | 258 | # Print prompt to user: 259 | print "Emotion is: " + emotion + ".\t(ENTER to capture, SPACE to skip)" 260 | 261 | # Read in next frame 262 | rval, frame = vc.read() 263 | 264 | # Mirror image 265 | frame = np.fliplr(frame) 266 | 267 | # Detect faces 268 | # Find all faces 269 | with nostdout(): 270 | newFrame, faces = DetectFace(frame,True,faceCascades,single_face=False,second_pass=False,draw_rects=True,scale=boxScale) 271 | 272 | oneFace = False 273 | if faces is None or len(faces) == 0: 274 | # Poor input: do nothing to frame 275 | #newFrame = frame 276 | pass 277 | elif len(faces) > 1: 278 | # Too many faces found 279 | pass 280 | else: 281 | # Just the right number of faces found 282 | oneFace = True 283 | 284 | # Show video with or without boxed face 285 | cv.imshow("preview", newFrame) 286 | 287 | # Wait for user to press key. On ESC, close program 288 | key = cv.waitKey(20) 289 | if key == 27: # ESC --> exit on ESC 290 | print 'ESC was pressed! Quitting...' 291 | break 292 | elif key == 32: # SPACE --> Next image 293 | print 'Label skipped' 294 | nextEmotion = True 295 | continue; # Skip to the next frame 296 | elif key == 13: # ENTER --> Accept image 297 | if not oneFace: 298 | print "Error: ENTER pressed, but face invalid. Keep trying..."
299 | print "Emotion is: " + emotion + ".\t(ENTER to capture, SPACE to skip)" 300 | else: 301 | saveAcceptedImage(frame,faces,counter,outDir=outDir,strLength=strLength,label=suf,extension=extension,jitter=jitter) 302 | print 'Image accepted and saved!' 303 | counter += 1 304 | nextEmotion = True 305 | continue; # Break out of loop 306 | else: # Invalid key, ignore 307 | pass 308 | 309 | 310 | cv.destroyWindow("preview") 311 | -------------------------------------------------------------------------------- /models/Custom_Model/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | layers { 8 | name: "conv1" 9 | type: CONVOLUTION 10 | bottom: "data" 11 | top: "conv1" 12 | convolution_param { 13 | num_output: 96 14 | kernel_size: 7 15 | stride: 2 16 | } 17 | } 18 | layers { 19 | name: "relu1" 20 | type: RELU 21 | bottom: "conv1" 22 | top: "conv1" 23 | } 24 | layers { 25 | name: "norm1" 26 | type: LRN 27 | bottom: "conv1" 28 | top: "norm1" 29 | lrn_param { 30 | local_size: 5 31 | alpha: 0.0005 32 | beta: 0.75 33 | } 34 | } 35 | layers { 36 | name: "pool1" 37 | type: POOLING 38 | bottom: "norm1" 39 | top: "pool1" 40 | pooling_param { 41 | pool: MAX 42 | kernel_size: 3 43 | stride: 3 44 | } 45 | } 46 | layers { 47 | name: "conv2" 48 | type: CONVOLUTION 49 | bottom: "pool1" 50 | top: "conv2" 51 | convolution_param { 52 | num_output: 256 53 | pad: 2 54 | kernel_size: 5 55 | } 56 | } 57 | layers { 58 | name: "relu2" 59 | type: RELU 60 | bottom: "conv2" 61 | top: "conv2" 62 | } 63 | layers { 64 | name: "pool2" 65 | type: POOLING 66 | bottom: "conv2" 67 | top: "pool2" 68 | pooling_param { 69 | pool: MAX 70 | kernel_size: 2 71 | stride: 2 72 | } 73 | } 74 | layers { 75 | name: "conv3" 76 | type: CONVOLUTION 77 | bottom: "pool2" 78 | top: "conv3" 79 | convolution_param { 80 | num_output: 512 81 | pad: 1 82 | kernel_size: 3 83 | } 84 | } 85 | layers { 86 | name: "relu3" 87 | type: RELU 88 | bottom: "conv3" 89 | top: "conv3" 90 | } 91 | layers { 92 | name: "conv4" 93 | type: CONVOLUTION 94 | bottom: "conv3" 95 | top: "conv4" 96 | convolution_param { 97 | num_output: 512 98 | pad: 1 99 | kernel_size: 3 100 | } 101 | } 102 | layers { 103 | name: "relu4" 104 | type: RELU 105 | bottom: "conv4" 106 | top: "conv4" 107 | } 108 | layers { 109 | name: "conv5" 110 | type: CONVOLUTION 111 | bottom: "conv4" 112 | top: "conv5" 113 | convolution_param { 114 | num_output: 512 115 | pad: 1 116 | kernel_size: 3 117 | } 118 | } 119 | layers { 120 | name: "relu5" 121 | type: RELU 122 | bottom: "conv5" 123 | top: "conv5" 124 | } 125 | layers { 126 | name: "pool5" 127 | type: POOLING 128 | bottom: "conv5" 129 | top: "pool5" 130 | pooling_param { 131 | pool: MAX 132 | kernel_size: 3 133 | stride: 3 134 | } 135 | } 136 | layers { 137 | name: "fc6" 138 | type: INNER_PRODUCT 139 | bottom: "pool5" 140 | top: "fc6" 141 | inner_product_param { 142 | num_output: 4048 143 | } 144 | } 145 | layers { 146 | name: "relu6" 147 | type: RELU 148 | bottom: "fc6" 149 | top: "fc6" 150 | } 151 | layers { 152 | name: "drop6" 153 | type: DROPOUT 154 | bottom: "fc6" 155 | top: "fc6" 156 | dropout_param { 157 | dropout_ratio: 0.5 158 | } 159 | } 160 | layers { 161 | name: "fc7" 162 | type: INNER_PRODUCT 163 | bottom: "fc6" 164 | top: "fc7" 165 | inner_product_param { 166 | num_output: 4048 167 | } 168 | } 169 | layers { 170 | name: "relu7" 171 | type: RELU 172 | bottom: "fc7" 173 | top: "fc7" 
174 | } 175 | layers { 176 | name: "drop7" 177 | type: DROPOUT 178 | bottom: "fc7" 179 | top: "fc7" 180 | dropout_param { 181 | dropout_ratio: 0.5 182 | } 183 | } 184 | layers { 185 | name: "fc8_cat" 186 | type: INNER_PRODUCT 187 | bottom: "fc7" 188 | top: "fc8" 189 | inner_product_param { 190 | num_output: 7 191 | } 192 | } 193 | layers { 194 | name: "prob" 195 | type: SOFTMAX 196 | bottom: "fc8" 197 | top: "prob" 198 | } 199 | -------------------------------------------------------------------------------- /models/Custom_Model/loss_history.txt: -------------------------------------------------------------------------------- 1 | -12-234196:emotion-conv-net Dan$ ./execute_3_train_custom_model 2 | I0309 03:53:07.924198 2039427072 caffe.cpp:178] Use CPU. 3 | I0309 03:53:08.244709 2039427072 solver.cpp:48] Initializing solver from parameters: 4 | test_iter: 1 5 | test_interval: 20 6 | base_lr: 0.001 7 | display: 10 8 | max_iter: 3000 9 | lr_policy: "fixed" 10 | momentum: 0.9 11 | snapshot: 100 12 | snapshot_prefix: "snapshot" 13 | solver_mode: CPU 14 | net: "models/Custom_Model/train.prototxt" 15 | momentum2: 0.999 16 | type: "Adam" 17 | I0309 03:53:08.245106 2039427072 solver.cpp:91] Creating training net from net file: models/Custom_Model/train.prototxt 18 | I0309 03:53:08.245399 2039427072 upgrade_proto.cpp:51] Attempting to upgrade input file specified using deprecated V1LayerParameter: models/Custom_Model/train.prototxt 19 | I0309 03:53:08.245509 2039427072 upgrade_proto.cpp:59] Successfully upgraded file specified using deprecated V1LayerParameter 20 | I0309 03:53:08.245621 2039427072 net.cpp:322] The NetState phase (0) differed from the phase (1) specified by a rule in layer training_test 21 | I0309 03:53:08.245648 2039427072 net.cpp:49] Initializing net from parameters: 22 | name: "CaffeNet" 23 | state { 24 | phase: TRAIN 25 | } 26 | layer { 27 | name: "training_train" 28 | type: "Data" 29 | top: "data" 30 | top: "label" 31 | include { 32 | phase: TRAIN 33 | } 34 | transform_param { 35 | mean_file: "datasets/mean_training_image.binaryproto" 36 | } 37 | data_param { 38 | source: "datasets/training_set_lmdb" 39 | batch_size: 400 40 | backend: LMDB 41 | } 42 | } 43 | layer { 44 | name: "conv1" 45 | type: "Convolution" 46 | bottom: "data" 47 | top: "conv1" 48 | param { 49 | lr_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | } 54 | convolution_param { 55 | num_output: 96 56 | kernel_size: 7 57 | stride: 2 58 | } 59 | } 60 | layer { 61 | name: "relu1" 62 | type: "ReLU" 63 | bottom: "conv1" 64 | top: "conv1" 65 | } 66 | layer { 67 | name: "norm1" 68 | type: "LRN" 69 | bottom: "conv1" 70 | top: "norm1" 71 | lrn_param { 72 | local_size: 5 73 | alpha: 0.0005 74 | beta: 0.75 75 | } 76 | } 77 | layer { 78 | name: "pool1" 79 | type: "Pooling" 80 | bottom: "norm1" 81 | top: "pool1" 82 | pooling_param { 83 | pool: MAX 84 | kernel_size: 3 85 | stride: 3 86 | } 87 | } 88 | layer { 89 | name: "conv2" 90 | type: "Convolution" 91 | bottom: "pool1" 92 | top: "conv2" 93 | param { 94 | lr_mult: 0 95 | } 96 | param { 97 | lr_mult: 0 98 | } 99 | convolution_param { 100 | num_output: 256 101 | pad: 2 102 | kernel_size: 5 103 | } 104 | } 105 | layer { 106 | name: "relu2" 107 | type: "ReLU" 108 | bottom: "conv2" 109 | top: "conv2" 110 | } 111 | layer { 112 | name: "pool2" 113 | type: "Pooling" 114 | bottom: "conv2" 115 | top: "pool2" 116 | pooling_param { 117 | pool: MAX 118 | kernel_size: 2 119 | stride: 2 120 | } 121 | } 122 | layer { 123 | name: "conv3" 124 | type: "Convolution" 125 | bottom: "pool2" 126 | 
top: "conv3" 127 | param { 128 | lr_mult: 0 129 | } 130 | param { 131 | lr_mult: 0 132 | } 133 | convolution_param { 134 | num_output: 512 135 | pad: 1 136 | kernel_size: 3 137 | } 138 | } 139 | layer { 140 | name: "relu3" 141 | type: "ReLU" 142 | bottom: "conv3" 143 | top: "conv3" 144 | } 145 | layer { 146 | name: "conv4" 147 | type: "Convolution" 148 | bottom: "conv3" 149 | top: "conv4" 150 | param { 151 | lr_mult: 0 152 | } 153 | param { 154 | lr_mult: 0 155 | } 156 | convolution_param { 157 | num_output: 512 158 | pad: 1 159 | kernel_size: 3 160 | } 161 | } 162 | layer { 163 | name: "relu4" 164 | type: "ReLU" 165 | bottom: "conv4" 166 | top: "conv4" 167 | } 168 | layer { 169 | name: "conv5" 170 | type: "Convolution" 171 | bottom: "conv4" 172 | top: "conv5" 173 | param { 174 | lr_mult: 0 175 | } 176 | param { 177 | lr_mult: 0 178 | } 179 | convolution_param { 180 | num_output: 512 181 | pad: 1 182 | kernel_size: 3 183 | } 184 | } 185 | layer { 186 | name: "relu5" 187 | type: "ReLU" 188 | bottom: "conv5" 189 | top: "conv5" 190 | } 191 | layer { 192 | name: "pool5" 193 | type: "Pooling" 194 | bottom: "conv5" 195 | top: "pool5" 196 | pooling_param { 197 | pool: MAX 198 | kernel_size: 3 199 | stride: 3 200 | } 201 | } 202 | layer { 203 | name: "fc6" 204 | type: "InnerProduct" 205 | bottom: "pool5" 206 | top: "fc6" 207 | param { 208 | lr_mult: 1 209 | } 210 | param { 211 | lr_mult: 1 212 | } 213 | inner_product_param { 214 | num_output: 4048 215 | } 216 | } 217 | layer { 218 | name: "relu6" 219 | type: "ReLU" 220 | bottom: "fc6" 221 | top: "fc6" 222 | } 223 | layer { 224 | name: "drop6" 225 | type: "Dropout" 226 | bottom: "fc6" 227 | top: "fc6" 228 | dropout_param { 229 | dropout_ratio: 0.5 230 | } 231 | } 232 | layer { 233 | name: "fc7" 234 | type: "InnerProduct" 235 | bottom: "fc6" 236 | top: "fc7" 237 | param { 238 | lr_mult: 1 239 | } 240 | param { 241 | lr_mult: 1 242 | } 243 | inner_product_param { 244 | num_output: 4048 245 | } 246 | } 247 | layer { 248 | name: "relu7" 249 | type: "ReLU" 250 | bottom: "fc7" 251 | top: "fc7" 252 | } 253 | layer { 254 | name: "drop7" 255 | type: "Dropout" 256 | bottom: "fc7" 257 | top: "fc7" 258 | dropout_param { 259 | dropout_ratio: 0.5 260 | } 261 | } 262 | layer { 263 | name: "fc8_cat" 264 | type: "InnerProduct" 265 | bottom: "fc7" 266 | top: "fc8" 267 | param { 268 | lr_mult: 1 269 | } 270 | param { 271 | lr_mult: 1 272 | } 273 | inner_product_param { 274 | num_output: 7 275 | } 276 | } 277 | layer { 278 | name: "prob" 279 | type: "SoftmaxWithLoss" 280 | bottom: "fc8" 281 | bottom: "label" 282 | } 283 | I0309 03:53:08.245957 2039427072 layer_factory.hpp:77] Creating layer training_train 284 | I0309 03:53:08.251075 2039427072 net.cpp:106] Creating Layer training_train 285 | I0309 03:53:08.251111 2039427072 net.cpp:411] training_train -> data 286 | I0309 03:53:08.251133 2039427072 net.cpp:411] training_train -> label 287 | I0309 03:53:08.251154 2039427072 data_transformer.cpp:25] Loading mean file from: datasets/mean_training_image.binaryproto 288 | I0309 03:53:08.256718 3211264 db_lmdb.cpp:38] Opened lmdb datasets/training_set_lmdb 289 | I0309 03:53:08.256925 2039427072 data_layer.cpp:41] output data size: 400,3,224,224 290 | I0309 03:53:08.591356 2039427072 net.cpp:150] Setting up training_train 291 | I0309 03:53:08.591398 2039427072 net.cpp:157] Top shape: 400 3 224 224 (60211200) 292 | I0309 03:53:08.591416 2039427072 net.cpp:157] Top shape: 400 (400) 293 | I0309 03:53:08.591424 2039427072 net.cpp:165] Memory required for data: 240846400 294 | 
I0309 03:53:08.591434 2039427072 layer_factory.hpp:77] Creating layer conv1 295 | I0309 03:53:08.591462 2039427072 net.cpp:106] Creating Layer conv1 296 | I0309 03:53:08.591475 2039427072 net.cpp:454] conv1 <- data 297 | I0309 03:53:08.591487 2039427072 net.cpp:411] conv1 -> conv1 298 | I0309 03:53:08.785176 2039427072 net.cpp:150] Setting up conv1 299 | I0309 03:53:08.785208 2039427072 net.cpp:157] Top shape: 400 96 109 109 (456230400) 300 | I0309 03:53:08.785219 2039427072 net.cpp:165] Memory required for data: 2065768000 301 | I0309 03:53:08.785235 2039427072 layer_factory.hpp:77] Creating layer relu1 302 | I0309 03:53:08.785254 2039427072 net.cpp:106] Creating Layer relu1 303 | I0309 03:53:08.785262 2039427072 net.cpp:454] relu1 <- conv1 304 | I0309 03:53:08.785271 2039427072 net.cpp:397] relu1 -> conv1 (in-place) 305 | I0309 03:53:08.785472 2039427072 net.cpp:150] Setting up relu1 306 | I0309 03:53:08.785485 2039427072 net.cpp:157] Top shape: 400 96 109 109 (456230400) 307 | I0309 03:53:08.785495 2039427072 net.cpp:165] Memory required for data: 3890689600 308 | I0309 03:53:08.785501 2039427072 layer_factory.hpp:77] Creating layer norm1 309 | I0309 03:53:08.785516 2039427072 net.cpp:106] Creating Layer norm1 310 | I0309 03:53:08.785522 2039427072 net.cpp:454] norm1 <- conv1 311 | I0309 03:53:08.785533 2039427072 net.cpp:411] norm1 -> norm1 312 | I0309 03:53:08.785784 2039427072 net.cpp:150] Setting up norm1 313 | I0309 03:53:08.785802 2039427072 net.cpp:157] Top shape: 400 96 109 109 (456230400) 314 | I0309 03:53:08.785811 2039427072 net.cpp:165] Memory required for data: 5715611200 315 | I0309 03:53:08.785818 2039427072 layer_factory.hpp:77] Creating layer pool1 316 | I0309 03:53:08.785830 2039427072 net.cpp:106] Creating Layer pool1 317 | I0309 03:53:08.785836 2039427072 net.cpp:454] pool1 <- norm1 318 | I0309 03:53:08.785847 2039427072 net.cpp:411] pool1 -> pool1 319 | I0309 03:53:08.785866 2039427072 net.cpp:150] Setting up pool1 320 | I0309 03:53:08.785872 2039427072 net.cpp:157] Top shape: 400 96 37 37 (52569600) 321 | I0309 03:53:08.785881 2039427072 net.cpp:165] Memory required for data: 5925889600 322 | I0309 03:53:08.785887 2039427072 layer_factory.hpp:77] Creating layer conv2 323 | I0309 03:53:08.785898 2039427072 net.cpp:106] Creating Layer conv2 324 | I0309 03:53:08.785904 2039427072 net.cpp:454] conv2 <- pool1 325 | I0309 03:53:08.785913 2039427072 net.cpp:411] conv2 -> conv2 326 | I0309 03:53:08.787889 2039427072 net.cpp:150] Setting up conv2 327 | I0309 03:53:08.787921 2039427072 net.cpp:157] Top shape: 400 256 37 37 (140185600) 328 | I0309 03:53:08.787930 2039427072 net.cpp:165] Memory required for data: 6486632000 329 | I0309 03:53:08.787945 2039427072 layer_factory.hpp:77] Creating layer relu2 330 | I0309 03:53:08.787957 2039427072 net.cpp:106] Creating Layer relu2 331 | I0309 03:53:08.787964 2039427072 net.cpp:454] relu2 <- conv2 332 | I0309 03:53:08.787973 2039427072 net.cpp:397] relu2 -> conv2 (in-place) 333 | I0309 03:53:08.788177 2039427072 net.cpp:150] Setting up relu2 334 | I0309 03:53:08.788188 2039427072 net.cpp:157] Top shape: 400 256 37 37 (140185600) 335 | I0309 03:53:08.788197 2039427072 net.cpp:165] Memory required for data: 7047374400 336 | I0309 03:53:08.788242 2039427072 layer_factory.hpp:77] Creating layer pool2 337 | I0309 03:53:08.788252 2039427072 net.cpp:106] Creating Layer pool2 338 | I0309 03:53:08.788259 2039427072 net.cpp:454] pool2 <- conv2 339 | I0309 03:53:08.788270 2039427072 net.cpp:411] pool2 -> pool2 340 | I0309 03:53:08.788285 
2039427072 net.cpp:150] Setting up pool2 341 | I0309 03:53:08.788290 2039427072 net.cpp:157] Top shape: 400 256 19 19 (36966400) 342 | I0309 03:53:08.788300 2039427072 net.cpp:165] Memory required for data: 7195240000 343 | I0309 03:53:08.788305 2039427072 layer_factory.hpp:77] Creating layer conv3 344 | I0309 03:53:08.788317 2039427072 net.cpp:106] Creating Layer conv3 345 | I0309 03:53:08.788323 2039427072 net.cpp:454] conv3 <- pool2 346 | I0309 03:53:08.788331 2039427072 net.cpp:411] conv3 -> conv3 347 | I0309 03:53:08.791609 2039427072 net.cpp:150] Setting up conv3 348 | I0309 03:53:08.791642 2039427072 net.cpp:157] Top shape: 400 512 19 19 (73932800) 349 | I0309 03:53:08.791652 2039427072 net.cpp:165] Memory required for data: 7490971200 350 | I0309 03:53:08.791667 2039427072 layer_factory.hpp:77] Creating layer relu3 351 | I0309 03:53:08.791683 2039427072 net.cpp:106] Creating Layer relu3 352 | I0309 03:53:08.791692 2039427072 net.cpp:454] relu3 <- conv3 353 | I0309 03:53:08.791702 2039427072 net.cpp:397] relu3 -> conv3 (in-place) 354 | I0309 03:53:08.791836 2039427072 net.cpp:150] Setting up relu3 355 | I0309 03:53:08.791846 2039427072 net.cpp:157] Top shape: 400 512 19 19 (73932800) 356 | I0309 03:53:08.791856 2039427072 net.cpp:165] Memory required for data: 7786702400 357 | I0309 03:53:08.791862 2039427072 layer_factory.hpp:77] Creating layer conv4 358 | I0309 03:53:08.791873 2039427072 net.cpp:106] Creating Layer conv4 359 | I0309 03:53:08.791879 2039427072 net.cpp:454] conv4 <- conv3 360 | I0309 03:53:08.791887 2039427072 net.cpp:411] conv4 -> conv4 361 | I0309 03:53:08.798092 2039427072 net.cpp:150] Setting up conv4 362 | I0309 03:53:08.798127 2039427072 net.cpp:157] Top shape: 400 512 19 19 (73932800) 363 | I0309 03:53:08.798138 2039427072 net.cpp:165] Memory required for data: 8082433600 364 | I0309 03:53:08.798149 2039427072 layer_factory.hpp:77] Creating layer relu4 365 | I0309 03:53:08.798162 2039427072 net.cpp:106] Creating Layer relu4 366 | I0309 03:53:08.798169 2039427072 net.cpp:454] relu4 <- conv4 367 | I0309 03:53:08.798179 2039427072 net.cpp:397] relu4 -> conv4 (in-place) 368 | I0309 03:53:08.798420 2039427072 net.cpp:150] Setting up relu4 369 | I0309 03:53:08.798434 2039427072 net.cpp:157] Top shape: 400 512 19 19 (73932800) 370 | I0309 03:53:08.798441 2039427072 net.cpp:165] Memory required for data: 8378164800 371 | I0309 03:53:08.798449 2039427072 layer_factory.hpp:77] Creating layer conv5 372 | I0309 03:53:08.798460 2039427072 net.cpp:106] Creating Layer conv5 373 | I0309 03:53:08.798466 2039427072 net.cpp:454] conv5 <- conv4 374 | I0309 03:53:08.798475 2039427072 net.cpp:411] conv5 -> conv5 375 | I0309 03:53:08.804391 2039427072 net.cpp:150] Setting up conv5 376 | I0309 03:53:08.804440 2039427072 net.cpp:157] Top shape: 400 512 19 19 (73932800) 377 | I0309 03:53:08.804450 2039427072 net.cpp:165] Memory required for data: 8673896000 378 | I0309 03:53:08.804466 2039427072 layer_factory.hpp:77] Creating layer relu5 379 | I0309 03:53:08.804478 2039427072 net.cpp:106] Creating Layer relu5 380 | I0309 03:53:08.804486 2039427072 net.cpp:454] relu5 <- conv5 381 | I0309 03:53:08.804494 2039427072 net.cpp:397] relu5 -> conv5 (in-place) 382 | I0309 03:53:08.804695 2039427072 net.cpp:150] Setting up relu5 383 | I0309 03:53:08.804708 2039427072 net.cpp:157] Top shape: 400 512 19 19 (73932800) 384 | I0309 03:53:08.804716 2039427072 net.cpp:165] Memory required for data: 8969627200 385 | I0309 03:53:08.804723 2039427072 layer_factory.hpp:77] Creating layer pool5 386 | I0309 
03:53:08.804733 2039427072 net.cpp:106] Creating Layer pool5 387 | I0309 03:53:08.804738 2039427072 net.cpp:454] pool5 <- conv5 388 | I0309 03:53:08.804747 2039427072 net.cpp:411] pool5 -> pool5 389 | I0309 03:53:08.804760 2039427072 net.cpp:150] Setting up pool5 390 | I0309 03:53:08.804767 2039427072 net.cpp:157] Top shape: 400 512 7 7 (10035200) 391 | I0309 03:53:08.804774 2039427072 net.cpp:165] Memory required for data: 9009768000 392 | I0309 03:53:08.804781 2039427072 layer_factory.hpp:77] Creating layer fc6 393 | I0309 03:53:08.804792 2039427072 net.cpp:106] Creating Layer fc6 394 | I0309 03:53:08.804836 2039427072 net.cpp:454] fc6 <- pool5 395 | I0309 03:53:08.804847 2039427072 net.cpp:411] fc6 -> fc6 396 | I0309 03:53:09.041837 2039427072 net.cpp:150] Setting up fc6 397 | I0309 03:53:09.041875 2039427072 net.cpp:157] Top shape: 400 4048 (1619200) 398 | I0309 03:53:09.041884 2039427072 net.cpp:165] Memory required for data: 9016244800 399 | I0309 03:53:09.041898 2039427072 layer_factory.hpp:77] Creating layer relu6 400 | I0309 03:53:09.041916 2039427072 net.cpp:106] Creating Layer relu6 401 | I0309 03:53:09.041925 2039427072 net.cpp:454] relu6 <- fc6 402 | I0309 03:53:09.041936 2039427072 net.cpp:397] relu6 -> fc6 (in-place) 403 | I0309 03:53:09.042212 2039427072 net.cpp:150] Setting up relu6 404 | I0309 03:53:09.042230 2039427072 net.cpp:157] Top shape: 400 4048 (1619200) 405 | I0309 03:53:09.042240 2039427072 net.cpp:165] Memory required for data: 9022721600 406 | I0309 03:53:09.042248 2039427072 layer_factory.hpp:77] Creating layer drop6 407 | I0309 03:53:09.042258 2039427072 net.cpp:106] Creating Layer drop6 408 | I0309 03:53:09.042265 2039427072 net.cpp:454] drop6 <- fc6 409 | I0309 03:53:09.042274 2039427072 net.cpp:397] drop6 -> fc6 (in-place) 410 | I0309 03:53:09.042292 2039427072 net.cpp:150] Setting up drop6 411 | I0309 03:53:09.042299 2039427072 net.cpp:157] Top shape: 400 4048 (1619200) 412 | I0309 03:53:09.042307 2039427072 net.cpp:165] Memory required for data: 9029198400 413 | I0309 03:53:09.042314 2039427072 layer_factory.hpp:77] Creating layer fc7 414 | I0309 03:53:09.042322 2039427072 net.cpp:106] Creating Layer fc7 415 | I0309 03:53:09.042330 2039427072 net.cpp:454] fc7 <- fc6 416 | I0309 03:53:09.042337 2039427072 net.cpp:411] fc7 -> fc7 417 | I0309 03:53:09.084827 2039427072 net.cpp:150] Setting up fc7 418 | I0309 03:53:09.084895 2039427072 net.cpp:157] Top shape: 400 4048 (1619200) 419 | I0309 03:53:09.084906 2039427072 net.cpp:165] Memory required for data: 9035675200 420 | I0309 03:53:09.084923 2039427072 layer_factory.hpp:77] Creating layer relu7 421 | I0309 03:53:09.084944 2039427072 net.cpp:106] Creating Layer relu7 422 | I0309 03:53:09.084952 2039427072 net.cpp:454] relu7 <- fc7 423 | I0309 03:53:09.084964 2039427072 net.cpp:397] relu7 -> fc7 (in-place) 424 | I0309 03:53:09.085621 2039427072 net.cpp:150] Setting up relu7 425 | I0309 03:53:09.085659 2039427072 net.cpp:157] Top shape: 400 4048 (1619200) 426 | I0309 03:53:09.085669 2039427072 net.cpp:165] Memory required for data: 9042152000 427 | I0309 03:53:09.085677 2039427072 layer_factory.hpp:77] Creating layer drop7 428 | I0309 03:53:09.085691 2039427072 net.cpp:106] Creating Layer drop7 429 | I0309 03:53:09.085700 2039427072 net.cpp:454] drop7 <- fc7 430 | I0309 03:53:09.085712 2039427072 net.cpp:397] drop7 -> fc7 (in-place) 431 | I0309 03:53:09.085731 2039427072 net.cpp:150] Setting up drop7 432 | I0309 03:53:09.085737 2039427072 net.cpp:157] Top shape: 400 4048 (1619200) 433 | I0309 03:53:09.085746 
2039427072 net.cpp:165] Memory required for data: 9048628800 434 | I0309 03:53:09.085752 2039427072 layer_factory.hpp:77] Creating layer fc8_cat 435 | I0309 03:53:09.085767 2039427072 net.cpp:106] Creating Layer fc8_cat 436 | I0309 03:53:09.085774 2039427072 net.cpp:454] fc8_cat <- fc7 437 | I0309 03:53:09.085783 2039427072 net.cpp:411] fc8_cat -> fc8 438 | I0309 03:53:09.085888 2039427072 net.cpp:150] Setting up fc8_cat 439 | I0309 03:53:09.085901 2039427072 net.cpp:157] Top shape: 400 7 (2800) 440 | I0309 03:53:09.085908 2039427072 net.cpp:165] Memory required for data: 9048640000 441 | I0309 03:53:09.085918 2039427072 layer_factory.hpp:77] Creating layer prob 442 | I0309 03:53:09.085928 2039427072 net.cpp:106] Creating Layer prob 443 | I0309 03:53:09.085935 2039427072 net.cpp:454] prob <- fc8 444 | I0309 03:53:09.085942 2039427072 net.cpp:454] prob <- label 445 | I0309 03:53:09.085959 2039427072 net.cpp:411] prob -> (automatic) 446 | I0309 03:53:09.085973 2039427072 layer_factory.hpp:77] Creating layer prob 447 | I0309 03:53:09.086429 2039427072 net.cpp:150] Setting up prob 448 | I0309 03:53:09.086462 2039427072 net.cpp:157] Top shape: (1) 449 | I0309 03:53:09.086469 2039427072 net.cpp:160] with loss weight 1 450 | I0309 03:53:09.086496 2039427072 net.cpp:165] Memory required for data: 9048640004 451 | I0309 03:53:09.086505 2039427072 net.cpp:226] prob needs backward computation. 452 | I0309 03:53:09.086515 2039427072 net.cpp:226] fc8_cat needs backward computation. 453 | I0309 03:53:09.086598 2039427072 net.cpp:226] drop7 needs backward computation. 454 | I0309 03:53:09.086606 2039427072 net.cpp:226] relu7 needs backward computation. 455 | I0309 03:53:09.086614 2039427072 net.cpp:226] fc7 needs backward computation. 456 | I0309 03:53:09.086621 2039427072 net.cpp:226] drop6 needs backward computation. 457 | I0309 03:53:09.086628 2039427072 net.cpp:226] relu6 needs backward computation. 458 | I0309 03:53:09.086634 2039427072 net.cpp:226] fc6 needs backward computation. 459 | I0309 03:53:09.086642 2039427072 net.cpp:228] pool5 does not need backward computation. 460 | I0309 03:53:09.086649 2039427072 net.cpp:228] relu5 does not need backward computation. 461 | I0309 03:53:09.086657 2039427072 net.cpp:228] conv5 does not need backward computation. 462 | I0309 03:53:09.086663 2039427072 net.cpp:228] relu4 does not need backward computation. 463 | I0309 03:53:09.086670 2039427072 net.cpp:228] conv4 does not need backward computation. 464 | I0309 03:53:09.086678 2039427072 net.cpp:228] relu3 does not need backward computation. 465 | I0309 03:53:09.086685 2039427072 net.cpp:228] conv3 does not need backward computation. 466 | I0309 03:53:09.086693 2039427072 net.cpp:228] pool2 does not need backward computation. 467 | I0309 03:53:09.086699 2039427072 net.cpp:228] relu2 does not need backward computation. 468 | I0309 03:53:09.086705 2039427072 net.cpp:228] conv2 does not need backward computation. 469 | I0309 03:53:09.086714 2039427072 net.cpp:228] pool1 does not need backward computation. 470 | I0309 03:53:09.086721 2039427072 net.cpp:228] norm1 does not need backward computation. 471 | I0309 03:53:09.086729 2039427072 net.cpp:228] relu1 does not need backward computation. 472 | I0309 03:53:09.086735 2039427072 net.cpp:228] conv1 does not need backward computation. 473 | I0309 03:53:09.086743 2039427072 net.cpp:228] training_train does not need backward computation. 474 | I0309 03:53:09.086762 2039427072 net.cpp:283] Network initialization done. 
475 | I0309 03:53:09.087157 2039427072 upgrade_proto.cpp:51] Attempting to upgrade input file specified using deprecated V1LayerParameter: models/Custom_Model/train.prototxt 476 | I0309 03:53:09.087245 2039427072 upgrade_proto.cpp:59] Successfully upgraded file specified using deprecated V1LayerParameter 477 | I0309 03:53:09.087275 2039427072 solver.cpp:181] Creating test net (#0) specified by net file: models/Custom_Model/train.prototxt 478 | I0309 03:53:09.087309 2039427072 net.cpp:322] The NetState phase (1) differed from the phase (0) specified by a rule in layer training_train 479 | I0309 03:53:09.087327 2039427072 net.cpp:49] Initializing net from parameters: 480 | name: "CaffeNet" 481 | state { 482 | phase: TEST 483 | } 484 | layer { 485 | name: "training_test" 486 | type: "Data" 487 | top: "data" 488 | top: "label" 489 | include { 490 | phase: TEST 491 | } 492 | transform_param { 493 | mean_file: "datasets/mean_training_image.binaryproto" 494 | } 495 | data_param { 496 | source: "datasets/validation_set_lmdb" 497 | batch_size: 14 498 | backend: LMDB 499 | } 500 | } 501 | layer { 502 | name: "conv1" 503 | type: "Convolution" 504 | bottom: "data" 505 | top: "conv1" 506 | param { 507 | lr_mult: 0 508 | } 509 | param { 510 | lr_mult: 0 511 | } 512 | convolution_param { 513 | num_output: 96 514 | kernel_size: 7 515 | stride: 2 516 | } 517 | } 518 | layer { 519 | name: "relu1" 520 | type: "ReLU" 521 | bottom: "conv1" 522 | top: "conv1" 523 | } 524 | layer { 525 | name: "norm1" 526 | type: "LRN" 527 | bottom: "conv1" 528 | top: "norm1" 529 | lrn_param { 530 | local_size: 5 531 | alpha: 0.0005 532 | beta: 0.75 533 | } 534 | } 535 | layer { 536 | name: "pool1" 537 | type: "Pooling" 538 | bottom: "norm1" 539 | top: "pool1" 540 | pooling_param { 541 | pool: MAX 542 | kernel_size: 3 543 | stride: 3 544 | } 545 | } 546 | layer { 547 | name: "conv2" 548 | type: "Convolution" 549 | bottom: "pool1" 550 | top: "conv2" 551 | param { 552 | lr_mult: 0 553 | } 554 | param { 555 | lr_mult: 0 556 | } 557 | convolution_param { 558 | num_output: 256 559 | pad: 2 560 | kernel_size: 5 561 | } 562 | } 563 | layer { 564 | name: "relu2" 565 | type: "ReLU" 566 | bottom: "conv2" 567 | top: "conv2" 568 | } 569 | layer { 570 | name: "pool2" 571 | type: "Pooling" 572 | bottom: "conv2" 573 | top: "pool2" 574 | pooling_param { 575 | pool: MAX 576 | kernel_size: 2 577 | stride: 2 578 | } 579 | } 580 | layer { 581 | name: "conv3" 582 | type: "Convolution" 583 | bottom: "pool2" 584 | top: "conv3" 585 | param { 586 | lr_mult: 0 587 | } 588 | param { 589 | lr_mult: 0 590 | } 591 | convolution_param { 592 | num_output: 512 593 | pad: 1 594 | kernel_size: 3 595 | } 596 | } 597 | layer { 598 | name: "relu3" 599 | type: "ReLU" 600 | bottom: "conv3" 601 | top: "conv3" 602 | } 603 | layer { 604 | name: "conv4" 605 | type: "Convolution" 606 | bottom: "conv3" 607 | top: "conv4" 608 | param { 609 | lr_mult: 0 610 | } 611 | param { 612 | lr_mult: 0 613 | } 614 | convolution_param { 615 | num_output: 512 616 | pad: 1 617 | kernel_size: 3 618 | } 619 | } 620 | layer { 621 | name: "relu4" 622 | type: "ReLU" 623 | bottom: "conv4" 624 | top: "conv4" 625 | } 626 | layer { 627 | name: "conv5" 628 | type: "Convolution" 629 | bottom: "conv4" 630 | top: "conv5" 631 | param { 632 | lr_mult: 0 633 | } 634 | param { 635 | lr_mult: 0 636 | } 637 | convolution_param { 638 | num_output: 512 639 | pad: 1 640 | kernel_size: 3 641 | } 642 | } 643 | layer { 644 | name: "relu5" 645 | type: "ReLU" 646 | bottom: "conv5" 647 | top: "conv5" 648 | } 649 | 
layer { 650 | name: "pool5" 651 | type: "Pooling" 652 | bottom: "conv5" 653 | top: "pool5" 654 | pooling_param { 655 | pool: MAX 656 | kernel_size: 3 657 | stride: 3 658 | } 659 | } 660 | layer { 661 | name: "fc6" 662 | type: "InnerProduct" 663 | bottom: "pool5" 664 | top: "fc6" 665 | param { 666 | lr_mult: 1 667 | } 668 | param { 669 | lr_mult: 1 670 | } 671 | inner_product_param { 672 | num_output: 4048 673 | } 674 | } 675 | layer { 676 | name: "relu6" 677 | type: "ReLU" 678 | bottom: "fc6" 679 | top: "fc6" 680 | } 681 | layer { 682 | name: "drop6" 683 | type: "Dropout" 684 | bottom: "fc6" 685 | top: "fc6" 686 | dropout_param { 687 | dropout_ratio: 0.5 688 | } 689 | } 690 | layer { 691 | name: "fc7" 692 | type: "InnerProduct" 693 | bottom: "fc6" 694 | top: "fc7" 695 | param { 696 | lr_mult: 1 697 | } 698 | param { 699 | lr_mult: 1 700 | } 701 | inner_product_param { 702 | num_output: 4048 703 | } 704 | } 705 | layer { 706 | name: "relu7" 707 | type: "ReLU" 708 | bottom: "fc7" 709 | top: "fc7" 710 | } 711 | layer { 712 | name: "drop7" 713 | type: "Dropout" 714 | bottom: "fc7" 715 | top: "fc7" 716 | dropout_param { 717 | dropout_ratio: 0.5 718 | } 719 | } 720 | layer { 721 | name: "fc8_cat" 722 | type: "InnerProduct" 723 | bottom: "fc7" 724 | top: "fc8" 725 | param { 726 | lr_mult: 1 727 | } 728 | param { 729 | lr_mult: 1 730 | } 731 | inner_product_param { 732 | num_output: 7 733 | } 734 | } 735 | layer { 736 | name: "prob" 737 | type: "SoftmaxWithLoss" 738 | bottom: "fc8" 739 | bottom: "label" 740 | } 741 | I0309 03:53:09.087695 2039427072 layer_factory.hpp:77] Creating layer training_test 742 | I0309 03:53:09.087796 2039427072 net.cpp:106] Creating Layer training_test 743 | I0309 03:53:09.087816 2039427072 net.cpp:411] training_test -> data 744 | I0309 03:53:09.087839 2039427072 net.cpp:411] training_test -> label 745 | I0309 03:53:09.087859 2039427072 data_transformer.cpp:25] Loading mean file from: datasets/mean_training_image.binaryproto 746 | I0309 03:53:09.094477 4284416 db_lmdb.cpp:38] Opened lmdb datasets/validation_set_lmdb 747 | I0309 03:53:09.094655 2039427072 data_layer.cpp:41] output data size: 14,3,224,224 748 | I0309 03:53:09.111527 2039427072 net.cpp:150] Setting up training_test 749 | I0309 03:53:09.111560 2039427072 net.cpp:157] Top shape: 14 3 224 224 (2107392) 750 | I0309 03:53:09.111572 2039427072 net.cpp:157] Top shape: 14 (14) 751 | I0309 03:53:09.111579 2039427072 net.cpp:165] Memory required for data: 8429624 752 | I0309 03:53:09.111589 2039427072 layer_factory.hpp:77] Creating layer conv1 753 | I0309 03:53:09.111608 2039427072 net.cpp:106] Creating Layer conv1 754 | I0309 03:53:09.111615 2039427072 net.cpp:454] conv1 <- data 755 | I0309 03:53:09.111627 2039427072 net.cpp:411] conv1 -> conv1 756 | I0309 03:53:09.112519 2039427072 net.cpp:150] Setting up conv1 757 | I0309 03:53:09.112540 2039427072 net.cpp:157] Top shape: 14 96 109 109 (15968064) 758 | I0309 03:53:09.112550 2039427072 net.cpp:165] Memory required for data: 72301880 759 | I0309 03:53:09.112562 2039427072 layer_factory.hpp:77] Creating layer relu1 760 | I0309 03:53:09.112574 2039427072 net.cpp:106] Creating Layer relu1 761 | I0309 03:53:09.112581 2039427072 net.cpp:454] relu1 <- conv1 762 | I0309 03:53:09.112610 2039427072 net.cpp:397] relu1 -> conv1 (in-place) 763 | I0309 03:53:09.112915 2039427072 net.cpp:150] Setting up relu1 764 | I0309 03:53:09.112936 2039427072 net.cpp:157] Top shape: 14 96 109 109 (15968064) 765 | I0309 03:53:09.112952 2039427072 net.cpp:165] Memory required for data: 
136174136 766 | I0309 03:53:09.112965 2039427072 layer_factory.hpp:77] Creating layer norm1 767 | I0309 03:53:09.112992 2039427072 net.cpp:106] Creating Layer norm1 768 | I0309 03:53:09.113006 2039427072 net.cpp:454] norm1 <- conv1 769 | I0309 03:53:09.113023 2039427072 net.cpp:411] norm1 -> norm1 770 | I0309 03:53:09.113227 2039427072 net.cpp:150] Setting up norm1 771 | I0309 03:53:09.113243 2039427072 net.cpp:157] Top shape: 14 96 109 109 (15968064) 772 | I0309 03:53:09.113289 2039427072 net.cpp:165] Memory required for data: 200046392 773 | I0309 03:53:09.113297 2039427072 layer_factory.hpp:77] Creating layer pool1 774 | I0309 03:53:09.113312 2039427072 net.cpp:106] Creating Layer pool1 775 | I0309 03:53:09.113318 2039427072 net.cpp:454] pool1 <- norm1 776 | I0309 03:53:09.113327 2039427072 net.cpp:411] pool1 -> pool1 777 | I0309 03:53:09.113342 2039427072 net.cpp:150] Setting up pool1 778 | I0309 03:53:09.113348 2039427072 net.cpp:157] Top shape: 14 96 37 37 (1839936) 779 | I0309 03:53:09.113355 2039427072 net.cpp:165] Memory required for data: 207406136 780 | I0309 03:53:09.113361 2039427072 layer_factory.hpp:77] Creating layer conv2 781 | I0309 03:53:09.113371 2039427072 net.cpp:106] Creating Layer conv2 782 | I0309 03:53:09.113378 2039427072 net.cpp:454] conv2 <- pool1 783 | I0309 03:53:09.113385 2039427072 net.cpp:411] conv2 -> conv2 784 | I0309 03:53:09.115545 2039427072 net.cpp:150] Setting up conv2 785 | I0309 03:53:09.115577 2039427072 net.cpp:157] Top shape: 14 256 37 37 (4906496) 786 | I0309 03:53:09.115587 2039427072 net.cpp:165] Memory required for data: 227032120 787 | I0309 03:53:09.115602 2039427072 layer_factory.hpp:77] Creating layer relu2 788 | I0309 03:53:09.115618 2039427072 net.cpp:106] Creating Layer relu2 789 | I0309 03:53:09.115627 2039427072 net.cpp:454] relu2 <- conv2 790 | I0309 03:53:09.115635 2039427072 net.cpp:397] relu2 -> conv2 (in-place) 791 | I0309 03:53:09.115955 2039427072 net.cpp:150] Setting up relu2 792 | I0309 03:53:09.115969 2039427072 net.cpp:157] Top shape: 14 256 37 37 (4906496) 793 | I0309 03:53:09.115978 2039427072 net.cpp:165] Memory required for data: 246658104 794 | I0309 03:53:09.115984 2039427072 layer_factory.hpp:77] Creating layer pool2 795 | I0309 03:53:09.115993 2039427072 net.cpp:106] Creating Layer pool2 796 | I0309 03:53:09.116001 2039427072 net.cpp:454] pool2 <- conv2 797 | I0309 03:53:09.116009 2039427072 net.cpp:411] pool2 -> pool2 798 | I0309 03:53:09.116024 2039427072 net.cpp:150] Setting up pool2 799 | I0309 03:53:09.116030 2039427072 net.cpp:157] Top shape: 14 256 19 19 (1293824) 800 | I0309 03:53:09.116037 2039427072 net.cpp:165] Memory required for data: 251833400 801 | I0309 03:53:09.116044 2039427072 layer_factory.hpp:77] Creating layer conv3 802 | I0309 03:53:09.116080 2039427072 net.cpp:106] Creating Layer conv3 803 | I0309 03:53:09.116117 2039427072 net.cpp:454] conv3 <- pool2 804 | I0309 03:53:09.116130 2039427072 net.cpp:411] conv3 -> conv3 805 | I0309 03:53:09.120031 2039427072 net.cpp:150] Setting up conv3 806 | I0309 03:53:09.120069 2039427072 net.cpp:157] Top shape: 14 512 19 19 (2587648) 807 | I0309 03:53:09.120085 2039427072 net.cpp:165] Memory required for data: 262183992 808 | I0309 03:53:09.120107 2039427072 layer_factory.hpp:77] Creating layer relu3 809 | I0309 03:53:09.120146 2039427072 net.cpp:106] Creating Layer relu3 810 | I0309 03:53:09.120162 2039427072 net.cpp:454] relu3 <- conv3 811 | I0309 03:53:09.120177 2039427072 net.cpp:397] relu3 -> conv3 (in-place) 812 | I0309 03:53:09.120499 2039427072 
net.cpp:150] Setting up relu3 813 | I0309 03:53:09.120517 2039427072 net.cpp:157] Top shape: 14 512 19 19 (2587648) 814 | I0309 03:53:09.120530 2039427072 net.cpp:165] Memory required for data: 272534584 815 | I0309 03:53:09.120543 2039427072 layer_factory.hpp:77] Creating layer conv4 816 | I0309 03:53:09.120565 2039427072 net.cpp:106] Creating Layer conv4 817 | I0309 03:53:09.120579 2039427072 net.cpp:454] conv4 <- conv3 818 | I0309 03:53:09.120594 2039427072 net.cpp:411] conv4 -> conv4 819 | I0309 03:53:09.132443 2039427072 net.cpp:150] Setting up conv4 820 | I0309 03:53:09.132483 2039427072 net.cpp:157] Top shape: 14 512 19 19 (2587648) 821 | I0309 03:53:09.132498 2039427072 net.cpp:165] Memory required for data: 282885176 822 | I0309 03:53:09.132518 2039427072 layer_factory.hpp:77] Creating layer relu4 823 | I0309 03:53:09.132537 2039427072 net.cpp:106] Creating Layer relu4 824 | I0309 03:53:09.132550 2039427072 net.cpp:454] relu4 <- conv4 825 | I0309 03:53:09.132570 2039427072 net.cpp:397] relu4 -> conv4 (in-place) 826 | I0309 03:53:09.134182 2039427072 net.cpp:150] Setting up relu4 827 | I0309 03:53:09.134212 2039427072 net.cpp:157] Top shape: 14 512 19 19 (2587648) 828 | I0309 03:53:09.134227 2039427072 net.cpp:165] Memory required for data: 293235768 829 | I0309 03:53:09.134241 2039427072 layer_factory.hpp:77] Creating layer conv5 830 | I0309 03:53:09.134313 2039427072 net.cpp:106] Creating Layer conv5 831 | I0309 03:53:09.134327 2039427072 net.cpp:454] conv5 <- conv4 832 | I0309 03:53:09.134346 2039427072 net.cpp:411] conv5 -> conv5 833 | I0309 03:53:09.144678 2039427072 net.cpp:150] Setting up conv5 834 | I0309 03:53:09.144711 2039427072 net.cpp:157] Top shape: 14 512 19 19 (2587648) 835 | I0309 03:53:09.144721 2039427072 net.cpp:165] Memory required for data: 303586360 836 | I0309 03:53:09.144736 2039427072 layer_factory.hpp:77] Creating layer relu5 837 | I0309 03:53:09.144750 2039427072 net.cpp:106] Creating Layer relu5 838 | I0309 03:53:09.144758 2039427072 net.cpp:454] relu5 <- conv5 839 | I0309 03:53:09.144767 2039427072 net.cpp:397] relu5 -> conv5 (in-place) 840 | I0309 03:53:09.146145 2039427072 net.cpp:150] Setting up relu5 841 | I0309 03:53:09.146160 2039427072 net.cpp:157] Top shape: 14 512 19 19 (2587648) 842 | I0309 03:53:09.146167 2039427072 net.cpp:165] Memory required for data: 313936952 843 | I0309 03:53:09.146174 2039427072 layer_factory.hpp:77] Creating layer pool5 844 | I0309 03:53:09.146186 2039427072 net.cpp:106] Creating Layer pool5 845 | I0309 03:53:09.146193 2039427072 net.cpp:454] pool5 <- conv5 846 | I0309 03:53:09.146203 2039427072 net.cpp:411] pool5 -> pool5 847 | I0309 03:53:09.146217 2039427072 net.cpp:150] Setting up pool5 848 | I0309 03:53:09.146224 2039427072 net.cpp:157] Top shape: 14 512 7 7 (351232) 849 | I0309 03:53:09.146232 2039427072 net.cpp:165] Memory required for data: 315341880 850 | I0309 03:53:09.146239 2039427072 layer_factory.hpp:77] Creating layer fc6 851 | I0309 03:53:09.146250 2039427072 net.cpp:106] Creating Layer fc6 852 | I0309 03:53:09.146256 2039427072 net.cpp:454] fc6 <- pool5 853 | I0309 03:53:09.146265 2039427072 net.cpp:411] fc6 -> fc6 854 | I0309 03:53:09.377846 2039427072 net.cpp:150] Setting up fc6 855 | I0309 03:53:09.377890 2039427072 net.cpp:157] Top shape: 14 4048 (56672) 856 | I0309 03:53:09.377908 2039427072 net.cpp:165] Memory required for data: 315568568 857 | I0309 03:53:09.377920 2039427072 layer_factory.hpp:77] Creating layer relu6 858 | I0309 03:53:09.377935 2039427072 net.cpp:106] Creating Layer relu6 859 
| I0309 03:53:09.377943 2039427072 net.cpp:454] relu6 <- fc6 860 | I0309 03:53:09.377962 2039427072 net.cpp:397] relu6 -> fc6 (in-place) 861 | I0309 03:53:09.378199 2039427072 net.cpp:150] Setting up relu6 862 | I0309 03:53:09.378211 2039427072 net.cpp:157] Top shape: 14 4048 (56672) 863 | I0309 03:53:09.378228 2039427072 net.cpp:165] Memory required for data: 315795256 864 | I0309 03:53:09.378234 2039427072 layer_factory.hpp:77] Creating layer drop6 865 | I0309 03:53:09.378244 2039427072 net.cpp:106] Creating Layer drop6 866 | I0309 03:53:09.378250 2039427072 net.cpp:454] drop6 <- fc6 867 | I0309 03:53:09.378258 2039427072 net.cpp:397] drop6 -> fc6 (in-place) 868 | I0309 03:53:09.378269 2039427072 net.cpp:150] Setting up drop6 869 | I0309 03:53:09.378275 2039427072 net.cpp:157] Top shape: 14 4048 (56672) 870 | I0309 03:53:09.378283 2039427072 net.cpp:165] Memory required for data: 316021944 871 | I0309 03:53:09.378298 2039427072 layer_factory.hpp:77] Creating layer fc7 872 | I0309 03:53:09.378316 2039427072 net.cpp:106] Creating Layer fc7 873 | I0309 03:53:09.378324 2039427072 net.cpp:454] fc7 <- fc6 874 | I0309 03:53:09.378335 2039427072 net.cpp:411] fc7 -> fc7 875 | I0309 03:53:09.411770 2039427072 net.cpp:150] Setting up fc7 876 | I0309 03:53:09.411813 2039427072 net.cpp:157] Top shape: 14 4048 (56672) 877 | I0309 03:53:09.411830 2039427072 net.cpp:165] Memory required for data: 316248632 878 | I0309 03:53:09.411842 2039427072 layer_factory.hpp:77] Creating layer relu7 879 | I0309 03:53:09.411854 2039427072 net.cpp:106] Creating Layer relu7 880 | I0309 03:53:09.411860 2039427072 net.cpp:454] relu7 <- fc7 881 | I0309 03:53:09.411870 2039427072 net.cpp:397] relu7 -> fc7 (in-place) 882 | I0309 03:53:09.412672 2039427072 net.cpp:150] Setting up relu7 883 | I0309 03:53:09.412696 2039427072 net.cpp:157] Top shape: 14 4048 (56672) 884 | I0309 03:53:09.413028 2039427072 net.cpp:165] Memory required for data: 316475320 885 | I0309 03:53:09.413048 2039427072 layer_factory.hpp:77] Creating layer drop7 886 | I0309 03:53:09.413380 2039427072 net.cpp:106] Creating Layer drop7 887 | I0309 03:53:09.413771 2039427072 net.cpp:454] drop7 <- fc7 888 | I0309 03:53:09.414346 2039427072 net.cpp:397] drop7 -> fc7 (in-place) 889 | I0309 03:53:09.414511 2039427072 net.cpp:150] Setting up drop7 890 | I0309 03:53:09.414527 2039427072 net.cpp:157] Top shape: 14 4048 (56672) 891 | I0309 03:53:09.414681 2039427072 net.cpp:165] Memory required for data: 316702008 892 | I0309 03:53:09.414700 2039427072 layer_factory.hpp:77] Creating layer fc8_cat 893 | I0309 03:53:09.414713 2039427072 net.cpp:106] Creating Layer fc8_cat 894 | I0309 03:53:09.415029 2039427072 net.cpp:454] fc8_cat <- fc7 895 | I0309 03:53:09.415053 2039427072 net.cpp:411] fc8_cat -> fc8 896 | I0309 03:53:09.416302 2039427072 net.cpp:150] Setting up fc8_cat 897 | I0309 03:53:09.416688 2039427072 net.cpp:157] Top shape: 14 7 (98) 898 | I0309 03:53:09.416955 2039427072 net.cpp:165] Memory required for data: 316702400 899 | I0309 03:53:09.417032 2039427072 layer_factory.hpp:77] Creating layer prob 900 | I0309 03:53:09.417198 2039427072 net.cpp:106] Creating Layer prob 901 | I0309 03:53:09.417228 2039427072 net.cpp:454] prob <- fc8 902 | I0309 03:53:09.417246 2039427072 net.cpp:454] prob <- label 903 | I0309 03:53:09.417263 2039427072 net.cpp:411] prob -> (automatic) 904 | I0309 03:53:09.417280 2039427072 layer_factory.hpp:77] Creating layer prob 905 | I0309 03:53:09.417740 2039427072 net.cpp:150] Setting up prob 906 | I0309 03:53:09.417764 2039427072 
net.cpp:157] Top shape: (1) 907 | I0309 03:53:09.417773 2039427072 net.cpp:160] with loss weight 1 908 | I0309 03:53:09.417788 2039427072 net.cpp:165] Memory required for data: 316702404 909 | I0309 03:53:09.417800 2039427072 net.cpp:226] prob needs backward computation. 910 | I0309 03:53:09.417810 2039427072 net.cpp:226] fc8_cat needs backward computation. 911 | I0309 03:53:09.417817 2039427072 net.cpp:226] drop7 needs backward computation. 912 | I0309 03:53:09.417824 2039427072 net.cpp:226] relu7 needs backward computation. 913 | I0309 03:53:09.417830 2039427072 net.cpp:226] fc7 needs backward computation. 914 | I0309 03:53:09.417836 2039427072 net.cpp:226] drop6 needs backward computation. 915 | I0309 03:53:09.417845 2039427072 net.cpp:226] relu6 needs backward computation. 916 | I0309 03:53:09.417851 2039427072 net.cpp:226] fc6 needs backward computation. 917 | I0309 03:53:09.417858 2039427072 net.cpp:228] pool5 does not need backward computation. 918 | I0309 03:53:09.417865 2039427072 net.cpp:228] relu5 does not need backward computation. 919 | I0309 03:53:09.417872 2039427072 net.cpp:228] conv5 does not need backward computation. 920 | I0309 03:53:09.417878 2039427072 net.cpp:228] relu4 does not need backward computation. 921 | I0309 03:53:09.417886 2039427072 net.cpp:228] conv4 does not need backward computation. 922 | I0309 03:53:09.417892 2039427072 net.cpp:228] relu3 does not need backward computation. 923 | I0309 03:53:09.417899 2039427072 net.cpp:228] conv3 does not need backward computation. 924 | I0309 03:53:09.417906 2039427072 net.cpp:228] pool2 does not need backward computation. 925 | I0309 03:53:09.417913 2039427072 net.cpp:228] relu2 does not need backward computation. 926 | I0309 03:53:09.417920 2039427072 net.cpp:228] conv2 does not need backward computation. 927 | I0309 03:53:09.417927 2039427072 net.cpp:228] pool1 does not need backward computation. 928 | I0309 03:53:09.417934 2039427072 net.cpp:228] norm1 does not need backward computation. 929 | I0309 03:53:09.417942 2039427072 net.cpp:228] relu1 does not need backward computation. 930 | I0309 03:53:09.417948 2039427072 net.cpp:228] conv1 does not need backward computation. 931 | I0309 03:53:09.417955 2039427072 net.cpp:228] training_test does not need backward computation. 932 | I0309 03:53:09.417970 2039427072 net.cpp:283] Network initialization done. 933 | I0309 03:53:09.418092 2039427072 solver.cpp:60] Solver scaffolding done. 
934 | I0309 03:53:09.418184 2039427072 caffe.cpp:129] Finetuning from models/Custom_Model/EmotiW_VGG_S.caffemodel 935 | I0309 03:53:11.228819 2039427072 net.cpp:816] Ignoring source layer training_train 936 | I0309 03:53:11.363332 2039427072 caffe.cpp:219] Starting Optimization 937 | I0309 03:53:11.363370 2039427072 solver.cpp:280] Solving CaffeNet 938 | I0309 03:53:11.363387 2039427072 solver.cpp:281] Learning Rate Policy: fixed 939 | I0309 03:53:11.586057 2039427072 solver.cpp:338] Iteration 0, Testing net (#0) 940 | I0309 03:53:11.586123 2039427072 net.cpp:748] Ignoring source layer training_train 941 | I0309 03:54:08.447562 2039427072 solver.cpp:229] Iteration 0, loss = 0.122531 942 | I0309 03:54:08.449357 2039427072 sgd_solver.cpp:106] Iteration 0, lr = 0.001 943 | I0309 04:03:04.910650 2039427072 solver.cpp:229] Iteration 10, loss = 0.100578 944 | I0309 04:03:04.911058 2039427072 sgd_solver.cpp:106] Iteration 10, lr = 0.001 945 | I0309 04:11:43.014382 2039427072 solver.cpp:338] Iteration 20, Testing net (#0) 946 | I0309 04:11:43.020212 2039427072 net.cpp:748] Ignoring source layer training_train 947 | I0309 04:12:35.892045 2039427072 solver.cpp:229] Iteration 20, loss = 0.741465 948 | I0309 04:12:35.892329 2039427072 sgd_solver.cpp:106] Iteration 20, lr = 0.001 949 | I0309 04:20:43.769124 2039427072 solver.cpp:229] Iteration 30, loss = 0.874817 950 | I0309 04:20:43.771332 2039427072 sgd_solver.cpp:106] Iteration 30, lr = 0.001 951 | I0309 04:27:49.382239 2039427072 solver.cpp:338] Iteration 40, Testing net (#0) 952 | I0309 04:27:49.385661 2039427072 net.cpp:748] Ignoring source layer training_train 953 | I0309 04:28:36.873368 2039427072 solver.cpp:229] Iteration 40, loss = 0.0139934 954 | I0309 04:28:36.873745 2039427072 sgd_solver.cpp:106] Iteration 40, lr = 0.001 955 | I0309 04:36:20.586778 2039427072 solver.cpp:229] Iteration 50, loss = 0.0296144 956 | I0309 04:36:20.588299 2039427072 sgd_solver.cpp:106] Iteration 50, lr = 0.001 957 | I0309 04:43:18.206178 2039427072 solver.cpp:338] Iteration 60, Testing net (#0) 958 | I0309 04:43:18.207828 2039427072 net.cpp:748] Ignoring source layer training_train 959 | I0309 04:44:04.991914 2039427072 solver.cpp:229] Iteration 60, loss = 1.1115 960 | I0309 04:44:04.993556 2039427072 sgd_solver.cpp:106] Iteration 60, lr = 0.001 961 | I0309 04:51:43.962283 2039427072 solver.cpp:229] Iteration 70, loss = 0.0305976 962 | I0309 04:51:43.964499 2039427072 sgd_solver.cpp:106] Iteration 70, lr = 0.001 963 | I0309 04:58:38.608314 2039427072 solver.cpp:338] Iteration 80, Testing net (#0) 964 | I0309 04:58:38.609947 2039427072 net.cpp:748] Ignoring source layer training_train 965 | I0309 04:59:24.513803 2039427072 solver.cpp:229] Iteration 80, loss = 0.0297828 966 | I0309 04:59:24.513872 2039427072 sgd_solver.cpp:106] Iteration 80, lr = 0.001 967 | I0309 05:07:04.973184 2039427072 solver.cpp:229] Iteration 90, loss = 0.0570686 968 | I0309 05:07:04.975365 2039427072 sgd_solver.cpp:106] Iteration 90, lr = 0.001 969 | I0309 05:14:03.793534 2039427072 solver.cpp:456] Snapshotting to binary proto file snapshot_iter_100.caffemodel 970 | I0309 05:14:10.611331 2039427072 sgd_solver.cpp:273] Snapshotting solver state to binary proto file snapshot_iter_100.solverstate 971 | I0309 05:14:15.523087 2039427072 solver.cpp:338] Iteration 100, Testing net (#0) 972 | I0309 05:14:15.523134 2039427072 net.cpp:748] Ignoring source layer training_train 973 | I0309 05:15:05.115272 2039427072 solver.cpp:229] Iteration 100, loss = 0.0503929 974 | I0309 05:15:05.118124 2039427072 
sgd_solver.cpp:106] Iteration 100, lr = 0.001 975 | I0309 05:22:44.440035 2039427072 solver.cpp:229] Iteration 110, loss = 0.000416442 976 | I0309 05:22:44.441722 2039427072 sgd_solver.cpp:106] Iteration 110, lr = 0.001 977 | I0309 05:29:40.074200 2039427072 solver.cpp:338] Iteration 120, Testing net (#0) 978 | I0309 05:29:40.075846 2039427072 net.cpp:748] Ignoring source layer training_train 979 | I0309 05:30:26.350479 2039427072 solver.cpp:229] Iteration 120, loss = 0.0292876 980 | I0309 05:30:26.352140 2039427072 sgd_solver.cpp:106] Iteration 120, lr = 0.001 981 | I0309 05:38:12.074432 2039427072 solver.cpp:229] Iteration 130, loss = 0.0954089 982 | I0309 05:38:12.076714 2039427072 sgd_solver.cpp:106] Iteration 130, lr = 0.001 983 | I0309 05:45:10.769503 2039427072 solver.cpp:338] Iteration 140, Testing net (#0) 984 | I0309 05:45:10.771693 2039427072 net.cpp:748] Ignoring source layer training_train 985 | I0309 05:45:59.169849 2039427072 solver.cpp:229] Iteration 140, loss = 0.00305188 986 | I0309 05:45:59.171521 2039427072 sgd_solver.cpp:106] Iteration 140, lr = 0.001 987 | I0309 05:53:39.295989 2039427072 solver.cpp:229] Iteration 150, loss = 0.0179188 988 | I0309 05:53:39.297720 2039427072 sgd_solver.cpp:106] Iteration 150, lr = 0.001 989 | I0309 06:00:35.618237 2039427072 solver.cpp:338] Iteration 160, Testing net (#0) 990 | I0309 06:00:35.619884 2039427072 net.cpp:748] Ignoring source layer training_train 991 | I0309 06:01:21.353274 2039427072 solver.cpp:229] Iteration 160, loss = 0.123599 992 | I0309 06:01:21.353348 2039427072 sgd_solver.cpp:106] Iteration 160, lr = 0.001 993 | I0309 06:09:03.010154 2039427072 solver.cpp:229] Iteration 170, loss = 0.0274998 994 | I0309 06:09:03.011798 2039427072 sgd_solver.cpp:106] Iteration 170, lr = 0.001 995 | I0309 06:16:01.219427 2039427072 solver.cpp:338] Iteration 180, Testing net (#0) 996 | I0309 06:16:01.221066 2039427072 net.cpp:748] Ignoring source layer training_train 997 | I0309 06:16:47.895036 2039427072 solver.cpp:229] Iteration 180, loss = 0.0670092 998 | I0309 06:16:47.895108 2039427072 sgd_solver.cpp:106] Iteration 180, lr = 0.001 999 | I0309 06:24:27.334940 2039427072 solver.cpp:229] Iteration 190, loss = 0.137421 1000 | I0309 06:24:27.336590 2039427072 sgd_solver.cpp:106] Iteration 190, lr = 0.001 1001 | I0309 06:31:23.539767 2039427072 solver.cpp:456] Snapshotting to binary proto file snapshot_iter_200.caffemodel 1002 | I0309 06:31:30.569471 2039427072 sgd_solver.cpp:273] Snapshotting solver state to binary proto file snapshot_iter_200.solverstate 1003 | I0309 06:31:35.590631 2039427072 solver.cpp:338] Iteration 200, Testing net (#0) 1004 | I0309 06:31:35.590675 2039427072 net.cpp:748] Ignoring source layer training_train 1005 | I0309 06:32:24.506901 2039427072 solver.cpp:229] Iteration 200, loss = 0.00947549 1006 | I0309 06:32:24.509032 2039427072 sgd_solver.cpp:106] Iteration 200, lr = 0.001 1007 | I0309 06:40:05.798178 2039427072 solver.cpp:229] Iteration 210, loss = 0.000621959 1008 | I0309 06:40:05.800447 2039427072 sgd_solver.cpp:106] Iteration 210, lr = 0.001 1009 | I0309 06:47:01.988184 2039427072 solver.cpp:338] Iteration 220, Testing net (#0) 1010 | I0309 06:47:01.989845 2039427072 net.cpp:748] Ignoring source layer training_train 1011 | I0309 06:47:48.538092 2039427072 solver.cpp:229] Iteration 220, loss = 0.010307 1012 | I0309 06:47:48.538161 2039427072 sgd_solver.cpp:106] Iteration 220, lr = 0.001 1013 | I0309 06:55:27.401527 2039427072 solver.cpp:229] Iteration 230, loss = 1.19209e-06 1014 | I0309 06:55:27.403167 
2039427072 sgd_solver.cpp:106] Iteration 230, lr = 0.001 1015 | I0309 07:02:23.158701 2039427072 solver.cpp:338] Iteration 240, Testing net (#0) 1016 | I0309 07:02:23.160835 2039427072 net.cpp:748] Ignoring source layer training_train 1017 | I0309 07:03:08.996206 2039427072 solver.cpp:229] Iteration 240, loss = 0.00566178 1018 | I0309 07:03:08.996301 2039427072 sgd_solver.cpp:106] Iteration 240, lr = 0.001 1019 | I0309 07:10:51.816609 2039427072 solver.cpp:229] Iteration 250, loss = 0.0224584 1020 | I0309 07:10:51.818809 2039427072 sgd_solver.cpp:106] Iteration 250, lr = 0.001 1021 | I0309 07:17:48.533710 2039427072 solver.cpp:338] Iteration 260, Testing net (#0) 1022 | I0309 07:17:48.535917 2039427072 net.cpp:748] Ignoring source layer training_train 1023 | I0309 07:18:34.476519 2039427072 solver.cpp:229] Iteration 260, loss = 0.133503 1024 | I0309 07:18:34.478127 2039427072 sgd_solver.cpp:106] Iteration 260, lr = 0.001 1025 | I0309 07:26:14.051692 2039427072 solver.cpp:229] Iteration 270, loss = 0.000508897 1026 | I0309 07:26:14.053835 2039427072 sgd_solver.cpp:106] Iteration 270, lr = 0.001 1027 | I0309 07:33:09.545931 2039427072 solver.cpp:338] Iteration 280, Testing net (#0) 1028 | I0309 07:33:09.547595 2039427072 net.cpp:748] Ignoring source layer training_train 1029 | I0309 07:33:55.813336 2039427072 solver.cpp:229] Iteration 280, loss = 0.0371675 1030 | I0309 07:33:55.813410 2039427072 sgd_solver.cpp:106] Iteration 280, lr = 0.001 1031 | I0309 07:41:37.257357 2039427072 solver.cpp:229] Iteration 290, loss = 0.0953428 1032 | I0309 07:41:37.259615 2039427072 sgd_solver.cpp:106] Iteration 290, lr = 0.001 1033 | I0309 07:48:43.921154 2039427072 solver.cpp:456] Snapshotting to binary proto file snapshot_iter_300.caffemodel 1034 | I0309 07:48:50.749795 2039427072 sgd_solver.cpp:273] Snapshotting solver state to binary proto file snapshot_iter_300.solverstate 1035 | I0309 07:48:55.835794 2039427072 solver.cpp:338] Iteration 300, Testing net (#0) 1036 | I0309 07:48:55.835839 2039427072 net.cpp:748] Ignoring source layer training_train 1037 | I0309 07:49:45.041051 2039427072 solver.cpp:229] Iteration 300, loss = 0.104563 1038 | I0309 07:49:45.042726 2039427072 sgd_solver.cpp:106] Iteration 300, lr = 0.001 1039 | I0309 07:57:26.111398 2039427072 solver.cpp:229] Iteration 310, loss = 0.00431097 1040 | I0309 07:57:26.113517 2039427072 sgd_solver.cpp:106] Iteration 310, lr = 0.001 1041 | I0309 08:04:23.465739 2039427072 solver.cpp:338] Iteration 320, Testing net (#0) 1042 | I0309 08:04:23.467900 2039427072 net.cpp:748] Ignoring source layer training_train 1043 | I0309 08:05:09.417436 2039427072 solver.cpp:229] Iteration 320, loss = 0.164055 1044 | I0309 08:05:09.417510 2039427072 sgd_solver.cpp:106] Iteration 320, lr = 0.001 1045 | I0309 08:12:53.043699 2039427072 solver.cpp:229] Iteration 330, loss = 0.0759841 1046 | I0309 08:12:53.045910 2039427072 sgd_solver.cpp:106] Iteration 330, lr = 0.001 1047 | I0309 08:19:49.223664 2039427072 solver.cpp:338] Iteration 340, Testing net (#0) 1048 | I0309 08:19:49.225770 2039427072 net.cpp:748] Ignoring source layer training_train 1049 | I0309 08:20:35.606662 2039427072 solver.cpp:229] Iteration 340, loss = 0.0246208 1050 | I0309 08:20:35.608291 2039427072 sgd_solver.cpp:106] Iteration 340, lr = 0.001 1051 | I0309 08:28:15.801987 2039427072 solver.cpp:229] Iteration 350, loss = 0.0311792 1052 | I0309 08:28:15.802347 2039427072 sgd_solver.cpp:106] Iteration 350, lr = 0.001 1053 | I0309 08:35:12.471279 2039427072 solver.cpp:338] Iteration 360, Testing net 
(#0) 1054 | I0309 08:35:12.473426 2039427072 net.cpp:748] Ignoring source layer training_train 1055 | I0309 08:35:58.197039 2039427072 solver.cpp:229] Iteration 360, loss = -3.69923e-08 1056 | I0309 08:35:58.197111 2039427072 sgd_solver.cpp:106] Iteration 360, lr = 0.001 1057 | I0309 08:43:40.246301 2039427072 solver.cpp:229] Iteration 370, loss = 0.010891 1058 | I0309 08:43:40.248558 2039427072 sgd_solver.cpp:106] Iteration 370, lr = 0.001 1059 | I0309 08:51:00.173764 2039427072 solver.cpp:338] Iteration 380, Testing net (#0) 1060 | I0309 08:51:00.175463 2039427072 net.cpp:748] Ignoring source layer training_train 1061 | I0309 08:51:48.892660 2039427072 solver.cpp:229] Iteration 380, loss = 2.97371e-05 1062 | I0309 08:51:48.894011 2039427072 sgd_solver.cpp:106] Iteration 380, lr = 0.001 1063 | I0309 08:59:35.545130 2039427072 solver.cpp:229] Iteration 390, loss = 0.000224456 1064 | I0309 08:59:35.546790 2039427072 sgd_solver.cpp:106] Iteration 390, lr = 0.001 1065 | I0309 09:06:42.094503 2039427072 solver.cpp:456] Snapshotting to binary proto file snapshot_iter_400.caffemodel 1066 | I0309 09:06:49.335649 2039427072 sgd_solver.cpp:273] Snapshotting solver state to binary proto file snapshot_iter_400.solverstate 1067 | I0309 09:06:54.876243 2039427072 solver.cpp:338] Iteration 400, Testing net (#0) 1068 | I0309 09:06:54.876282 2039427072 net.cpp:748] Ignoring source layer training_train 1069 | ^CI0309 09:07:52.545898 2039427072 solver.cpp:229] Iteration 400, loss = 0.0174363 1070 | I0309 09:07:52.546358 2039427072 sgd_solver.cpp:106] Iteration 400, lr = 0.001 1071 | I0309 09:07:57.119024 2039427072 solver.cpp:456] Snapshotting to binary proto file snapshot_iter_401.caffemodel 1072 | I0309 09:08:04.234040 2039427072 sgd_solver.cpp:273] Snapshotting solver state to binary proto file snapshot_iter_401.solverstate 1073 | I0309 09:08:09.227821 2039427072 solver.cpp:302] Optimization stopped early. 1074 | I0309 09:08:09.230278 2039427072 caffe.cpp:222] Optimization Done. 
1075 | 1076 | real 315m3.272s 1077 | user 821m41.505s 1078 | sys 45m58.416s 1079 | 1080 | -------------------------------------------------------------------------------- /models/Custom_Model/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/Custom_Model/train.prototxt" 2 | # test_iter specifies how many forward passes the test should carry out 3 | test_iter: 1 4 | # Carry out testing every X training iterations 5 | test_interval: 20 6 | # Learning rate and momentum parameters for Adam 7 | base_lr: 0.001 8 | momentum: 0.9 9 | momentum2: 0.999 10 | # Adam takes care of changing the learning rate 11 | lr_policy: "fixed" 12 | # Display every X iterations 13 | display: 10 14 | # The maximum number of iterations 15 | max_iter: 3000 16 | # snapshot intermediate results 17 | snapshot: 100 18 | snapshot_prefix: "snapshot" 19 | # solver mode: CPU or GPU 20 | type: "Adam" 21 | solver_mode: CPU 22 | -------------------------------------------------------------------------------- /models/Custom_Model/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layers { 3 | name: "training_train" 4 | type: DATA 5 | data_param { 6 | source: "datasets/training_set_lmdb" 7 | backend: LMDB 8 | batch_size: 400 9 | } 10 | transform_param{ 11 | mean_file: "datasets/mean_training_image.binaryproto" 12 | } 13 | top: "data" 14 | top: "label" 15 | include { 16 | phase: TRAIN 17 | } 18 | } 19 | layers { 20 | name: "training_test" 21 | type: DATA 22 | data_param { 23 | source: "datasets/validation_set_lmdb" 24 | backend: LMDB 25 | batch_size: 14 26 | } 27 | transform_param{ 28 | mean_file: "datasets/mean_training_image.binaryproto" 29 | } 30 | top: "data" 31 | top: "label" 32 | include { 33 | phase: TEST 34 | } 35 | } 36 | layers { 37 | name: "conv1" 38 | type: CONVOLUTION 39 | bottom: "data" 40 | top: "conv1" 41 | convolution_param { 42 | num_output: 96 43 | kernel_size: 7 44 | stride: 2 45 | } 46 | blobs_lr: 0 47 | blobs_lr: 0 48 | } 49 | layers { 50 | name: "relu1" 51 | type: RELU 52 | bottom: "conv1" 53 | top: "conv1" 54 | } 55 | layers { 56 | name: "norm1" 57 | type: LRN 58 | bottom: "conv1" 59 | top: "norm1" 60 | lrn_param { 61 | local_size: 5 62 | alpha: 0.0005 63 | beta: 0.75 64 | } 65 | } 66 | layers { 67 | name: "pool1" 68 | type: POOLING 69 | bottom: "norm1" 70 | top: "pool1" 71 | pooling_param { 72 | pool: MAX 73 | kernel_size: 3 74 | stride: 3 75 | } 76 | } 77 | layers { 78 | name: "conv2" 79 | type: CONVOLUTION 80 | bottom: "pool1" 81 | top: "conv2" 82 | convolution_param { 83 | num_output: 256 84 | pad: 2 85 | kernel_size: 5 86 | } 87 | blobs_lr: 0 88 | blobs_lr: 0 89 | } 90 | layers { 91 | name: "relu2" 92 | type: RELU 93 | bottom: "conv2" 94 | top: "conv2" 95 | } 96 | layers { 97 | name: "pool2" 98 | type: POOLING 99 | bottom: "conv2" 100 | top: "pool2" 101 | pooling_param { 102 | pool: MAX 103 | kernel_size: 2 104 | stride: 2 105 | } 106 | } 107 | layers { 108 | name: "conv3" 109 | type: CONVOLUTION 110 | bottom: "pool2" 111 | top: "conv3" 112 | convolution_param { 113 | num_output: 512 114 | pad: 1 115 | kernel_size: 3 116 | } 117 | blobs_lr: 0 118 | blobs_lr: 0 119 | } 120 | layers { 121 | name: "relu3" 122 | type: RELU 123 | bottom: "conv3" 124 | top: "conv3" 125 | } 126 | layers { 127 | name: "conv4" 128 | type: CONVOLUTION 129 | bottom: "conv3" 130 | top: "conv4" 131 | convolution_param { 132 | num_output: 512 133 | pad: 1 134 | kernel_size: 3 135 | } 136 | blobs_lr: 0 
137 | blobs_lr: 0 138 | } 139 | layers { 140 | name: "relu4" 141 | type: RELU 142 | bottom: "conv4" 143 | top: "conv4" 144 | } 145 | layers { 146 | name: "conv5" 147 | type: CONVOLUTION 148 | bottom: "conv4" 149 | top: "conv5" 150 | convolution_param { 151 | num_output: 512 152 | pad: 1 153 | kernel_size: 3 154 | } 155 | blobs_lr: 0 156 | blobs_lr: 0 157 | } 158 | layers { 159 | name: "relu5" 160 | type: RELU 161 | bottom: "conv5" 162 | top: "conv5" 163 | } 164 | layers { 165 | name: "pool5" 166 | type: POOLING 167 | bottom: "conv5" 168 | top: "pool5" 169 | pooling_param { 170 | pool: MAX 171 | kernel_size: 3 172 | stride: 3 173 | } 174 | } 175 | layers { 176 | name: "fc6" 177 | type: INNER_PRODUCT 178 | bottom: "pool5" 179 | top: "fc6" 180 | inner_product_param { 181 | num_output: 4048 182 | } 183 | blobs_lr: 1.0 184 | blobs_lr: 1.0 185 | } 186 | layers { 187 | name: "relu6" 188 | type: RELU 189 | bottom: "fc6" 190 | top: "fc6" 191 | } 192 | layers { 193 | name: "drop6" 194 | type: DROPOUT 195 | bottom: "fc6" 196 | top: "fc6" 197 | dropout_param { 198 | dropout_ratio: 0.5 199 | } 200 | } 201 | layers { 202 | name: "fc7" 203 | type: INNER_PRODUCT 204 | bottom: "fc6" 205 | top: "fc7" 206 | inner_product_param { 207 | num_output: 4048 208 | } 209 | blobs_lr: 1.0 210 | blobs_lr: 1.0 211 | } 212 | layers { 213 | name: "relu7" 214 | type: RELU 215 | bottom: "fc7" 216 | top: "fc7" 217 | } 218 | layers { 219 | name: "drop7" 220 | type: DROPOUT 221 | bottom: "fc7" 222 | top: "fc7" 223 | dropout_param { 224 | dropout_ratio: 0.5 225 | } 226 | } 227 | layers { 228 | name: "fc8_cat" 229 | type: INNER_PRODUCT 230 | bottom: "fc7" 231 | top: "fc8" 232 | inner_product_param { 233 | num_output: 7 234 | } 235 | blobs_lr: 1.0 236 | blobs_lr: 1.0 237 | } 238 | layers { 239 | name: "prob" 240 | type: SOFTMAX_LOSS 241 | bottom: "fc8" 242 | bottom: "label" 243 | } 244 | 245 | -------------------------------------------------------------------------------- /opencv_functions.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # OpenCV face recognition and segmentation 3 | # 4 | # This file contains utility functions for using OpenCV for face detection 5 | # and other tasks. 6 | # 7 | # Face detection is done with Haar Cascades, whose weights must be downloaded 8 | # from online resources. 
9 | # 10 | # 11 | # Date modified: March 2016 12 | # 13 | # Authors: Dan Duncan 14 | # Gautam Shine 15 | # 16 | ############################################################################### 17 | 18 | import os, shutil, sys, time, re, glob 19 | import numpy as np 20 | import matplotlib.pyplot as plt 21 | import cv2 as cv 22 | import Image 23 | import caffe 24 | 25 | from utility_functions import * 26 | 27 | # Load Haar cascades from XML files 28 | def load_cascades(): 29 | # Load Haar cascade files containing features 30 | cascPaths = ['models/haarcascades/haarcascade_frontalface_default.xml', 31 | 'models/haarcascades/haarcascade_frontalface_alt.xml', 32 | 'models/haarcascades/haarcascade_frontalface_alt2.xml', 33 | 'models/haarcascades/haarcascade_frontalface_alt_tree.xml', # Comma matters: without it Python silently concatenates the adjacent strings 34 | 'models/lbpcascades/lbpcascade_frontalface.xml'] 35 | faceCascades = [] 36 | for casc in cascPaths: 37 | faceCascades.append(cv.CascadeClassifier(casc)) 38 | 39 | return faceCascades 40 | 41 | # Do Haar cascade face detection on a single image. 42 | # Returns the (possibly annotated) image and a list of faces, 43 | # where each face is the coordinates of a rectangle containing a face: 44 | # (x,y,w,h) 45 | def DetectFace(image,color,faceCascades,single_face,second_pass,draw_rects,scale=1.0): 46 | # Resize (fx=fy=1 keeps the original size; change these factors to rescale) 47 | img = cv.resize(image, (0,0), fx=1, fy=1, interpolation = cv.INTER_CUBIC) 48 | 49 | # Convert to grayscale and equalize the histogram 50 | if color: 51 | gray_img = img.copy().astype(np.uint8) 52 | gray_img = cv.cvtColor(gray_img, cv.COLOR_BGR2GRAY) 53 | else: 54 | gray_img = img.copy().astype(np.uint8) 55 | cv.equalizeHist(gray_img, gray_img) 56 | 57 | # Detect the faces 58 | faces = faceCascades[2].detectMultiScale( 59 | gray_img, 60 | scaleFactor=1.1, 61 | minNeighbors=7, 62 | minSize=(50, 50), 63 | flags = cv.CASCADE_SCALE_IMAGE) 64 | 65 | # Eliminate spurious extra faces 66 | discardExtraFaces = False # Set to True to enable 67 | if discardExtraFaces and len(faces) > 1: 68 | faces = faces[0,:] 69 | faces = faces[np.newaxis,:] 70 | 71 | # Rescale cropBox 72 | 73 | if scale != 1.0 and len(faces) > 0: 74 | for i in range(faces.shape[0]): 75 | faces[i] = rescaleCropbox(img,faces[i],scale) 76 | 77 | print('Detected %d faces.' % len(faces)) 78 | # Draw a rectangle around the faces 79 | if draw_rects: 80 | for (x, y, w, h) in faces: 81 | cv.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2) 82 | 83 | # For laboratory images, remove any spurious detections 84 | if single_face and len(faces) > 1: 85 | faces = faces[0,:] 86 | faces = faces[np.newaxis,:] 87 | 88 | if len(faces) > 0 and second_pass: 89 | approved = [] 90 | for i in range(len(faces)): 91 | cropped_face = imgCrop(gray_img, faces[i]) 92 | alt_check = faceCascades[1].detectMultiScale( 93 | cropped_face, 94 | scaleFactor=1.05, 95 | minNeighbors=5, 96 | minSize=(int(0.8*faces[i][2]), int(0.8*faces[i][3])), 97 | flags = cv.CASCADE_SCALE_IMAGE) 98 | # Check if exactly 1 face was detected in cropped image 99 | if len(alt_check) == 1: 100 | approved.append(i) 101 | faces = faces[approved] 102 | 103 | return img, faces 104 | 105 | # Resize cropBox 106 | # This is useful if you want the face rectangle to be slightly bigger, 107 | # such as making it the size of the person's whole head. 108 | def rescaleCropbox(img,cropBox,scale=1.0): 109 | x, y, w, h = cropBox 110 | 111 | # Check for valid box sizes 112 | if scale <= 0: 113 | # Invalid input. 
Return original 114 | return cropBox 115 | 116 | 117 | if scale < 1.0: 118 | x += int(w*(1-scale)/2) 119 | y += int(h*(1-scale)/2) 120 | w = int(w*scale) 121 | h = int(h*scale) 122 | 123 | elif scale > 1.0: 124 | x -= int(w*(scale-1.0)/2) 125 | y -= int(h*(scale-1.0)/2) 126 | w = int(w*scale) 127 | h = int(h*scale) 128 | 129 | # Make sure dimensions won't be exceeded: 130 | exceeded = False; count = 0; maxCount = 10 # Arbitrary retry limit 131 | while True: 132 | if x < 0: 133 | w += 2*x # Make w smaller to maintain symmetry 134 | x = 0 135 | 136 | if y < 0: 137 | h += 2*y 138 | y = 0 139 | exceeded = True 140 | 141 | if x+w > img.shape[1]: 142 | x -= x + w - img.shape[1] 143 | exceeded = True 144 | 145 | if y+h > img.shape[0]: 146 | y -= y + h - img.shape[0] 147 | exceeded = True 148 | 149 | if count > maxCount: 150 | # Rescaling has failed. Just return the original crop box 151 | print "Error: opencv_functions.rescaleCropbox: Crop scale exceeded image dimensions" 152 | return cropBox 153 | 154 | if not exceeded: 155 | # Rescaling succeeded! 156 | break 157 | else: 158 | count += 1 159 | exceeded = False 160 | 161 | # Return rescaled cropbox 162 | return (x,y,w,h) 163 | 164 | 165 | # Crop image array to pixels indicated by crop box 166 | def imgCrop(img, cropBox, scale=1.0): 167 | cropBox = rescaleCropbox(img,cropBox,scale) 168 | (x,y,w,h) = cropBox 169 | img = img[y:(y+h), x:(x+w)] # Columns span the width w (the previous x+h skewed non-square crops) 170 | return img 171 | 172 | # Convert bgr to rgb 173 | # bgr is a common format and the default one for opencv 174 | def rgb(bgr_img): 175 | b,g,r = cv.split(bgr_img) # get b,g,r 176 | rgb_img = cv.merge([r,g,b]) # switch it to rgb 177 | return rgb_img 178 | 179 | # Given a target directory and a list of images, crop every image to just the faces 180 | # Returns face_list, an array of cropped image file names 181 | def faceCrop(targetDir, imgList, color, single_face): 182 | # Load list of Haar cascades for faces 183 | faceCascades = load_cascades() 184 | 185 | # Iterate through images 186 | face_list = [] 187 | for img in imgList: 188 | if os.path.isdir(img): 189 | continue 190 | pil_img = Image.open(img) 191 | if color: 192 | cv_img = cv.cvtColor(np.array(pil_img), cv.COLOR_RGB2BGR) 193 | else: 194 | cv_img = np.array(pil_img) 195 | # Convert to grayscale if this image is actually color 196 | if cv_img.ndim == 3: 197 | cv_img = cv.cvtColor(np.array(pil_img), cv.COLOR_RGB2GRAY) # PIL arrays are RGB, not BGR 198 | 199 | # Detect all faces in this image 200 | scaled_img, faces = DetectFace(cv_img, color, faceCascades, single_face, second_pass=False, draw_rects=False) 201 | 202 | # Iterate through faces 203 | n=1 204 | for face in faces: 205 | cropped_cv_img = imgCrop(scaled_img, face, scale=1.0) 206 | if color: 207 | cropped_cv_img = rgb(cropped_cv_img) 208 | fname, ext = os.path.splitext(img) 209 | cropped_pil_img = Image.fromarray(cropped_cv_img) 210 | #save_name = loc + '/cropped/' + fname.split('/')[-1] + '_crop' + str(n) + ext 211 | save_name = targetDir + '/' + fname.split('/')[-1] + '_crop' + str(n) + ext 212 | cropped_pil_img.save(save_name) 213 | face_list.append(save_name) 214 | n += 1 215 | 216 | return face_list 217 | 218 | # Add an emoji to an image at a specified point and size 219 | # Inputs: img, emoji are ndarrays of WxHx3 220 | # faces is a list of (x,y,w,h) tuples for each face to be replaced 221 | def addEmoji(img,faces,emoji): 222 | for x,y,w,h in faces: 223 | # Resize emoji to desired width and height 224 | dim = max(w,h) 225 | em = cv.resize(emoji, (dim,dim), interpolation = cv.INTER_CUBIC) 226 | 227 | # Get boolean for transparency 228 | 
trans = em.copy() 229 | trans[em == 0] = 1 230 | trans[em != 0] = 0 231 | 232 | # Delete all pixels in image where emoji is nonzero 233 | img[y:y+h,x:x+w,:] *= trans 234 | 235 | # Add emoji on those pixels 236 | img[y:y+h,x:x+w,:] += em 237 | 238 | return img 239 | 240 | # Add emojis to image at specified points and sizes 241 | # Inputs: img is an ndarray of WxHx3 242 | # emojis is a list of WxHx3 emoji arrays 243 | # faces is a list of (x,y,w,h) tuples for each face to be replaced 244 | # labels is a list of integer labels for each emotion 245 | def addMultipleEmojis(img,faces,emojis,labels): 246 | categories = [ 'Angry' , 'Disgust' , 'Fear' , 'Happy' , 'Neutral' , 'Sad' , 'Surprise'] 247 | 248 | for i in range(len(labels)): 249 | 250 | x,y,w,h = faces[i] 251 | label = labels[i] 252 | emoji = emojis[int(label)] 253 | 254 | 255 | # Resize emoji to desired width and height (Haar face boxes are square, so the dim x dim emoji matches the h x w slice below) 256 | dim = max(w,h) 257 | em = cv.resize(emoji, (dim,dim), interpolation = cv.INTER_CUBIC) 258 | 259 | # Get boolean for transparency 260 | trans = em.copy() 261 | trans[em == 0] = 1 262 | trans[em != 0] = 0 263 | 264 | # Delete all pixels in image where emoji is nonzero 265 | img[y:y+h,x:x+w,:] *= trans 266 | 267 | # Add emoji on those pixels 268 | img[y:y+h,x:x+w,:] += em 269 | 270 | return img 271 | 272 | 273 | # Switch between RGB and BGR 274 | def toggleRGB(img): 275 | r,g,b = cv.split(img) 276 | img = cv.merge([b,g,r]) 277 | return img 278 | 279 | 280 | # Combine two images for displaying side-by-side 281 | # If maxSize is True, crops the sides of both images to fit a target width (a 16:9 YouTube frame by default) 282 | def cvCombineTwoImages(img1,img2,buf=2,maxSize=True): 283 | h1, w1, c1 = img1.shape 284 | h2, w2, c2 = img2.shape 285 | 286 | # Choose video size. Can be sized to either maximum screen size, or to the size of a YouTube video 287 | maxType = 'youtube' if maxSize else None # (previously maxType was left unset when maxSize was False, raising a NameError below) 288 | 289 | excess = 0 290 | if maxType == 'youtube': 291 | # Convert to a 16:9 aspect ratio (YouTube's native aspect ratio) 292 | wh = 16.0/9.0 # = 1.778 293 | 294 | h = max(h1,h2) 295 | maxWidth = int(wh*float(h)) 296 | excess = w1 + w2 + buf - maxWidth 297 | 298 | elif maxType == 'screen': 299 | screenWidth = 1920 # Width in pixels for macbook pro is 2880 300 | margin = 40 # Minimum number of extra pixels to save 301 | excess = w1 + w2 + buf - screenWidth + margin 302 | 303 | if excess > 0: # Only crop when the images are actually too wide 304 | diff = int(np.ceil(float(excess)/4.0)) 305 | 306 | img1 = img1[:,diff:-diff,:] 307 | img2 = img2[:,diff:-diff,:] 308 | 309 | h1, w1, c1 = img1.shape 310 | h2, w2, c2 = img2.shape 311 | 312 | #print "\tImages resized. 
New combined width: " + str(w1 + w2 + buf) 313 | 314 | h = max(h1,h2) 315 | w = w1 + w2 + buf 316 | c = max(c1,c2) 317 | 318 | if c1 != c2: 319 | # Incompatible dimensions 320 | print "Error, images have incompatible dimensions along depth axis" 321 | return None 322 | 323 | img = np.zeros([h,w,c]).astype(np.uint8) 324 | 325 | # Add in the two images 326 | img[0:h1,0:w1,:] = img1 327 | img[0:h2,w1+buf:w1+buf+w2,:] = img2 328 | 329 | # Return combined image as numpy array of uint8's 330 | return img 331 | 332 | 333 | # Create a directory only if it does not already exist 334 | def mkdirNoForce(dir): 335 | if not os.path.exists(dir): 336 | os.mkdir(dir) 337 | 338 | 339 | # Save a test image with a default name from the current timestamp 340 | def saveTestImage(img,filename=None,outDir=None): 341 | # Get image filename from current timestamp 342 | if filename is None: 343 | ts = time.time() 344 | formatStr = "%Y-%m-%d_%H-%M-%S" 345 | filestr = datetime.datetime.fromtimestamp(ts).strftime(formatStr) 346 | filename = filestr + ".png" 347 | 348 | if outDir is not None: 349 | mkdirNoForce(outDir) 350 | filename = outDir + "/" + filename 351 | 352 | # Save image 353 | im = Image.fromarray(toggleRGB(img)) 354 | im.save(filename) 355 | 356 | # Return filename 357 | return filename 358 | -------------------------------------------------------------------------------- /process_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ############################################################################### 4 | # 5 | # This was our original 'main' script. 6 | # Its job is to read and classify an entire dataset. 7 | # It also calculates a confusion matrix and the overall accuracy. 8 | # Configured for the CK+ and JAFFE datasets. 9 | # 10 | # 11 | # Date modified: March 2016 12 | # 13 | # Authors: Dan Duncan 14 | # Gautam Shine 15 | # 16 | ############################################################################### 17 | 18 | import os, shutil, sys, time, re, glob 19 | import numpy as np 20 | import matplotlib.pyplot as plt 21 | import cv2 as cv 22 | import Image 23 | import caffe 24 | 25 | from caffe_functions import * 26 | from opencv_functions import * 27 | from utility_functions import * 28 | 29 | ### USER-SPECIFIED VARIABLES: ### 30 | 31 | # List your dataset root directories here: 32 | dirJaffe = 'datasets/jaffe' 33 | dirCKPlus = 'datasets/CK_Plus' 34 | dirMisc = 'datasets/misc' 35 | dirTraining = 'datasets/validation_images' 36 | # dirOther ... 
TODO: allow any generic directory of pictures 37 | 38 | # Select which dataset to use (case insensitive): 39 | #dataset = 'jaffe' 40 | #dataset = 'ckplus' 41 | dataset = 'training' 42 | #dataset = 'misc' 43 | 44 | # Flags: 45 | cropFlag = True # False disables image cropping 46 | plot_confusion = True 47 | useMean = False # Use image mean during classification 48 | 49 | ### START SCRIPT: ### 50 | 51 | # Set up inputs 52 | dir = None 53 | if dataset.lower() == 'jaffe': 54 | dir = dirJaffe 55 | color = False 56 | single_face = True 57 | cropFlag = True 58 | useMean = True 59 | elif dataset.lower() == 'ckplus': 60 | dir = dirCKPlus 61 | color = False 62 | single_face = True 63 | cropFlag = True 64 | useMean = True 65 | elif dataset.lower() == 'misc': 66 | dir = dirMisc 67 | color = True 68 | single_face = True 69 | useMean = True 70 | elif dataset.lower() == 'training': 71 | dir = dirTraining 72 | color = True 73 | single_face = True 74 | cropFlag = False 75 | useMean = True 76 | else: 77 | print 'Error - Unsupported dataset: ' + dataset 78 | sys.exit(1) # Exit with a nonzero code on error 79 | 80 | # Clean up and discard anything from the last run 81 | dirCrop = dir + '/cropped' 82 | rmdir(dirCrop) 83 | 84 | # Master list of categories for EmotiW network 85 | categories = [ 'Angry' , 'Disgust' , 'Fear' , 'Happy' , 'Neutral' , 'Sad' , 'Surprise'] 86 | 87 | # Start keeping time: 88 | t0 = time.time() 89 | 90 | # Load dataset image list 91 | input_list, labels = importDataset(dir, dataset, categories) 92 | 93 | # Perform detection and cropping if desired (and it should be desired) 94 | crop_time = None 95 | if cropFlag: 96 | start = time.time() 97 | mkdir(dirCrop) 98 | input_list = faceCrop(dirCrop, input_list, color, single_face) 99 | crop_time = time.time() - start 100 | 101 | 102 | # Perform classification 103 | start = time.time() 104 | classify_emotions(input_list, color, categories, labels, plot_neurons=False, plot_confusion=plot_confusion, useMean=useMean) 105 | classify_time = time.time() - start 106 | totalTime = time.time() - t0 107 | 108 | print '\nNumber of images: ' + str(len(input_list)) 109 | if crop_time is not None: 110 | print 'Crop time: ' + str(crop_time) + 's\t(' + str(crop_time / len(input_list)) + "s / image)" 111 | print 'Classify time: ' + str(classify_time) + 's\t(' + str(classify_time / len(input_list)) + "s / image)" 112 | print 'Total time: ' + str(totalTime) + 's\t(' + str(totalTime / len(input_list)) + "s / image)" 113 | 114 | 115 | -------------------------------------------------------------------------------- /process_image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ############################################################################### 4 | # 5 | # This script reads in each image from a directory, detects all faces with opencv, 6 | # classifies emotions on each face with a neural network, adds emojis to the 7 | # original image corresponding to each emotion, and saves the new image to file. 
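#
# Hedged usage sketch (the directory below is this script's default, set in
# the USER-SPECIFIED VARIABLES section):
#   1. Place input images in datasets/generic_image_directory/
#   2. Run: python process_image.py
#   3. Each result is written back to the same directory as <name>_emojis.<ext>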
79 | 
80 | # Clean up and discard anything from the last run
81 | dirCrop = dir + '/cropped'
82 | rmdir(dirCrop)
83 | 
84 | # Master list of categories for EmotitW network
85 | categories = [ 'Angry' , 'Disgust' , 'Fear' , 'Happy' , 'Neutral' , 'Sad' , 'Surprise']
86 | 
87 | # Start keeping time:
88 | t0 = time.time()
89 | 
90 | # Load dataset image list
91 | input_list, labels = importDataset(dir, dataset, categories)
92 | 
93 | # Perform detection and cropping if desired (and it should be desired)
94 | crop_time = None
95 | if cropFlag:
96 |     start = time.time()
97 |     mkdir(dirCrop)
98 |     input_list = faceCrop(dirCrop, input_list, color, single_face)
99 |     crop_time = time.time() - start
100 | 
101 | 
102 | # Perform classification
103 | start = time.time()
104 | classify_emotions(input_list, color, categories, labels, plot_neurons=False, plot_confusion=plot_confusion, useMean=useMean)
105 | classify_time = time.time() - start
106 | totalTime = time.time() - t0
107 | 
108 | print '\nNumber of images: ' + str(len(input_list))
109 | if crop_time is not None:
110 |     print 'Crop time: ' + str(crop_time) + 's\t(' + str(crop_time / len(input_list)) + "s / image)"
111 | print 'Classify time: ' + str(classify_time) + 's\t(' + str(classify_time / len(input_list)) + "s / image)"
112 | print 'Total time: ' + str(totalTime) + 's\t(' + str(totalTime / len(input_list)) + "s / image)"
113 | 
114 | 
115 | 
--------------------------------------------------------------------------------
/process_image.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | ###############################################################################
4 | #
5 | # This script reads in a single image, detects all faces with opencv,
6 | # classifies emotions on each face with a neural network, adds emojis to the
7 | # original image corresponding to each emotion, and saves the new image to file.
8 | #
9 | #
10 | # Date modified: March 2016
11 | #
12 | # Authors: Dan Duncan
13 | #          Gautam Shine
14 | #
15 | ###############################################################################
16 | 
17 | import os, shutil, sys, time, re, glob
18 | import numpy as np
19 | import matplotlib.pyplot as plt
20 | import cv2 as cv
21 | import Image
22 | import caffe
23 | 
24 | from caffe_functions import *
25 | from opencv_functions import *
26 | from utility_functions import *
27 | 
28 | ### USER-SPECIFIED VARIABLES: ###
29 | categories = [ 'Angry' , 'Disgust' , 'Fear' , 'Happy' , 'Neutral' , 'Sad' , 'Surprise']
30 | useCNN = True
31 | defaultEmoji = 2 # Index of default emoji (0-6)
32 | 
33 | # List your dataset root directories here:
34 | dirImage = 'datasets/generic_image_directory'
35 | 
36 | ### START SCRIPT ###
37 | 
38 | # Set up face detection
39 | faceCascades = load_cascades()
40 | 
41 | # Set up network
42 | if useCNN:
43 |     mean = loadMeanCaffeImage()
44 |     VGG_S_Net = make_net(mean,net_dir="Custom_Model")
45 | 
46 | # Get all emojis
47 | emojis = loadAllEmojis()
48 | 
49 | # Find all image files
50 | extensions = [".png",".jpg",".jpeg",".tiff"]
51 | filenames = []
52 | for ext in extensions:
53 |     filenames += glob.glob(dirImage + "/*" + ext)
54 | 
55 | print "Processing " + str(len(filenames)) + " images"
56 | 
57 | for filename in filenames:
58 |     print "Now processing: " + filename
59 | 
60 |     # Note: Caffe and openCV use different input formats!
61 |     # Both images will be HxWxC shaped
62 |     # But Caffe's units are floats from 0.0 - 1.0
63 |     # OpenCV uses uint8 data format with values from 0 - 255
64 | 
65 |     # Load image in caffe format
66 |     frame = caffe.io.load_image(filename)
67 | 
68 |     # Load same image in openCV format
69 |     pilImg = Image.open(filename)
70 |     cvImg = cv.cvtColor(np.array(pilImg), cv.COLOR_RGB2BGR)
71 | 
72 |     # Find all faces
73 |     with nostdout():
74 |         _, faces = DetectFace(cvImg,True,faceCascades,single_face=False,second_pass=False,draw_rects=False,scale=1.0)
75 | 
76 |     frame = cvImg # Use the openCV image from here on
77 |     if faces is None or len(faces) == 0: # Check None before len() to avoid a TypeError
78 |         # No faces found
79 |         pass
80 |     else:
81 |         if useCNN:
82 |             # Get a label for each face
83 |             labels = classify_video_frame(frame, faces, VGG_S_Net, categories=None)
84 | 
85 |             # Add an emoji for each label
86 |             frame = addMultipleEmojis(frame,faces,emojis,labels)
87 | 
88 |         else:
89 |             # Just use the smiley face (no CNN classification)
90 |             frame = addEmoji(frame,faces,emojis[defaultEmoji])
91 | 
92 |     # Save to file
93 |     fn = filename.split('/')[-1]
94 |     fn = fn.split('.')
95 |     fn = fn[0] + '_emojis.' + fn[1]
96 |     _ = saveTestImage(frame,outDir=dirImage,filename=fn)
97 |     print "Image: " + fn + " saved. " + str(len(faces)) + " faces found!"
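# Hypothetical alternative to the split('.') logic above (a sketch, not
# called anywhere): os.path.splitext is robust to extra dots in filenames.
def emojiOutputName(path):
    root, ext = os.path.splitext(os.path.basename(path))
    return root + "_emojis" + ext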
98 | 
99 | 
100 | 
--------------------------------------------------------------------------------
/utility_functions.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Utility functions for OpenCV-Caffe chaining
3 | # and anything generally useful for our other scripts
4 | #
5 | # Date modified: March 2016
6 | #
7 | # Authors: Dan Duncan
8 | #          Gautam Shine
9 | #
10 | ###############################################################################
11 | 
12 | import os, shutil, sys, time, re, glob, csv
13 | import numpy as np
14 | import matplotlib.pyplot as plt
15 | import cv2 as cv
16 | import caffe
17 | import datetime
18 | from PIL import Image
19 | from opencv_functions import *
20 | import contextlib, cStringIO
21 | 
22 | 
23 | # Plot confusion matrix
24 | def plot_confusion_matrix(cm, names=None, title='Confusion Matrix', cmap=plt.cm.Blues):
25 |     plt.figure(4)
26 |     plt.imshow(cm, interpolation='nearest', cmap=cmap)
27 |     plt.title(title)
28 |     plt.colorbar()
29 | 
30 |     # Add labels to confusion matrix:
31 |     if names is None:
32 |         names = range(cm.shape[0])
33 | 
34 |     tick_marks = np.arange(len(names))
35 |     plt.xticks(tick_marks, names, rotation=45)
36 |     plt.yticks(tick_marks, names)
37 | 
38 |     plt.tight_layout()
39 |     plt.ylabel('Correct label')
40 |     plt.xlabel('Predicted label')
41 |     plt.show()
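# Row-normalization as a standalone helper (sketch only; confusion_matrix()
# below does the same thing inline, and nothing calls this yet):
def normalizeConfusionRows(matrix):
    # Divide each row by its sum so every row sums to 1.0, which corrects
    # for class imbalance between categories
    return matrix / matrix.sum(axis=1, keepdims=True)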
42 | 
43 | # Generate confusion matrix for Jaffe
44 | # results = list of tuples of (correct label, predicted label),
45 | #           both as numeric category indices, e.g. [ (0, 3) ]
46 | # categories = list of category names
47 | # Returns confusion matrix; rows are correct labels and columns are predictions
48 | def confusion_matrix(results, categories, plotConfusion=False):
49 |     # Empty confusion matrix (one row and column per category)
50 |     matrix = np.zeros((len(categories),len(categories)))
51 | 
52 |     # Iterate over all labels and populate matrix
53 |     for label, pred in results:
54 |         matrix[label,pred] += 1
55 |         #matrix[map_categories[label],pred] += 1
56 | 
57 |     # Print matrix and percent accuracy
58 |     accuracy = float(np.trace(matrix))/len(results)
59 |     print('Confusion Matrix: ')
60 |     print(matrix)
61 |     print 'Accuracy: ' + str(accuracy*100) + '%'
62 | 
63 |     # Normalize confusion matrix
64 |     normalizeMatrix = True
65 |     if normalizeMatrix:
66 |         print "utility.confusion_matrix(). Non-normalized conf_mat:"
67 |         print matrix
68 |         s = np.sum(matrix,1) # Sum each row
69 |         for i in range(matrix.shape[0]):
70 |             # Normalization handles class imbalance in training set
71 |             matrix[i,:] /= s[i]
72 | 
73 |     # Save matrix to file:
74 |     np.save("confusion_matrix.npy",matrix)
75 | 
76 |     # Plot the confusion matrix
77 |     if plotConfusion:
78 |         plot_confusion_matrix(matrix, categories)
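# Companion loader (hypothetical, not called anywhere): re-plot a matrix
# saved by confusion_matrix() above without re-running classification.
def loadAndPlotConfusion(path="confusion_matrix.npy", names=None):
    plot_confusion_matrix(np.load(path), names=names)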
79 | 
80 | # Get images, labels tuple for CK+ dataset
81 | def importCKPlusDataset(dir = 'CKPlus', categories = None, includeNeutral = False, contemptAs = None):
82 |     ############################################################################
83 |     # Function: importCKPlusDataset
84 |     # Depending on preferences, this ranges from 309 - 920 images and labels
85 |     # - 309 labeled images
86 |     # - 18 more "Contempt" images (not in our vocabulary)
87 |     # - 593 neutral images
88 |     #
89 |     # For this to work, make sure your CKPlus dataset is formatted like this:
90 |     # CKPlus = root (or whatever is in your 'dir' variable)
91 |     # CKPlus/CKPlus_Images = Root for all image files (no other file types here)
92 |     # Example image path:
93 |     # CKPlus/CKPlus_Images/S005/001/S005_001_00000011.png
94 |     #
95 |     # CKPlus/CKPlus_Labels = Root for all image labels (no other file types)
96 |     # Example label path:
97 |     # CKPlus/CKPlus_Labels/S005/001/S005_001_00000011_emotion.txt
98 |     #
99 |     # CKPlus/* - anything else in this directory is ignored, as long as it
100 |     # is not in the _Images or _Labels subdirectories
101 |     #
102 |     # Optional inputs:
103 |     # dir - Custom root directory for CKPlus dataset (if not 'CKPlus')
104 |     #
105 |     # includeNeutral - Boolean to include neutral pictures or not
106 |     # Note: Every sequence begins with neutral photos, so neutral photos
107 |     # greatly outnumber all others combined (approximately 593 to 327)
108 |     #
109 |     # contemptAs - Since it's not in our vocabulary, by default all pictures
110 |     # labeled "Contempt" are discarded. But if you put a string here, e.g.
111 |     # "Disgust", pictures labeled "Contempt" will be lumped in with "Disgust"
112 |     # instead of being discarded.
113 |     #
114 |     #
115 |     # RETURN VALUES:
116 |     # images, labels = List of image file paths, list of numeric labels
117 |     # according to EitW numbers
118 |     #
119 |     # Author: Dan Duncan
120 |     #
121 |     ############################################################################
122 | 
123 |     # Note: "Neutral" is not labeled in the CK+ dataset
124 |     categoriesCK = ['Angry', 'Contempt', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise']
125 | 
126 |     if categories is None:
127 |         categoriesEitW = [ 'Angry' , 'Disgust' , 'Fear' , 'Happy' , 'Neutral' , 'Sad' , 'Surprise']
128 |     else:
129 |         categoriesEitW = categories
130 | 
131 |     # Root directories for images and labels. Should have no other .txt or .png files present
132 |     dirImages = dir + '/CKPlus_Images'
133 |     dirLabels = dir + '/CKPlus_Labels'
134 | 
135 |     if contemptAs is not None:
136 |         # Verify a valid string was supplied
137 |         try:
138 |             ind = categoriesEitW.index(contemptAs)
139 |         except ValueError:
140 |             raise ValueError("\nError in importCKPlusDataset(): contemptAs = '" + contemptAs + "' is not a valid category. Exiting.\n")
141 | 
142 |     # Get all possible label and image filenames
143 |     imageFiles = glob.glob(dirImages + '/*/*/*.png')
144 |     labelFiles = glob.glob(dirLabels + '/*/*/*.txt')
145 | 
146 |     # Get list of all labeled images:
147 |     # Convert label filenames to image filenames
148 |     # Label looks like: CK_Plus/CKPlus_Labels/S005/001/S005_001_00000011_emotion.txt
149 |     # Image looks like: CK_Plus/CKPlus_Images/S005/001/S005_001_00000011.png
150 |     allLabeledImages = []
151 | 
152 |     for label in labelFiles:
153 |         img = label.replace(dirLabels,dirImages)
154 |         img = img.replace('_emotion.txt','.png')
155 |         allLabeledImages.append(img)
156 | 
157 |     # Construct final set of labeled image file names and corresponding labels
158 |     # Be sure not to include images labeled as "Contempt", since those are not part of our vocabulary
159 |     labeledImages = []
160 |     labels = []
161 |     labelNames = []
162 |     contemptImages = []
163 |     for ind in range(len(labelFiles)):
164 |         curLabel = labelFiles[ind]
165 |         curImage = allLabeledImages[ind]
166 | 
167 |         # Open the label file as binary read-only
168 |         with open(curLabel, 'rb') as csvfile:
169 | 
170 |             # Convert filestream to csv-reading filestream
171 |             rd = csv.reader(csvfile)
172 |             row = rd.next() # First (only) row; avoids shadowing the built-in str
173 | 
174 |             # Get integer label in CK+ format
175 |             numCK = int(float(row[0]))
176 | 
177 |             # Get text label from CK+ number
178 |             labelText = categoriesCK[numCK-1]
179 | 
180 |             if labelText != 'Contempt':
181 |                 numEitW = categoriesEitW.index(labelText)
182 |                 labeledImages.append(curImage)
183 |                 labels.append(numEitW)
184 |                 labelNames.append(labelText)
185 |             elif contemptAs is not None:
186 |                 # Lump "Contempt" in with another category
187 |                 numEitW = categoriesEitW.index(contemptAs)
188 |                 labeledImages.append(curImage)
189 |                 labels.append(numEitW)
190 |                 labelNames.append(labelText)
191 |             else:
192 |                 # Discard "Contempt" image
193 |                 contemptImages.append(curImage)
194 | 
195 |     if includeNeutral:
196 |         # Add all neutral images to our list too:
197 |         # The first image in every series is neutral
198 |         neutralPattern = '_00000001.png'
199 |         neutralInd = categoriesEitW.index('Neutral')
200 |         neutralImages = []
201 |         neutralLabels = []
202 |         neutralLabelNames = []
203 | 
204 |         for imgStr in imageFiles:
205 |             if neutralPattern in imgStr:
206 |                 neutralImages.append(imgStr)
207 |                 neutralLabels.append(neutralInd)
208 |                 neutralLabelNames.append('Neutral')
209 | 
210 |         # Combine lists of labeled and neutral images
211 |         images = labeledImages + neutralImages
212 |         labels = labels + neutralLabels
213 |         labelNames = labelNames + neutralLabelNames
214 | 
215 |     else:
216 |         images = labeledImages
217 | 
218 |     # For testing only:
219 |     #images = images[0:10]
220 |     #labels = labels[0:10]
221 | 
222 |     return images, labels #, labelNames
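# Usage sketch (hypothetical call, mirroring the docstring above): fold the
# 18 "Contempt" images into "Disgust" instead of discarding them:
#   images, labels = importCKPlusDataset('datasets/CK_Plus',
#                                        includeNeutral=True,
#                                        contemptAs='Disgust')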
223 | 
224 | # Get entire dataset
225 | # Inputs: Dataset root directory; optional dataset name
226 | # Returns: List of all image file paths; list of correct labels for each image
227 | def importDataset(dir, dataset, categories):
228 |     imgList = glob.glob(dir+'/*')
229 |     labels = None
230 | 
231 |     # Dataset-specific import rules:
232 |     if dataset.lower() == 'jaffe' or dataset.lower() == 'training':
233 |         # Get Jaffe labels
234 |         jaffe_categories_map = {
235 |             'HA': categories.index('Happy'),
236 |             'SA': categories.index('Sad'),
237 |             'NE': categories.index('Neutral'),
238 |             'AN': categories.index('Angry'),
239 |             'FE': categories.index('Fear'),
240 |             'DI': categories.index('Disgust'),
241 |             'SU': categories.index('Surprise')
242 |         }
243 | 
244 |         labels = []
245 | 
246 |         for img in imgList:
247 |             if os.path.isdir(img):
248 |                 continue
249 |             key = img.split('.')[1][0:2] # JAFFE names embed the label, e.g. 'KA.HA1.29.tiff' -> 'HA'
250 |             labels.append(jaffe_categories_map[key])
251 | 
252 |     elif dataset.lower() == 'ckplus':
253 |         # Pathnames and labels for all images
254 |         imgList, labels = importCKPlusDataset(dir, categories=categories,includeNeutral=True,contemptAs=None)
255 | 
256 |     elif dataset.lower() == 'misc':
257 |         labels = [0,1,2,3,4,5,6]
258 | 
259 |     else:
260 |         print 'Error - Unsupported dataset: ' + dataset
261 |         return None
262 | 
263 |     # Make sure some dataset was imported
264 |     if len(imgList) <= 0:
265 |         print 'Error - No images found in ' + str(dir)
266 |         return None
267 | 
268 |     # Return list of filenames and labels
269 |     return imgList, labels
270 | 
271 | # Delete all files in a directory matching pattern
272 | def purge(dir, pattern):
273 |     for f in os.listdir(dir):
274 |         if re.search(pattern, f):
275 |             os.remove(os.path.join(dir, f))
276 | 
277 | # Delete a directory
278 | def rmdir(dir):
279 |     if os.path.exists(dir):
280 |         shutil.rmtree(dir)
281 | 
282 | # Create a directory. Overwrite any existing directory
283 | def mkdir(dir):
284 |     if os.path.exists(dir):
285 |         shutil.rmtree(dir)
286 |     os.mkdir(dir)
287 | 
288 | 
289 | # Create a directory only if it does not already exist
290 | def mkdirNoForce(dir):
291 |     if not os.path.exists(dir):
292 |         os.mkdir(dir)
293 | 
294 | 
295 | 
296 | # Flatten a list of lists into a single list
297 | def flatten(biglist):
298 |     return [item for sublist in biglist for item in sublist]
299 | 
300 | 
301 | 
302 | # Load an image whose array elements are in uint8 format
303 | # Caffe imports images in float format...elements vary from 0.0 to 1.0
304 | # OpenCV's webcam capture brings in images in uint8 format...elements range from 0 to 255
305 | def loadUintImage(imgFile):
306 |     img = toggleRGB(caffe.io.load_image(imgFile))
307 |     img *= 255.0
308 |     img = img.astype(np.uint8)
309 |     return img
310 | 
311 | # Load an emoji according to the desired category
312 | def loadEmoji(ind=3):
313 |     categories = [ 'Angry' , 'Disgust' , 'Fear' , 'Happy' , 'Neutral' , 'Sad' , 'Surprise']
314 |     emojiFile = 'datasets/Emojis/' + categories[ind] + '.png'
315 |     emoji = loadUintImage(emojiFile)
316 |     return emoji
317 | 
318 | # Load all emojis into a list
319 | # Assumes emojis are stored in "datasets/Emojis/Angry.png" etc
320 | def loadAllEmojis(emojiDir=None, categories=None):
321 |     if emojiDir is None:
322 |         emojiDir = 'datasets/Emojis/'
323 |     if categories is None:
324 |         categories = [ 'Angry' , 'Disgust' , 'Fear' , 'Happy' , 'Neutral' , 'Sad' , 'Surprise']
325 | 
326 |     emojis = []
327 |     for cat in categories:
328 |         emojiFile = emojiDir + cat + ".png"
329 |         emojis.append(loadUintImage(emojiFile))
330 | 
331 |     return emojis
332 | 
333 | 
334 | # Suppress print statements within a function call
335 | # To use, write this:
336 | #   with nostdout():
337 | #       yourfunction()
338 | @contextlib.contextmanager
339 | def nostdout():
340 |     save_stdout = sys.stdout
341 |     sys.stdout = cStringIO.StringIO()
342 |     try:
343 |         yield
344 |     finally:
345 |         sys.stdout = save_stdout # Restore stdout even if the wrapped call raises
--------------------------------------------------------------------------------