# Repository layout (from the dump header):
# ├── Driver Behavior recognition with Deep Learning.py
# ├── README.md
# ├── camera.py
# ├── main.py
# ├── model.json
# └── model.py
#
# ===== /Driver Behavior recognition with Deep Learning.py =====
"""Fine-tune an ImageNet-initialised MobileNetV2 on a directory of driver images."""
import math

# Everything comes from tensorflow.keras: mixing standalone `keras` objects
# with `tensorflow.keras` models in a single Sequential is not supported by
# every Keras release.
from tensorflow.keras import applications
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# Original hard-coded dataset locations, kept as the defaults so existing
# invocations behave exactly as before.
DEFAULT_TRAIN_DIR = "C:/Users/hsone/Desktop/Extras/Projects/Distracted-Driver-Detection-master/Big_dataset/train"
DEFAULT_VALIDATION_DIR = "C:/Users/hsone/Desktop/Extras/Projects/Distracted-Driver-Detection-master/Big_dataset/validation"


def build_model(img_width, img_height, num_classes=6, trainable_layers=6):
    """Build a MobileNetV2-based classifier with a small dense head.

    Args:
        img_width: Width of the input images in pixels.
        img_height: Height of the input images in pixels.
        num_classes: Number of softmax outputs (defaults to the original 6).
        trainable_layers: How many of the last MobileNetV2 layers stay
            trainable; all earlier layers are frozen (defaults to 6).

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # Keras expects (height, width, channels); the original passed
    # (width, height, 3), which only worked because the images were square.
    mobilenet = applications.MobileNetV2(
        weights="imagenet",
        include_top=False,
        input_shape=(img_height, img_width, 3),
    )

    # Freeze everything except the last `trainable_layers` layers. Computing
    # the cut index explicitly keeps trainable_layers=0 meaningful
    # (``layers[:-0]`` would be an empty slice and freeze nothing).
    cut = max(len(mobilenet.layers) - trainable_layers, 0)
    for layer in mobilenet.layers[:cut]:
        layer.trainable = False

    # Debugging aid kept from the original: show which layers will train.
    for layer in mobilenet.layers:
        print(layer.name, layer.trainable)

    model = models.Sequential()
    model.add(mobilenet)
    model.add(layers.Flatten())
    model.add(layers.Dense(1024, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(num_classes, activation='softmax'))
    return model


def train_model(model, img_width, img_height,
                train_data_dir=DEFAULT_TRAIN_DIR,
                validation_data_dir=DEFAULT_VALIDATION_DIR,
                epochs=1):
    """Compile `model` and train it on images read from two directories.

    Args:
        model: The Keras model to train (compiled in place).
        img_width: Width every image is resized to.
        img_height: Height every image is resized to.
        train_data_dir: Directory with one sub-folder per class (training).
        validation_data_dir: Directory with one sub-folder per class
            (validation).
        epochs: Number of training epochs (defaults to the original 1).

    Returns:
        The ``History`` object returned by ``model.fit``.

    Raises:
        ValueError: If either directory yields no images.
    """
    # NOTE(review): images are fed without rescaling or MobileNetV2's
    # preprocess_input. Inference in camera.py also feeds raw pixels, so the
    # two sides agree; change both together if preprocessing is added.
    train_datagen = ImageDataGenerator()
    test_datagen = ImageDataGenerator()

    train_batch_size = 56
    valid_batch_size = 8

    train_generator = train_datagen.flow_from_directory(
        directory=train_data_dir,
        target_size=(img_height, img_width),
        batch_size=train_batch_size,
        class_mode="categorical",
        shuffle=True,
    )
    validation_generator = test_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_height, img_width),
        batch_size=valid_batch_size,
        class_mode="categorical",
        shuffle=True,
    )

    if train_generator.samples == 0:
        raise ValueError(f"no training images found under {train_data_dir!r}")
    if validation_generator.samples == 0:
        raise ValueError(f"no validation images found under {validation_data_dir!r}")

    opt = Adam(learning_rate=0.001)
    model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

    # Save only the best model so far. With metrics=["accuracy"] the logged
    # key is 'val_accuracy' (the same key visualization() plots); the original
    # 'val_acc' never appears in the logs, so nothing was ever saved.
    # NOTE(review): the model.json shipped with the repository describes a
    # VGG16 network, not the MobileNetV2 built here - confirm which
    # architecture the deployed weights belong to.
    checkpoint = ModelCheckpoint("model_weights.h5", monitor='val_accuracy', verbose=1,
                                 save_best_only=True, mode='max')
    callbacks_list = [checkpoint]

    # Step counts must be whole numbers; round up so the final partial batch
    # is still used.
    steps_per_epoch = math.ceil(train_generator.samples / train_generator.batch_size)
    validation_steps = math.ceil(validation_generator.samples / validation_generator.batch_size)

    history = model.fit(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=validation_steps,
        verbose=1,
        callbacks=callbacks_list,
    )
    return history


def visualization(history):
    """Plot training/validation loss and accuracy side by side.

    Args:
        history: The object returned by ``model.fit``; its ``history`` dict
            must contain 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    """
    curves = history.history

    plt.figure(figsize=(20, 10))
    plt.suptitle('Optimizer : Adam', fontsize=10)

    # Markers keep a one-epoch run (a single point per curve) visible.
    plt.subplot(1, 2, 1)
    plt.ylabel('Loss', fontsize=16)
    plt.plot(curves['loss'], marker='o', label='Training Loss')
    plt.plot(curves['val_loss'], marker='o', label='Validation Loss')
    plt.legend(loc='upper right')

    plt.subplot(1, 2, 2)
    plt.ylabel('Accuracy', fontsize=16)
    plt.plot(curves['accuracy'], marker='o', label='Training Accuracy')
    plt.plot(curves['val_accuracy'], marker='o', label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.show()


def main():
    """Build, train and plot with the default 224x224 input size."""
    img_width, img_height = 224, 224
    model = build_model(img_width, img_height)
    history = train_model(model, img_width, img_height)
    visualization(history)


if __name__ == '__main__':
    main()

# ===== /README.md =====
# # Distracted-Driver-Detection
#
# Distracted Driver
detection using Keras, MTCNN, OpenCV and Flask. 4 | 5 | [![Watch the video](https://i.imgur.com/SyhvFfX.png)](https://youtu.be/EVk4aAk-l5Q) 6 | 7 | The dataset used for this project was obtained from Kaggle. The original dataset is available here: https://www.kaggle.com/c/state-farm-distracted-driver-detection 8 | 9 | This dataset consists of thousands of images showing a variety of behaviors exhibited by drivers while driving. From this set, I selected a subset of behaviors: safe driving, texting, talking on the phone, operating the radio, and reaching behind. The final dataset consisted of 12,000 images. 10 | 11 | Tools Used: Google Colab, Jupyter Notebook, Eclipse 12 | 13 | Workflow: 14 | 15 | 1. Trained a MobileNetV2 model to recognize distracted drivers. 16 | 17 | 2. Used MTCNN (Multi-task Cascaded Convolutional Networks) to detect human faces in an image. 18 | Reference: https://github.com/ipazc/mtcnn 19 | 20 | 3. After a face is detected in an image, predicted the probabilities of each behaviour in the frame using the trained model weights. 21 | 22 | 4. Deployed the model with Flask to generate real-time predictions (from either a live camera feed or an uploaded video). 23 | 24 | 25 | Files: 26 | 27 | model.py : This file defines the class that returns the predictions of our previously trained model. 28 | 29 | camera.py : This file implements a camera class that does the following operations: 30 | 31 | - Get the image stream from our input (webcam feed or video) 32 | - Detect faces with MTCNN and add bounding boxes 33 | - Rescale the images and send them to our trained deep learning model 34 | - Get the predictions back from our trained model, add the label to each frame, and return the final image stream 35 | 36 | main.py : Lastly, our main script creates a Flask app that renders our image predictions into a web page. 
# ===== /camera.py =====
import cv2
from model import DriverBehaviourModel
import numpy as np
import tensorflow as tf
import keras as k
from mtcnn.mtcnn import MTCNN
from PIL import Image


# The classifier lives in its own graph/session so it can be re-entered from
# whichever thread serves a frame.
session = tf.Session(graph=tf.Graph())
with session.graph.as_default():
    k.backend.set_session(session)
    model = DriverBehaviourModel("model.json", "model_weights.h5")
font = cv2.FONT_HERSHEY_SIMPLEX


class VideoCamera(object):
    """Webcam wrapper that labels the first detected face in each frame."""

    def __init__(self):
        # Argument '0' takes the feed from the default camera.
        self.video = cv2.VideoCapture(0)
        # Build the face detector once per camera; the original rebuilt it
        # for every single frame.
        self.detector = MTCNN()

    def __del__(self):
        # __init__ may have failed before `video` was assigned.
        video = getattr(self, "video", None)
        if video is not None:
            video.release()

    def get_frame(self):
        """Return the next camera frame as JPEG bytes.

        When a face is detected, its bounding box and the predicted behaviour
        label are drawn on the frame. A frame without a detectable face is
        returned unannotated instead of raising ``IndexError``.

        Returns:
            The encoded JPEG as ``bytes``, or ``None`` when the camera did
            not deliver a frame.
        """
        ok, frame = self.video.read()
        if not ok or frame is None:
            return None

        # OpenCV delivers BGR; the MTCNN project README converts to RGB
        # before detect_faces. The crop is taken from the RGB image as well.
        # NOTE(review): this assumes the classifier was trained on RGB input
        # (Keras image loaders read RGB) - confirm against the weights in use.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = self.detector.detect_faces(rgb)

        if faces:
            x, y, w, h = faces[0]['box']
            # Clamp to the image: a negative coordinate would otherwise be
            # treated as a from-the-end index by NumPy slicing.
            x1, y1 = max(x, 0), max(y, 0)
            x2, y2 = x + w, y + h
            face = rgb[y1:y2, x1:x2]

            if face.size:
                roi = cv2.resize(face, (224, 224))
                with session.graph.as_default():
                    k.backend.set_session(session)
                    pred = model.predict_emotion(roi[np.newaxis, :, :])

                cv2.putText(frame, pred, (x1, y1), font, 2, (0, 0, 255), 3)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        _, jpeg = cv2.imencode('.jpg', frame)
        return jpeg.tobytes()


# ===== /main.py =====
from flask import Flask, render_template, Response
from camera import VideoCamera

app = Flask(__name__)


@app.route('/')
def index():
    """Serve the page that embeds the video stream."""
    return render_template('index.html')


def gen(camera):
    """Yield multipart JPEG chunks until the camera stops delivering frames."""
    while True:
        frame = camera.get_frame()
        if frame is None:
            # Camera unavailable or stream ended: finish the response
            # instead of raising inside the generator.
            break
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')


@app.route('/video_feed')
def video_feed():
    """Stream annotated camera frames as multipart/x-mixed-replace."""
    return Response(gen(VideoCamera()),
                    mimetype='multipart/x-mixed-replace; boundary=frame')


if __name__ == '__main__':
    app.run(host='0.0.0.0', debug=False, threaded=True)

# ===== /model.json (data file; its text continues on the following lines) =====
# {"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "Model", "config": {"name": "vgg16", "layers": [{"name": "input_1", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 224, 224, 3], "dtype": "float32", "sparse": false, "name": "input_1"}, "inbound_nodes": []}, {"name": "block1_conv1", "class_name": "Conv2D", "config": {"name": "block1_conv1", "trainable": false, "dtype": "float32", "filters": 64, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["input_1", 0, 0, {}]]]}, {"name": "block1_conv2", "class_name": "Conv2D", "config": {"name": "block1_conv2", "trainable": false, "dtype": "float32", "filters": 64, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}},
"kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block1_conv1", 0, 0, {}]]]}, {"name": "block1_pool", "class_name": "MaxPooling2D", "config": {"name": "block1_pool", "trainable": false, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}, "inbound_nodes": [[["block1_conv2", 0, 0, {}]]]}, {"name": "block2_conv1", "class_name": "Conv2D", "config": {"name": "block2_conv1", "trainable": false, "dtype": "float32", "filters": 128, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block1_pool", 0, 0, {}]]]}, {"name": "block2_conv2", "class_name": "Conv2D", "config": {"name": "block2_conv2", "trainable": false, "dtype": "float32", "filters": 128, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block2_conv1", 0, 0, {}]]]}, {"name": "block2_pool", "class_name": "MaxPooling2D", "config": {"name": "block2_pool", "trainable": false, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", 
"strides": [2, 2], "data_format": "channels_last"}, "inbound_nodes": [[["block2_conv2", 0, 0, {}]]]}, {"name": "block3_conv1", "class_name": "Conv2D", "config": {"name": "block3_conv1", "trainable": false, "dtype": "float32", "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block2_pool", 0, 0, {}]]]}, {"name": "block3_conv2", "class_name": "Conv2D", "config": {"name": "block3_conv2", "trainable": false, "dtype": "float32", "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block3_conv1", 0, 0, {}]]]}, {"name": "block3_conv3", "class_name": "Conv2D", "config": {"name": "block3_conv3", "trainable": false, "dtype": "float32", "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": 
null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block3_conv2", 0, 0, {}]]]}, {"name": "block3_pool", "class_name": "MaxPooling2D", "config": {"name": "block3_pool", "trainable": false, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}, "inbound_nodes": [[["block3_conv3", 0, 0, {}]]]}, {"name": "block4_conv1", "class_name": "Conv2D", "config": {"name": "block4_conv1", "trainable": false, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block3_pool", 0, 0, {}]]]}, {"name": "block4_conv2", "class_name": "Conv2D", "config": {"name": "block4_conv2", "trainable": false, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block4_conv1", 0, 0, {}]]]}, {"name": "block4_conv3", "class_name": "Conv2D", "config": {"name": "block4_conv3", "trainable": true, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], 
"padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block4_conv2", 0, 0, {}]]]}, {"name": "block4_pool", "class_name": "MaxPooling2D", "config": {"name": "block4_pool", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}, "inbound_nodes": [[["block4_conv3", 0, 0, {}]]]}, {"name": "block5_conv1", "class_name": "Conv2D", "config": {"name": "block5_conv1", "trainable": true, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block4_pool", 0, 0, {}]]]}, {"name": "block5_conv2", "class_name": "Conv2D", "config": {"name": "block5_conv2", "trainable": true, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, 
"bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block5_conv1", 0, 0, {}]]]}, {"name": "block5_conv3", "class_name": "Conv2D", "config": {"name": "block5_conv3", "trainable": true, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block5_conv2", 0, 0, {}]]]}, {"name": "block5_pool", "class_name": "MaxPooling2D", "config": {"name": "block5_pool", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}, "inbound_nodes": [[["block5_conv3", 0, 0, {}]]]}], "input_layers": [["input_1", 0, 0]], "output_layers": [["block5_pool", 0, 0]]}}, {"class_name": "Flatten", "config": {"name": "flatten_1", "trainable": true, "dtype": "float32", "data_format": "channels_last"}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 1024, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "dtype": "float32", "rate": 0.5, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": 
# ===== /model.json (data file; final part of its text) =====
# {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 6, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "keras_version": "2.2.5", "backend": "tensorflow"}
#
# ===== /model.py =====
import numpy as np


class DriverBehaviourModel(object):
    """Load a Keras model from JSON + weights and map predictions to labels."""

    # Label for each output index of the network.
    # NOTE(review): indices 1 and 2 both read 'texting' - presumably two
    # texting classes collapsed into one label; confirm against the class
    # order used during training.
    Behaviour_LIST = ['safe driving',
                      'texting',
                      'texting',
                      'talking on phone',
                      'operating the radio',
                      'reaching behind']

    def __init__(self, model_json_file, model_weights_file):
        """Rebuild the architecture from JSON and load the trained weights.

        Args:
            model_json_file: Path to the JSON architecture file.
            model_weights_file: Path to the weights file for that model.
        """
        # Imported here so this module can be imported (and the label
        # mapping used) without Keras installed.
        from keras.models import model_from_json

        # load model from JSON file
        with open(model_json_file, "r") as json_file:
            loaded_model_json = json_file.read()
        self.loaded_model = model_from_json(loaded_model_json)

        # load weights into the new model
        self.loaded_model.load_weights(model_weights_file)

        # _make_predict_function is a private helper that not every Keras
        # release provides; call it only where it exists.
        make_predict_function = getattr(self.loaded_model, "_make_predict_function", None)
        if callable(make_predict_function):
            make_predict_function()

    def predict_behaviour(self, img):
        """Return the behaviour label predicted for the first image in `img`.

        Args:
            img: Batch passed unchanged to ``loaded_model.predict``.

        Returns:
            The entry of ``Behaviour_LIST`` with the highest score.
        """
        self.preds = self.loaded_model.predict(img)
        scores = np.asarray(self.preds)
        # Use the first row only: an argmax over the flattened batch would
        # index past Behaviour_LIST whenever more than one image is passed.
        if scores.ndim > 1:
            scores = scores[0]
        return DriverBehaviourModel.Behaviour_LIST[int(np.argmax(scores))]

    def predict_emotion(self, img):
        """Backward-compatible alias of :meth:`predict_behaviour`."""
        return self.predict_behaviour(img)