├── Driver Behavior recognition with Deep Learning.py
├── README.md
├── camera.py
├── main.py
├── model.json
└── model.py


/Driver Behavior recognition with Deep Learning.py:
--------------------------------------------------------------------------------
  1 | from tensorflow.keras import applications
  2 | from keras.preprocessing.image import ImageDataGenerator
  3 | from keras import layers,models
  4 | from keras.callbacks import ModelCheckpoint
  5 | from keras.optimizers import Adam
  6 | import matplotlib.pyplot as plt
  7 | 
  8 | def build_model(img_width, img_height):
  9 |     # Initializing weights with Imagenet weights
 10 |     mobilenet = applications.MobileNetV2(weights="imagenet", include_top=False, input_shape=(img_width, img_height, 3))
 11 | 
 12 |     # freezing the layers except last 6 layers
 13 |     for layer in mobilenet.layers[:-6]:
 14 |         layer.trainable = False
 15 | 
 16 |     #debugging
 17 |     for layer in mobilenet.layers:
 18 |         print(layer.name, layer.trainable)
 19 | 
 20 |     # Create the model
 21 |     model = models.Sequential()
 22 | 
 23 |     # Add the mobilenet convolutional base model
 24 |     model.add(mobilenet)
 25 | 
 26 |     # Add new layers
 27 |     model.add(layers.Flatten())
 28 |     model.add(layers.Dense(1024, activation='relu'))
 29 |     model.add(layers.Dropout(0.5))
 30 |     model.add(layers.Dense(6, activation='softmax'))
 31 | 
 32 |     return model
 33 | 
 34 | def train_model(model, img_width, img_height):
 35 |     train_data_dir = "C:/Users/hsone/Desktop/Extras/Projects/Distracted-Driver-Detection-master/Big_dataset/train"
 36 |     validation_data_dir = "C:/Users/hsone/Desktop/Extras/Projects/Distracted-Driver-Detection-master/Big_dataset/validation"
 37 | 
 38 |     train_datagen = ImageDataGenerator()
 39 |     test_datagen = ImageDataGenerator()
 40 | 
 41 |     train_batch_size = 56
 42 |     valid_batch_size = 8
 43 | 
 44 |     train_generator = train_datagen.flow_from_directory(directory=train_data_dir,
 45 |                                                         target_size=(img_height, img_width),
 46 |                                                         batch_size=train_batch_size,
 47 |                                                         class_mode="categorical",
 48 |                                                         shuffle=True)
 49 | 
 50 |     validation_generator = test_datagen.flow_from_directory(validation_data_dir,
 51 |                                                             target_size=(img_height, img_width),
 52 |                                                             batch_size=valid_batch_size,
 53 |                                                             class_mode="categorical",
 54 |                                                             shuffle=True)
 55 | 
 56 |     opt = Adam(learning_rate=0.001)
 57 |     model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
 58 | 
 59 |     # checkpoint will save the best weights
 60 |     checkpoint = ModelCheckpoint("model_weights.h5", monitor='val_acc', verbose=1, save_best_only=True, mode='max')
 61 |     callbacks_list = [checkpoint]
 62 | 
 63 |     history = model.fit(
 64 |         train_generator,
 65 |         steps_per_epoch=train_generator.samples / train_generator.batch_size,
 66 |         epochs=1,
 67 |         validation_data=validation_generator,
 68 |         validation_steps=validation_generator.samples / validation_generator.batch_size,
 69 |         verbose=1,
 70 |         callbacks=callbacks_list
 71 |     )
 72 | 
 73 |     return history
 74 | 
 75 | def visualization(history):
 76 |     plt.figure(figsize=(20, 10))
 77 |     plt.subplot(1, 2, 1)
 78 |     plt.suptitle('Optimizer : Adam', fontsize=10)
 79 |     plt.ylabel('Loss', fontsize=16)
 80 |     plt.plot(history.history['loss'], label='Training Loss')
 81 |     plt.plot(history.history['val_loss'], label='Validation Loss')
 82 |     plt.legend(loc='upper right')
 83 | 
 84 |     plt.subplot(1, 2, 2)
 85 |     plt.ylabel('Accuracy', fontsize=16)
 86 |     plt.plot(history.history['accuracy'], label='Training Accuracy')
 87 |     plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
 88 |     plt.legend(loc='lower right')
 89 |     plt.show()
 90 | 
 91 | def main():
 92 |     img_width, img_height = 224, 224
 93 |     model = build_model(img_width, img_height)
 94 |     history = train_model(model, img_width, img_height)
 95 |     visualization(history)
 96 | 
# Run the build/train/plot pipeline only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()
 99 | 
100 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Distracted-Driver-Detection
 2 | 
 3 | Distracted Driver detection using Keras, MTCNN, OpenCV and Flask.
 4 | 
 5 | [![Watch the video](https://i.imgur.com/SyhvFfX.png)](https://youtu.be/EVk4aAk-l5Q)
 6 | 
 7 | The dataset used for this project comes from Kaggle. You can find the original dataset here: https://www.kaggle.com/c/state-farm-distracted-driver-detection
 8 | 
 9 | This dataset consists of thousands of images showing a variety of behaviors exhibited by drivers while driving. From this set, I selected a subset of behaviors: safe driving, texting, talking on the phone, operating the radio, and reaching behind. The final dataset consists of 12,000 images.
10 | 
11 | Tools Used: Google Colab, Jupyter Notebook, Eclipse
12 | 
13 | Workflow:
14 | 
15 | 1. Trained MobilenetV2 model to recognize the distracted drivers.
16 | 
17 | 2. Used MTCNN (Multi-task Cascade Convolutional Neural Network) to detect profile face of humans in an image.
18 | Reference : https://github.com/ipazc/mtcnn
19 | 
20 | 3. After detection of human face in an image, predicted the probabilities of the behaviour in the frame using trained model weights.
21 | 
22 | 4. Deployed the model on flask to generate real time predictions. (Either live camera feed or upload a video)
23 | 
24 | 
25 | Files:
26 | 
27 | model.py : This class will give us the predictions of our previously trained model.
28 | 
29 | camera.py : This file implements a camera class that does the following operations: 
30 | 
31 | - Get the image stream from our input (Webcam feed or from video)
32 | - Detect faces with MTCNN and add bounding boxes
33 | - Rescale the images and send them to our trained deep learning model 
34 | - Get the predictions back from our trained model, add the label to each frame, and return the final image stream
35 | 
36 | main.py : Lastly, our main script will create a Flask app that will render our image predictions into a web page.
37 | 


--------------------------------------------------------------------------------
/camera.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | from model import DriverBehaviourModel
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | import keras as k
 6 | from mtcnn.mtcnn import MTCNN
 7 | from PIL import Image
 8 | 
 9 | 
# Create a dedicated graph/session pair at import time and load the trained
# model inside it; get_frame() re-enters the same graph and session before
# every prediction.
# NOTE(review): tf.Session looks like the TensorFlow 1.x API — confirm the
# TensorFlow version this module is expected to run against.
session = tf.Session(graph=tf.Graph())
with session.graph.as_default():
    k.backend.set_session(session)
    model = DriverBehaviourModel("model.json", "model_weights.h5")
# Font used when drawing the predicted label onto frames.
font = cv2.FONT_HERSHEY_SIMPLEX
15 | 
class VideoCamera(object):
    """Camera wrapper that returns JPEG frames annotated with a prediction."""

    def __init__(self):
        # Argument 0 selects the default camera device.
        self.video = cv2.VideoCapture(0)
        # Face detector, created on the first frame and then reused so it
        # is not rebuilt for every single frame.
        self.detector = None

    def __del__(self):
        # __init__ may have failed before self.video was assigned.
        video = getattr(self, "video", None)
        if video is not None:
            video.release()

    def get_frame(self):
        """Return the next camera frame encoded as JPEG bytes.

        When a face is detected, the face region is resized to 224x224 and
        classified, and the predicted label plus the face bounding box are
        drawn onto the frame. When no face is detected the frame is
        returned without annotations.

        Returns:
            The JPEG-encoded frame as ``bytes``.

        Raises:
            RuntimeError: If no frame could be read from the camera.
        """
        grabbed, fr = self.video.read()
        if not grabbed or fr is None:
            raise RuntimeError("Could not read a frame from the video source")

        pixels = np.asarray(fr)

        if getattr(self, "detector", None) is None:
            self.detector = MTCNN()
        # NOTE(review): frames from OpenCV are BGR; confirm the channel
        # order expected by the detector and by the classifier.
        faces = self.detector.detect_faces(pixels)

        if faces:
            x, y, width, height = faces[0]['box']
            # A box starting outside the frame has a negative origin, which
            # slicing would treat as an index from the end; clamp for the
            # crop only.
            left, top = max(0, x), max(0, y)
            right, bottom = max(0, x + width), max(0, y + height)
            fc = pixels[top:bottom, left:right]

            # Skip empty crops (zero-sized or fully off-frame boxes).
            if fc.size:
                roi = cv2.resize(fc, (224, 224))
                with session.graph.as_default():
                    k.backend.set_session(session)
                    pred = model.predict_emotion(roi[np.newaxis, :, :])

                cv2.putText(fr, pred, (x, y), font, 2, (0, 0, 255), 3)
                cv2.rectangle(fr, (x, y), (x + width, y + height), (0, 255, 0), 2)

        _, jpeg = cv2.imencode('.jpg', fr)
        return jpeg.tobytes()
42 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, render_template, Response
 2 | from camera import VideoCamera
 3 | 
# Flask application object; the route decorators in this module register
# their handlers on it.
app = Flask(__name__)
 5 | 
 6 | @app.route('/')
 7 | def index():
 8 |     return render_template('index.html')
 9 | 
def gen(camera):
    """Yield an endless sequence of multipart chunks, one frame per chunk.

    Each chunk is the ``--frame`` boundary line, a ``Content-Type:
    image/jpeg`` header, the bytes returned by ``camera.get_frame()`` and a
    trailing pair of CRLFs.

    Args:
        camera: Object exposing ``get_frame()`` that returns ``bytes``.
    """
    part_header = (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n')
    part_trailer = b'\r\n\r\n'
    while True:
        jpeg_bytes = camera.get_frame()
        yield part_header + jpeg_bytes + part_trailer
15 | 
@app.route('/video_feed')
def video_feed():
    """Stream annotated camera frames as a multipart response.

    A new ``VideoCamera`` is created for the request and its frames are
    produced by ``gen``; the ``frame`` boundary declared in the MIME type
    matches the boundary line that ``gen`` emits.
    """
    frame_stream = gen(VideoCamera())
    stream_mimetype = 'multipart/x-mixed-replace; boundary=frame'
    return Response(frame_stream, mimetype=stream_mimetype)
20 | 
# Start the Flask server only when this file is executed as a script.
# host='0.0.0.0' binds to all network interfaces, so the app is reachable
# from other machines; debug mode is off and requests are handled in threads.
if __name__ == '__main__':
    app.run(host='0.0.0.0', debug=False, threaded=True)


--------------------------------------------------------------------------------
/model.json:
--------------------------------------------------------------------------------
1 | {"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "Model", "config": {"name": "vgg16", "layers": [{"name": "input_1", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 224, 224, 3], "dtype": "float32", "sparse": false, "name": "input_1"}, "inbound_nodes": []}, {"name": "block1_conv1", "class_name": "Conv2D", "config": {"name": "block1_conv1", "trainable": false, "dtype": "float32", "filters": 64, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["input_1", 0, 0, {}]]]}, {"name": "block1_conv2", "class_name": "Conv2D", "config": {"name": "block1_conv2", "trainable": false, "dtype": "float32", "filters": 64, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block1_conv1", 0, 0, {}]]]}, {"name": "block1_pool", "class_name": "MaxPooling2D", "config": {"name": "block1_pool", "trainable": false, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}, "inbound_nodes": [[["block1_conv2", 0, 0, {}]]]}, {"name": "block2_conv1", "class_name": 
"Conv2D", "config": {"name": "block2_conv1", "trainable": false, "dtype": "float32", "filters": 128, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block1_pool", 0, 0, {}]]]}, {"name": "block2_conv2", "class_name": "Conv2D", "config": {"name": "block2_conv2", "trainable": false, "dtype": "float32", "filters": 128, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block2_conv1", 0, 0, {}]]]}, {"name": "block2_pool", "class_name": "MaxPooling2D", "config": {"name": "block2_pool", "trainable": false, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}, "inbound_nodes": [[["block2_conv2", 0, 0, {}]]]}, {"name": "block3_conv1", "class_name": "Conv2D", "config": {"name": "block3_conv1", "trainable": false, "dtype": "float32", "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": 
"fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block2_pool", 0, 0, {}]]]}, {"name": "block3_conv2", "class_name": "Conv2D", "config": {"name": "block3_conv2", "trainable": false, "dtype": "float32", "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block3_conv1", 0, 0, {}]]]}, {"name": "block3_conv3", "class_name": "Conv2D", "config": {"name": "block3_conv3", "trainable": false, "dtype": "float32", "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block3_conv2", 0, 0, {}]]]}, {"name": "block3_pool", "class_name": "MaxPooling2D", "config": {"name": "block3_pool", "trainable": false, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}, "inbound_nodes": [[["block3_conv3", 0, 0, {}]]]}, {"name": "block4_conv1", "class_name": "Conv2D", 
"config": {"name": "block4_conv1", "trainable": false, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block3_pool", 0, 0, {}]]]}, {"name": "block4_conv2", "class_name": "Conv2D", "config": {"name": "block4_conv2", "trainable": false, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block4_conv1", 0, 0, {}]]]}, {"name": "block4_conv3", "class_name": "Conv2D", "config": {"name": "block4_conv3", "trainable": true, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block4_conv2", 0, 
0, {}]]]}, {"name": "block4_pool", "class_name": "MaxPooling2D", "config": {"name": "block4_pool", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}, "inbound_nodes": [[["block4_conv3", 0, 0, {}]]]}, {"name": "block5_conv1", "class_name": "Conv2D", "config": {"name": "block5_conv1", "trainable": true, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block4_pool", 0, 0, {}]]]}, {"name": "block5_conv2", "class_name": "Conv2D", "config": {"name": "block5_conv2", "trainable": true, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block5_conv1", 0, 0, {}]]]}, {"name": "block5_conv3", "class_name": "Conv2D", "config": {"name": "block5_conv3", "trainable": true, "dtype": "float32", "filters": 512, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": 
"VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["block5_conv2", 0, 0, {}]]]}, {"name": "block5_pool", "class_name": "MaxPooling2D", "config": {"name": "block5_pool", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}, "inbound_nodes": [[["block5_conv3", 0, 0, {}]]]}], "input_layers": [["input_1", 0, 0]], "output_layers": [["block5_pool", 0, 0]]}}, {"class_name": "Flatten", "config": {"name": "flatten_1", "trainable": true, "dtype": "float32", "data_format": "channels_last"}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 1024, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "dtype": "float32", "rate": 0.5, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 6, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "keras_version": "2.2.5", "backend": 
"tensorflow"}


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
 1 | from keras.models import model_from_json
 2 | import numpy as np
 3 | from tensorflow import keras
 4 | import tensorflow as tf
 5 | import numpy as np
 6 | import time
 7 | 
 8 | class DriverBehaviourModel(object):
 9 | 
10 |     Behaviour_LIST = ['safe driving',
11 |                      'texting',
12 |                      'texting',
13 |                      'talking on phone',
14 |                      'operating the radio',
15 |                      'reaching behind']
16 | 
17 |     def __init__(self, model_json_file, model_weights_file):
18 |         # load model from JSON file
19 |         with open(model_json_file, "r") as json_file:
20 |             loaded_model_json = json_file.read()
21 |             self.loaded_model = model_from_json(loaded_model_json)
22 | 
23 |         # load weights into the new model
24 |         self.loaded_model.load_weights(model_weights_file)
25 |         self.loaded_model._make_predict_function()
26 | 
27 |     def predict_emotion(self, img):
28 |             self.preds = self.loaded_model.predict(img)
29 |             return DriverBehaviourModel.Behaviour_LIST[np.argmax(self.preds)]


--------------------------------------------------------------------------------