├── .gitignore ├── README.md ├── capture_img.ipynb ├── deepfake_detection_train.ipynb └── model_play.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .gitignore 2 | sample_submission.csv 3 | deepfake-detection-model.h5 4 | dataset/ 5 | test_videos/ 6 | train_sample_videos/ 7 | .ipynb_checkpoints/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deepfake_detection 2 | 3 | 4 | Detect whether a video is fake or not using InceptionResNetV2. 5 | 6 | 7 | The dataset was downloaded from the Kaggle Deepfake Detection Challenge: https://www.kaggle.com/c/deepfake-detection-challenge/data 8 | 9 | Experimental steps 10 | 11 | Step 1. 12 | run capture_img -> use the dlib face detector to convert each video into face images. 13 | 14 | Step 2. 15 | run deepfake_detection_train -> train on the dataset produced by capture_img and output the model file. 16 | 17 | Step 3. 18 | run model_play -> load the model, then input a video to detect whether it is fake or not. 
[0]: fake, [1]: real 19 | -------------------------------------------------------------------------------- /capture_img.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import dlib\n", 10 | "import cv2\n", 11 | "import os\n", 12 | "import re\n", 13 | "import json\n", 14 | "from pylab import *\n", 15 | "from PIL import Image, ImageChops, ImageEnhance" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "train_frame_folder = 'train_sample_videos'\n", 25 | "with open(os.path.join(train_frame_folder, 'metadata.json'), 'r') as file:\n", 26 | " data = json.load(file)\n", 27 | "list_of_train_data = [f for f in os.listdir(train_frame_folder) if f.endswith('.mp4')]\n", 28 | "detector = dlib.get_frontal_face_detector()\n", 29 | "for vid in list_of_train_data:\n", 30 | " count = 0\n", 31 | " cap = cv2.VideoCapture(os.path.join(train_frame_folder, vid))\n", 32 | " frameRate = cap.get(5)\n", 33 | " while cap.isOpened():\n", 34 | " frameId = cap.get(1)\n", 35 | " ret, frame = cap.read()\n", 36 | " if ret != True:\n", 37 | " break\n", 38 | " if frameId % ((int(frameRate)+1)*1) == 0:\n", 39 | " face_rects, scores, idx = detector.run(frame, 0)\n", 40 | " for i, d in enumerate(face_rects):\n", 41 | " x1 = d.left()\n", 42 | " y1 = d.top()\n", 43 | " x2 = d.right()\n", 44 | " y2 = d.bottom()\n", 45 | " crop_img = frame[y1:y2, x1:x2]\n", 46 | " if data[vid]['label'] == 'REAL':\n", 47 | " cv2.imwrite('dataset/real/'+vid.split('.')[0]+'_'+str(count)+'.png', cv2.resize(crop_img, (128, 128)))\n", 48 | " elif data[vid]['label'] == 'FAKE':\n", 49 | " cv2.imwrite('dataset/fake/'+vid.split('.')[0]+'_'+str(count)+'.png', cv2.resize(crop_img, (128, 128)))\n", 50 | " count+=1" 51 | ] 52 | } 53 | ], 54 | "metadata": { 55 | "kernelspec": { 
56 | "display_name": "Python 3", 57 | "language": "python", 58 | "name": "python3" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 3 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython3", 70 | "version": "3.7.4" 71 | } 72 | }, 73 | "nbformat": 4, 74 | "nbformat_minor": 4 75 | } 76 | -------------------------------------------------------------------------------- /deepfake_detection_train.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import cv2\n", 11 | "import json\n", 12 | "import tensorflow as tf\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sn\n", 16 | "import pandas as pd\n", 17 | "from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img\n", 18 | "from tensorflow.keras.utils import to_categorical\n", 19 | "from sklearn.model_selection import train_test_split\n", 20 | "from sklearn.metrics import confusion_matrix" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "input_shape = (128, 128, 3)\n", 30 | "data_dir = 'dataset'\n", 31 | "\n", 32 | "real_data = [f for f in os.listdir(data_dir+'/real') if f.endswith('.png')]\n", 33 | "fake_data = [f for f in os.listdir(data_dir+'/fake') if f.endswith('.png')]\n", 34 | "\n", 35 | "X = []\n", 36 | "Y = []\n", 37 | "\n", 38 | "for img in real_data:\n", 39 | " X.append(img_to_array(load_img(data_dir+'/real/'+img)).flatten() / 255.0)\n", 40 | " Y.append(1)\n", 41 | "for img in fake_data:\n", 42 | " X.append(img_to_array(load_img(data_dir+'/fake/'+img)).flatten() / 255.0)\n", 43 | " Y.append(0)\n", 
44 | "\n", 45 | "Y_val_org = Y\n", 46 | "\n", 47 | "#Normalization\n", 48 | "X = np.array(X)\n", 49 | "Y = to_categorical(Y, 2)\n", 50 | "\n", 51 | "#Reshape\n", 52 | "X = X.reshape(-1, 128, 128, 3)\n", 53 | "\n", 54 | "#Train-Test split\n", 55 | "X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size = 0.2, random_state=5)\n" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "Model: \"sequential\"\n", 68 | "_________________________________________________________________\n", 69 | "Layer (type) Output Shape Param # \n", 70 | "=================================================================\n", 71 | "inception_resnet_v2 (Model) (None, 2, 2, 1536) 54336736 \n", 72 | "_________________________________________________________________\n", 73 | "global_average_pooling2d (Gl (None, 1536) 0 \n", 74 | "_________________________________________________________________\n", 75 | "dense (Dense) (None, 2) 3074 \n", 76 | "=================================================================\n", 77 | "Total params: 54,339,810\n", 78 | "Trainable params: 54,279,266\n", 79 | "Non-trainable params: 60,544\n", 80 | "_________________________________________________________________\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "from tensorflow.keras.applications import InceptionResNetV2\n", 86 | "from tensorflow.keras.layers import Conv2D\n", 87 | "from tensorflow.keras.layers import MaxPooling2D\n", 88 | "from tensorflow.keras.layers import Flatten\n", 89 | "from tensorflow.keras.layers import Dense\n", 90 | "from tensorflow.keras.layers import Dropout\n", 91 | "from tensorflow.keras.layers import InputLayer\n", 92 | "from tensorflow.keras.layers import GlobalAveragePooling2D\n", 93 | "from tensorflow.keras.models import Sequential\n", 94 | "from tensorflow.keras.models import Model\n", 95 | "from tensorflow.keras import 
optimizers\n", 96 | "from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping\n", 97 | "\n", 98 | "googleNet_model = InceptionResNetV2(include_top=False, weights='imagenet', input_shape=input_shape)\n", 99 | "googleNet_model.trainable = True\n", 100 | "model = Sequential()\n", 101 | "model.add(googleNet_model)\n", 102 | "model.add(GlobalAveragePooling2D())\n", 103 | "model.add(Dense(units=2, activation='softmax'))\n", 104 | "model.compile(loss='binary_crossentropy',\n", 105 | " optimizer=optimizers.Adam(lr=1e-5, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False),\n", 106 | " metrics=['accuracy'])\n", 107 | "model.summary()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 4, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "Train on 2996 samples, validate on 749 samples\n", 120 | "Epoch 1/20\n", 121 | "2996/2996 [==============================] - 57s 19ms/sample - loss: 0.5870 - accuracy: 0.7196 - val_loss: 0.5837 - val_accuracy: 0.7690\n", 122 | "Epoch 2/20\n", 123 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.4450 - accuracy: 0.8508 - val_loss: 0.5882 - val_accuracy: 0.7704\n", 124 | "Epoch 3/20\n", 125 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.3490 - accuracy: 0.9035 - val_loss: 0.5590 - val_accuracy: 0.7864\n", 126 | "Epoch 4/20\n", 127 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.2736 - accuracy: 0.9346 - val_loss: 0.4907 - val_accuracy: 0.8238\n", 128 | "Epoch 5/20\n", 129 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.2148 - accuracy: 0.9513 - val_loss: 0.4456 - val_accuracy: 0.8518\n", 130 | "Epoch 6/20\n", 131 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.1752 - accuracy: 0.9636 - val_loss: 0.4021 - val_accuracy: 0.8678\n", 132 | "Epoch 7/20\n", 133 | "2996/2996 
[==============================] - 21s 7ms/sample - loss: 0.1363 - accuracy: 0.9760 - val_loss: 0.3899 - val_accuracy: 0.8625\n", 134 | "Epoch 8/20\n", 135 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.1041 - accuracy: 0.9813 - val_loss: 0.3871 - val_accuracy: 0.8598\n", 136 | "Epoch 9/20\n", 137 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0867 - accuracy: 0.9863 - val_loss: 0.3565 - val_accuracy: 0.8852\n", 138 | "Epoch 10/20\n", 139 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0662 - accuracy: 0.9893 - val_loss: 0.3455 - val_accuracy: 0.8959\n", 140 | "Epoch 11/20\n", 141 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0615 - accuracy: 0.9877 - val_loss: 0.3213 - val_accuracy: 0.9092\n", 142 | "Epoch 12/20\n", 143 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0474 - accuracy: 0.9903 - val_loss: 0.3126 - val_accuracy: 0.9012\n", 144 | "Epoch 13/20\n", 145 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0401 - accuracy: 0.9917 - val_loss: 0.3023 - val_accuracy: 0.9105\n", 146 | "Epoch 14/20\n", 147 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0342 - accuracy: 0.9927 - val_loss: 0.2971 - val_accuracy: 0.9146\n", 148 | "Epoch 15/20\n", 149 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0292 - accuracy: 0.9950 - val_loss: 0.2920 - val_accuracy: 0.9172\n", 150 | "Epoch 16/20\n", 151 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0261 - accuracy: 0.9957 - val_loss: 0.3000 - val_accuracy: 0.9159\n", 152 | "Epoch 17/20\n", 153 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0233 - accuracy: 0.9963 - val_loss: 0.3005 - val_accuracy: 0.9079\n", 154 | "Epoch 18/20\n", 155 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0190 - accuracy: 0.9963 - val_loss: 0.3069 - val_accuracy: 0.9092\n", 
156 | "Epoch 19/20\n", 157 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0171 - accuracy: 0.9963 - val_loss: 0.2977 - val_accuracy: 0.9146\n", 158 | "Epoch 20/20\n", 159 | "2996/2996 [==============================] - 21s 7ms/sample - loss: 0.0142 - accuracy: 0.9973 - val_loss: 0.2997 - val_accuracy: 0.9172\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "#Currently not used\n", 165 | "early_stopping = EarlyStopping(monitor='val_loss',\n", 166 | " min_delta=0,\n", 167 | " patience=2,\n", 168 | " verbose=0, mode='auto')\n", 169 | "EPOCHS = 20\n", 170 | "BATCH_SIZE = 100\n", 171 | "history = model.fit(X_train, Y_train, batch_size = BATCH_SIZE, epochs = EPOCHS, validation_data = (X_val, Y_val), verbose = 1)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 5, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "image/png": "\n", 182 | "text/plain": [ 183 | "
" 184 | ] 185 | }, 186 | "metadata": { 187 | "needs_background": "light" 188 | }, 189 | "output_type": "display_data" 190 | } 191 | ], 192 | "source": [ 193 | "f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 4))\n", 194 | "t = f.suptitle('Pre-trained InceptionResNetV2 Transfer Learn with Fine-Tuning & Image Augmentation Performance ', fontsize=12)\n", 195 | "f.subplots_adjust(top=0.85, wspace=0.3)\n", 196 | "\n", 197 | "epoch_list = list(range(1,EPOCHS+1))\n", 198 | "ax1.plot(epoch_list, history.history['accuracy'], label='Train Accuracy')\n", 199 | "ax1.plot(epoch_list, history.history['val_accuracy'], label='Validation Accuracy')\n", 200 | "ax1.set_xticks(np.arange(0, EPOCHS+1, 1))\n", 201 | "ax1.set_ylabel('Accuracy Value')\n", 202 | "ax1.set_xlabel('Epoch #')\n", 203 | "ax1.set_title('Accuracy')\n", 204 | "l1 = ax1.legend(loc=\"best\")\n", 205 | "\n", 206 | "ax2.plot(epoch_list, history.history['loss'], label='Train Loss')\n", 207 | "ax2.plot(epoch_list, history.history['val_loss'], label='Validation Loss')\n", 208 | "ax2.set_xticks(np.arange(0, EPOCHS+1, 1))\n", 209 | "ax2.set_ylabel('Loss Value')\n", 210 | "ax2.set_xlabel('Epoch #')\n", 211 | "ax2.set_title('Loss')\n", 212 | "l2 = ax2.legend(loc=\"best\")" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 6, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "True positive = 2949\n", 225 | "False positive = 37\n", 226 | "False negative = 42\n", 227 | "True negative = 717\n", 228 | "\n", 229 | "\n" 230 | ] 231 | }, 232 | { 233 | "data": { 234 | "image/png": "\n", 235 | "text/plain": [ 236 | "
" 237 | ] 238 | }, 239 | "metadata": {}, 240 | "output_type": "display_data" 241 | } 242 | ], 243 | "source": [ 244 | "#Output confusion matrix\n", 245 | "def print_confusion_matrix(y_true, y_pred):\n", 246 | " cm = confusion_matrix(y_true, y_pred)\n", 247 | " print('True positive = ', cm[0][0])\n", 248 | " print('False positive = ', cm[0][1])\n", 249 | " print('False negative = ', cm[1][0])\n", 250 | " print('True negative = ', cm[1][1])\n", 251 | " print('\\n')\n", 252 | " df_cm = pd.DataFrame(cm, range(2), range(2))\n", 253 | " sn.set(font_scale=1.4) # for label size\n", 254 | " sn.heatmap(df_cm, annot=True, annot_kws={\"size\": 16}) # font size\n", 255 | " plt.ylabel('Actual label', size = 20)\n", 256 | " plt.xlabel('Predicted label', size = 20)\n", 257 | " plt.xticks(np.arange(2), ['Fake', 'Real'], size = 16)\n", 258 | " plt.yticks(np.arange(2), ['Fake', 'Real'], size = 16)\n", 259 | " plt.ylim([2, 0])\n", 260 | " plt.show()\n", 261 | " \n", 262 | "print_confusion_matrix(Y_val_org, model.predict_classes(X))" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 7, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "model.save('deepfake-detection-model.h5')" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [] 280 | } 281 | ], 282 | "metadata": { 283 | "kernelspec": { 284 | "display_name": "Python [conda env:root] *", 285 | "language": "python", 286 | "name": "conda-root-py" 287 | }, 288 | "language_info": { 289 | "codemirror_mode": { 290 | "name": "ipython", 291 | "version": 3 292 | }, 293 | "file_extension": ".py", 294 | "mimetype": "text/x-python", 295 | "name": "python", 296 | "nbconvert_exporter": "python", 297 | "pygments_lexer": "ipython3", 298 | "version": "3.7.4" 299 | } 300 | }, 301 | "nbformat": 4, 302 | "nbformat_minor": 4 303 | } 304 | -------------------------------------------------------------------------------- 
/model_play.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import tensorflow as tf\n", 10 | "import dlib\n", 11 | "import cv2\n", 12 | "import os\n", 13 | "import numpy as np\n", 14 | "from PIL import Image, ImageChops, ImageEnhance\n", 15 | "from tensorflow.keras.models import load_model\n", 16 | "from tensorflow.keras.preprocessing.image import img_to_array, load_img" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "'2.0.0'" 28 | ] 29 | }, 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "tf.__version__" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "model = load_model('deepfake-detection-model.h5')" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "[1]\n", 58 | "[1]\n", 59 | "[1]\n", 60 | "[1]\n", 61 | "[1]\n", 62 | "[1]\n", 63 | "[1]\n", 64 | "[1]\n", 65 | "[1]\n", 66 | "[1]\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "input_shape = (128, 128, 3)\n", 72 | "pr_data = []\n", 73 | "detector = dlib.get_frontal_face_detector()\n", 74 | "cap = cv2.VideoCapture('test_videos/jzmzdispyo.mp4')\n", 75 | "frameRate = cap.get(5)\n", 76 | "while cap.isOpened():\n", 77 | " frameId = cap.get(1)\n", 78 | " ret, frame = cap.read()\n", 79 | " if ret != True:\n", 80 | " break\n", 81 | " if frameId % ((int(frameRate)+1)*1) == 0:\n", 82 | " face_rects, scores, idx = detector.run(frame, 0)\n", 83 | " for i, d in enumerate(face_rects):\n", 84 | " x1 = d.left()\n", 85 | " y1 = d.top()\n", 86 
| " x2 = d.right()\n", 87 | " y2 = d.bottom()\n", 88 | " crop_img = frame[y1:y2, x1:x2]\n", 89 | " data = img_to_array(cv2.resize(crop_img, (128, 128))).flatten() / 255.0\n", 90 | " data = data.reshape(-1, 128, 128, 3)\n", 91 | " print(model.predict_classes(data))" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "Python [conda env:root] *", 105 | "language": "python", 106 | "name": "conda-root-py" 107 | }, 108 | "language_info": { 109 | "codemirror_mode": { 110 | "name": "ipython", 111 | "version": 3 112 | }, 113 | "file_extension": ".py", 114 | "mimetype": "text/x-python", 115 | "name": "python", 116 | "nbconvert_exporter": "python", 117 | "pygments_lexer": "ipython3", 118 | "version": "3.7.4" 119 | } 120 | }, 121 | "nbformat": 4, 122 | "nbformat_minor": 4 123 | } 124 | --------------------------------------------------------------------------------