├── .ipynb_checkpoints └── Untitled-checkpoint.ipynb ├── .vscode └── settings.json ├── DetectionToolKit.py ├── FaceToolKit.py ├── IC_checkpoints.keras ├── IC_logs ├── events.out.tfevents.1552908242.ghost-pc └── events.out.tfevents.1554734574.ghost-pc ├── MAIN_RUN.py ├── README.md ├── Sample Videos ├── lol1.mp4 ├── lol2.mp4 ├── lol3.avi ├── lol4.mp4 └── test.mp4 ├── Untitled.ipynb ├── __pycache__ ├── DetectionToolKit.cpython-35.pyc ├── DetectionToolKit.cpython-36.pyc ├── FaceToolKit.cpython-35.pyc ├── FaceToolKit.cpython-36.pyc ├── cache.cpython-35.pyc ├── cache.cpython-36.pyc ├── caption_tune.cpython-35.pyc ├── caption_tune.cpython-36.pyc ├── coco.cpython-35.pyc ├── coco.cpython-36.pyc ├── download.cpython-35.pyc ├── download.cpython-36.pyc ├── f_part.cpython-36.pyc ├── faceadd.cpython-36.pyc ├── fr_utils.cpython-36.pyc ├── gensound.cpython-35.pyc ├── gensound.cpython-36.pyc ├── inception_blocks_v2.cpython-36.pyc └── p_part.cpython-36.pyc ├── cache.py ├── caption_tune.py ├── coco.py ├── d2.mp4 ├── detection ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-35.pyc │ └── __init__.cpython-36.pyc └── mtcnn │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── __init__.cpython-36.pyc │ ├── detect_face.cpython-35.pyc │ └── detect_face.cpython-36.pyc │ ├── det1.npy │ ├── det2.npy │ ├── det3.npy │ └── detect_face.py ├── digivision.py ├── digivision2.py ├── download.py ├── f_part.py ├── faceadd.py ├── facenet ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── __init__.cpython-36.pyc │ ├── face.cpython-35.pyc │ └── face.cpython-36.pyc └── face.py ├── gensound.py ├── gensoundgtts.py ├── haarcascade_frontalface_default.xml ├── images ├── Andrew.jpg ├── Capture.JPG ├── Capture1.JPG ├── Capture2.JPG └── andrew.jpg ├── models └── 20180204-160909 │ ├── 20180204-16090.pb │ ├── checkpoint │ ├── model-20180204-160909.ckpt-264000.data-00000-of-00001 │ ├── model-20180204-160909.ckpt-264000.index │ ├── model-20180204-160909.ckpt-265000.data-00000-of-00001 │ ├── model-20180204-160909.ckpt-265000.index │ ├── model-20180204-160909.ckpt-266000.data-00000-of-00001 │ ├── model-20180204-160909.ckpt-266000.index │ └── model-20180204-160909.meta └── p_part.py /.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "- Data loaded from cache-file: C:\\Users\\User\\Desktop\\image-cap\\data\\coco\\records_train.pkl\n", 13 | "- Data loaded from cache-file: C:\\Users\\User\\Desktop\\image-cap\\data\\coco\\records_val.pkl\n", 14 | "Processing 118287 images in training-set. \n", 15 | "- Data loaded from cache-file: C:\\Users\\User\\Desktop\\image-cap\\data\\coco\\transfer_values_train.pkl\n", 16 | "Processing 5000 images in validation-set. 
\n", 17 | "- Data loaded from cache-file: C:\\Users\\User\\Desktop\\image-cap\\data\\coco\\transfer_values_val.pkl\n", 18 | "Model directory: ./models/20180204-160909/\n", 19 | "Metagraph file: model-20180204-160909.meta\n", 20 | "Checkpoint file: model-20180204-160909.ckpt-266000\n", 21 | "WARNING:tensorflow:The saved meta_graph is possibly from an older release:\n", 22 | "'model_variables' collection should be of type 'byte_list', but instead is of type 'node_list'.\n", 23 | "INFO:tensorflow:Restoring parameters from ./models/20180204-160909/model-20180204-160909.ckpt-266000\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "import cv2 as cv\n", 29 | "import warnings\n", 30 | "warnings.filterwarnings(\"ignore\")\n", 31 | "import p_part\n", 32 | "import f_part\n", 33 | "from caption_tune import modcap, face_found_cap, face_not_found_cap\n", 34 | "from gensound import generate_sound\n", 35 | "import tkinter as tk\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "\n", 45 | "def saveface():\n", 46 | " # generate_sound(\"Tell me the name\")\n", 47 | " x1 = speech(\"Tell me the name\")\n", 48 | " print(x1 + ' face saved')\n", 49 | " cv.imwrite(r\"images//\" +\n", 50 | " str(x1) + \".jpg\", save)\n", 51 | " data = {x1: f_part.img_to_encoding(\n", 52 | " \"images//\" + str(x1) + \".jpg\").tolist()}\n", 53 | " f_part.digi_db.insert_one(data)\n", 54 | "\n", 55 | "\n", 56 | "def ignoreface():\n", 57 | " generate_sound(\"Not saved\")" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 16, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "0.17878464559657153,Tanmay\n", 70 | "known: 1\n", 71 | "i=0\n", 72 | "Tanmay at dist of: 0.17878464559657153\n", 73 | "0.6959873898612923,unknown\n", 74 | "Pls say something....\n", 75 | "Google Audio:no\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "%reload_ext autoreload\n", 81 | "%autoreload 2\n", 82 | "from faceadd import addn,speech\n", 83 | "cap = cv.VideoCapture(0)\n", 84 | "\n", 85 | "while True:\n", 86 | " ret, frame = cap.read()\n", 87 | " facedetect = cv.CascadeClassifier(r'haarcascade_frontalface_default.xml')\n", 88 | " if ret:\n", 89 | " # font = cv.FONT_HERSHEY_SIMPLEX\n", 90 | " cv.imshow(\"Video\", frame)\n", 91 | "\n", 92 | " if cv.waitKey(1) == ord('p'):\n", 93 | "\n", 94 | " cv.imwrite('./test.jpg', frame)\n", 95 | " final_caption = p_part.generate_caption(\n", 96 | " './test.jpg') # create caption\n", 97 | " final_caption = modcap(final_caption) # remove tags\n", 98 | " print(final_caption)\n", 99 | " generate_sound(final_caption) # convert to audio\n", 100 | "\n", 101 | " if cv.waitKey(1) == ord('f'):\n", 102 | " gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)\n", 103 | " faces = facedetect.detectMultiScale(gray, 1.3, 5)\n", 104 | " cv.imwrite('./test.jpg', frame)\n", 105 | " known_detected = 0\n", 106 | " unknown_detected = 0\n", 107 | " known_face_list = []\n", 108 | " known_face_dist = []\n", 109 | " try:\n", 110 | " for x, y, w, h in faces:\n", 111 | " #cv2.imwrite(\"dset//User.\"+str(user)+\".\"+str(sample)+\".jpg\",gray[y:y+h,x:x+w])\n", 112 | " save = frame[y:y+h, x:x+w]\n", 113 | " cv.imwrite('./test.jpg', save)\n", 114 | " dis, name = f_part.who_is_it('./test.jpg')\n", 115 | " print(str(dis)+\",\"+name)\n", 116 | " if name != 'unknown':\n", 117 | " known_face_list.append(name)\n", 118 | " known_face_dist.append(dis)\n", 119 | " known_detected += 1\n", 120 | 
"\n", 121 | " else:\n", 122 | " unknown_detected += 1\n", 123 | "\n", 124 | " if known_detected > 0:\n", 125 | " print(\"known: \" + str(known_detected))\n", 126 | " for i in range(known_detected):\n", 127 | " print('i=' + str(i))\n", 128 | " print(\n", 129 | " known_face_list[i] + \" at dist of: \" + str(known_face_dist[i]))\n", 130 | " temp = face_found_cap(str(known_face_list[i]))\n", 131 | " generate_sound(temp)\n", 132 | " elif unknown_detected == 1:\n", 133 | " temp = face_not_found_cap()\n", 134 | " generate_sound(temp)\n", 135 | " generate_sound(\"Do you want to add this face in your database\")\n", 136 | " addn(save)\n", 137 | "\n", 138 | " elif known_detected == 0 and unknown_detected == 0:\n", 139 | " print(\"No person found\")\n", 140 | " generate_sound(\"No person found!\")\n", 141 | "\n", 142 | " else:\n", 143 | " print(\"Too many people\")\n", 144 | " generate_sound(\"Too many people.\")\n", 145 | " except Exception as e:\n", 146 | " generate_sound(\"No recognisable face found!\")\n", 147 | " print(e)\n", 148 | "\n", 149 | " if cv.waitKey(1) & 0xFF == 27: # ASCII for Esc Key\n", 150 | " break\n", 151 | " else:\n", 152 | " break\n", 153 | "cap.release()\n", 154 | "cv.destroyAllWindows()\n" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 9, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "cap.release()\n", 164 | "cv.destroyAllWindows()" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 6, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "100,unknown\n", 177 | "Pls say something....\n", 178 | "Google Audio:yes\n", 179 | "Pls say something....\n", 180 | "Google Audio:Mayank\n", 181 | "Mayank face saved\n", 182 | "0.0,Mayank\n", 183 | "known: 1\n", 184 | "i=0\n", 185 | "Mayank at dist of: 0.0\n", 186 | "a person is there in front of you\n" 187 | ] 188 | } 189 | ], 190 | "source": [ 191 | "'''This part is for testing only\n", 192 | "I repeat this part is only for testing'''\n", 193 | "\n", 194 | "\n", 195 | "\n", 196 | "\n", 197 | "facedetect = cv.CascadeClassifier(r'haarcascade_frontalface_default.xml')\n", 198 | "frame = cv.imread(r'C:\\Users\\User\\Desktop\\projects\\face-recognition-attendance-system-master\\training-data\\s2\\13.jpg')\n", 199 | " # font = cv.FONT_HERSHEY_SIMPLEX\n", 200 | "while True:\n", 201 | " cv.imshow(\"Video\", frame)\n", 202 | " if cv.waitKey(0) == ord('p'):\n", 203 | "\n", 204 | " cv.imwrite('./test.jpg', frame)\n", 205 | " final_caption = p_part.generate_caption(\n", 206 | " './test.jpg') # create caption\n", 207 | " final_caption = modcap(final_caption) # remove tags\n", 208 | " print(final_caption)\n", 209 | " generate_sound(final_caption) # convert to audio\n", 210 | "\n", 211 | " if cv.waitKey(0) == ord('f'):\n", 212 | " gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)\n", 213 | " faces = facedetect.detectMultiScale(gray, 1.3, 5)\n", 214 | " cv.imwrite('./test.jpg', frame)\n", 215 | " known_detected = 0\n", 216 | " unknown_detected = 0\n", 217 | " known_face_list = []\n", 218 | " known_face_dist = []\n", 219 | " try:\n", 220 | " for x, y, w, h in faces:\n", 221 | " #cv2.imwrite(\"dset//User.\"+str(user)+\".\"+str(sample)+\".jpg\",gray[y:y+h,x:x+w])\n", 222 | " save = frame[y:y+h, x:x+w]\n", 223 | " cv.imwrite('./test.jpg', save)\n", 224 | " dis, name = f_part.who_is_it('./test.jpg')\n", 225 | " print(str(dis)+\",\"+name)\n", 226 | " if name != 'unknown':\n", 227 | " known_face_list.append(name)\n", 
228 | " known_face_dist.append(dis)\n", 229 | " known_detected += 1\n", 230 | "\n", 231 | " else:\n", 232 | " unknown_detected += 1\n", 233 | "\n", 234 | " if known_detected > 0:\n", 235 | " print(\"known: \" + str(known_detected))\n", 236 | " for i in range(known_detected):\n", 237 | " print('i=' + str(i))\n", 238 | " print(\n", 239 | " known_face_list[i] + \" at dist of: \" + str(known_face_dist[i]))\n", 240 | " temp = face_found_cap(str(known_face_list[i]))\n", 241 | " generate_sound(temp)\n", 242 | " elif unknown_detected == 1:\n", 243 | " temp = face_not_found_cap()\n", 244 | " generate_sound(temp)\n", 245 | " generate_sound(\"Do you want to add this face in your database\")\n", 246 | " addn(save)\n", 247 | "\n", 248 | " elif known_detected == 0 and unknown_detected == 0:\n", 249 | " print(\"No person found\")\n", 250 | " generate_sound(\"No person found!\")\n", 251 | "\n", 252 | " else:\n", 253 | " print(\"Too many people\")\n", 254 | " generate_sound(\"Too many people.\")\n", 255 | " except Exception as e:\n", 256 | " generate_sound(\"No recognisable face found!\")\n", 257 | " print(e)\n", 258 | "\n", 259 | " if cv.waitKey(0) & 0xFF == 27:\n", 260 | " break# ASCII for Esc Key\n", 261 | "cv.destroyAllWindows()\n" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 10, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "# addn()" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 15, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "%reload_ext autoreload\n", 280 | "%autoreload 2\n", 281 | "from faceadd import addn,speech" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | " root = tk.Tk()\n", 291 | " \n", 292 | " large_font = ('Times New Roman', 14)\n", 293 | "\n", 294 | " canvas1 = tk.Canvas(root, width=300, height=200)\n", 295 | " canvas1.pack()\n", 296 | " label = tk.Label(root, text='Enter the Name')\n", 297 | " canvas1.create_window(140, 50, window=label)\n", 298 | " entry1Var = tk.StringVar(value='')\n", 299 | " entry1 = tk.Entry(\n", 300 | " root, textvariable=entry1Var, font=large_font)\n", 301 | " canvas1.create_window(150, 90, window=entry1)\n", 302 | " button1 = tk.Button(text='SAVE', command=saveface)\n", 303 | " button2 = tk.Button(text='IGNORE', command=ignoreface)\n", 304 | " canvas1.create_window(100, 150, window=button1)\n", 305 | " canvas1.create_window(180, 150, window=button2)\n", 306 | "\n", 307 | " root.mainloop()" 308 | ] 309 | } 310 | ], 311 | "metadata": { 312 | "kernelspec": { 313 | "display_name": "Python 3", 314 | "language": "python", 315 | "name": "python3" 316 | }, 317 | "language_info": { 318 | "codemirror_mode": { 319 | "name": "ipython", 320 | "version": 3 321 | }, 322 | "file_extension": ".py", 323 | "mimetype": "text/x-python", 324 | "name": "python", 325 | "nbconvert_exporter": "python", 326 | "pygments_lexer": "ipython3", 327 | "version": "3.6.6" 328 | } 329 | }, 330 | "nbformat": 4, 331 | "nbformat_minor": 2 332 | } 333 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "/home/darkghost/anaconda3/envs/pro/bin/python" 3 | } -------------------------------------------------------------------------------- /DetectionToolKit.py: 
-------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from detection.mtcnn import detect_face 4 | from scipy import misc 5 | 6 | default_color = (0, 255, 0) #BGR 7 | default_thickness = 2 8 | minsize = 20 # minimum size of face 9 | threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold 10 | factor = 0.709 # scale factor 11 | 12 | margin = 44 13 | image_size = 160 14 | 15 | class Detection: 16 | def __init__(self): 17 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2) 18 | self.session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 19 | self.pnet, self.rnet, self.onet = detect_face.create_mtcnn(self.session, None) 20 | 21 | def detect(self, img, detect_multiple_faces = True): 22 | bboxes = [] 23 | bounding_boxes, points = detect_face.detect_face( 24 | img, minsize, self.pnet, self.rnet, self.onet, threshold, factor) 25 | nrof_faces = bounding_boxes.shape[0] 26 | if nrof_faces > 0: 27 | det = bounding_boxes[:, 0:4] 28 | det_arr = [] 29 | img_size = np.asarray(img.shape)[0:2] 30 | if nrof_faces > 1: 31 | if detect_multiple_faces: 32 | for i in range(nrof_faces): 33 | det_arr.append(np.squeeze(det[i])) 34 | else: 35 | bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) 36 | img_center = img_size / 2 37 | offsets = np.vstack( 38 | [(det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0]]) 39 | offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) 40 | index = np.argmax( 41 | bounding_box_size - offset_dist_squared * 2.0) # some extra weight on the centering 42 | det_arr.append(det[index, :]) 43 | else: 44 | det_arr.append(np.squeeze(det)) 45 | for i, det in enumerate(det_arr): 46 | det = np.squeeze(det) 47 | bb = np.zeros(4, dtype=np.int32) 48 | bb[0] = np.maximum(det[0] - margin / 2, 0) 49 | bb[1] = np.maximum(det[1] - margin / 2, 0) 50 | bb[2] = np.minimum(det[2] + margin / 2, img_size[1]) 51 | bb[3] = np.minimum(det[3] + margin / 2, img_size[0]) 52 | bboxes.append(bb) 53 | return bboxes 54 | 55 | 56 | 57 | def align(self, img, detect_multiple_faces = True): 58 | faces = [] 59 | bboxes = self.detect(img,False) 60 | for bb in bboxes: 61 | cropped = img[bb[1]:bb[3], bb[0]:bb[2], :] 62 | scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear') 63 | faces.append(scaled) 64 | return faces 65 | 66 | def crop_detected_face(self, img, bb): 67 | cropped = img[bb[1]:bb[3], bb[0]:bb[2], :] 68 | scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear') 69 | return scaled 70 | -------------------------------------------------------------------------------- /FaceToolKit.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from facenet import face 4 | 5 | class Verification: 6 | 7 | def __init__(self): 8 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333) 9 | self.session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 10 | self.images_placeholder = '' 11 | self.embeddings = '' 12 | self.phase_train_placeholder = '' 13 | self.embedding_size = '' 14 | self.session_closed = False 15 | 16 | def __del__(self): 17 | if not self.session_closed: 18 | self.session.close() 19 | 20 | def kill_session(self): 21 | self.session_closed = True 22 | self.session.close() 23 | 24 | def load_model(self, model): 25 | 26 | face.load_model(model, self.session) 27 | 28 | def initial_input_output_tensors(self): 29 
| 30 | self.images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 31 | self.embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 32 | self.phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 33 | self.embedding_size = self.embeddings.get_shape()[1] 34 | 35 | 36 | def img_to_encoding(self, img, image_size): 37 | 38 | image = face.make_image_tensor(img, image_size) 39 | 40 | feed_dict = {self.images_placeholder: image, self.phase_train_placeholder:False } 41 | emb_array = np.zeros((1, self.embedding_size)) 42 | emb_array[0, :] = self.session.run(self.embeddings, feed_dict=feed_dict) 43 | 44 | return np.squeeze(emb_array) 45 | 46 | -------------------------------------------------------------------------------- /IC_checkpoints.keras: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/IC_checkpoints.keras -------------------------------------------------------------------------------- /IC_logs/events.out.tfevents.1552908242.ghost-pc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/IC_logs/events.out.tfevents.1552908242.ghost-pc -------------------------------------------------------------------------------- /IC_logs/events.out.tfevents.1554734574.ghost-pc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/IC_logs/events.out.tfevents.1554734574.ghost-pc -------------------------------------------------------------------------------- /MAIN_RUN.py: -------------------------------------------------------------------------------- 1 | import cv2 as cv 2 | import warnings 3 | warnings.filterwarnings("ignore") 4 | import p_part 5 | import f_part 6 | from caption_tune import modcap, face_found_cap, face_not_found_cap 7 | from gensoundgtts import generate_sound 8 | import tkinter as tk 9 | 10 | 11 | def saveface(): 12 | x1 = entry1.get() 13 | print(x1 + ' face saved') 14 | root.destroy() 15 | cv.imwrite(r"images//" + 16 | str(x1) + ".jpg", frame) 17 | data = {x1: f_part.img_to_encoding( 18 | "images//" + str(x1) + ".jpg").tolist()} 19 | f_part.digi_db.insert_one(data) 20 | 21 | 22 | def ignoreface(): 23 | print("Not saved") 24 | root.destroy() 25 | 26 | 27 | cap = cv.VideoCapture('Sample Videos/test.mp4') 28 | 29 | while True: 30 | ret, frame = cap.read() 31 | 32 | if ret: 33 | font = cv.FONT_HERSHEY_SIMPLEX 34 | cv.imshow("Video", frame) 35 | 36 | if cv.waitKey(5) == ord('p'): 37 | # print(K.image_data_format()) 38 | cv.imwrite('./test.jpg', frame) 39 | final_caption = p_part.generate_caption('./test.jpg') # create caption 40 | final_caption = modcap(final_caption) # remove tags 41 | print(final_caption) 42 | generate_sound(final_caption) # convert to audio 43 | 44 | if cv.waitKey(5) == ord('f'): 45 | cv.imwrite('./test.jpg', frame) 46 | try: 47 | dis, name = f_part.who_is_it('./test.jpg') 48 | print(str(dis)+","+name) 49 | temp = face_found_cap(name) 50 | generate_sound(temp) 51 | if(name == 'unknown'): 52 | temp = face_not_found_cap() 53 | generate_sound(temp) 54 | 55 | root = tk.Tk() 56 | 57 | large_font = ('Times New Roman', 14) 58 | 59 | canvas1 = tk.Canvas(root, width=300, height=200) 60 | canvas1.pack() 61 | label = tk.Label(root, text='Enter the Name') 
62 | canvas1.create_window(140, 50, window=label) 63 | entry1Var = tk.StringVar(value='') 64 | entry1 = tk.Entry(root, textvariable=entry1Var, font=large_font) 65 | canvas1.create_window(150, 90, window=entry1) 66 | button1 = tk.Button(text='SAVE', command=saveface) 67 | button2 = tk.Button(text='IGNORE', command=ignoreface) 68 | canvas1.create_window(100, 150, window=button1) 69 | canvas1.create_window(180, 150, window=button2) 70 | 71 | root.mainloop() 72 | except: 73 | print("No recognizable face detected") 74 | generate_sound("No recognizable face detected") 75 | 76 | if cv.waitKey(1) & 0xFF == 27: # ASCII for Esc Key 77 | break 78 | else: 79 | break 80 | cap.release() 81 | cv.destroyAllWindows() 82 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DigiVision 2 | A deep learning based application intended to help visually impaired people. The application automatically generates a textual description of what is happening in front of the camera and conveys it to the person through audio. It can also recognise faces and tell the user whether a known person is standing in front of them. 3 | 4 | ![logo](images/Capture.JPG) 5 | 6 | 7 | # Requirements 8 | * Tensorflow (>1.9) 9 | * Keras 10 | * OpenCV 11 | * Python 3.5+ 12 | * gTTS 13 | * pygame 14 | * pymongo 15 | 16 | # Dataset used 17 | MS COCO 2017 for image processing and captioning. 18 | 19 | The dataset for face recognition was collected manually. 20 | 21 | # Features 22 | 23 | ![logo](images/Capture1.JPG) 24 | ![logo](images/Capture2.JPG) 25 | 26 | # Setup 27 | - Install all the required frameworks, libraries and dependencies as mentioned in Requirements above. 28 | - Download the COCO dataset, if it is not already available, in order to train the model: 29 | - [Train images](http://images.cocodataset.org/zips/train2017.zip) 30 | - [Test images](http://images.cocodataset.org/zips/test2017.zip) 31 | - [Annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip) 32 | 33 | Or run: 34 | ``` 35 | python download.py 36 | ``` 37 | - Create your own MongoDB cluster and replace MONGO_URI in line 16 of f_part.py with your own MongoDB connection URI (see the sketch at the end of this section). 38 | - Run the project using one of: 39 | - MAIN_RUN.py (for gTTS audio, with new names added through a Tkinter canvas GUI) 40 | - digivision.py (for single-face detection, with new faces added through a Python GUI) 41 | - digivision2.py (for multi-face detection, with all input/output handled through audio) 42 | 43 | ``` 44 | python <script_name>.py 45 | ``` 46 | - It takes around 90 minutes to process all training images and approximately 5 minutes to process the validation images. 47 | - Training takes around 22 minutes per epoch with a batch size of 256 on an NVIDIA GTX 960M. 48 | - There is no need to re-train on every run; once trained, the weights are loaded automatically.
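For reference, a minimal sketch of the MongoDB configuration that f_part.py is expected to contain (pymongo is already listed in Requirements). Only MONGO_URI and the digi_db collection object are referenced by the rest of the code; the URI placeholder and the database/collection names below are assumptions, so adjust them to match the actual file:

```
# Hypothetical sketch of the MongoDB setup in f_part.py -- adapt names to the real file.
from pymongo import MongoClient

# Line 16 of f_part.py: replace with your own MongoDB connection string.
MONGO_URI = "mongodb+srv://<user>:<password>@<cluster-url>/"

client = MongoClient(MONGO_URI)

# digi_db is used elsewhere as f_part.digi_db.insert_one({name: encoding}),
# so it must be a collection object; database/collection names here are assumptions.
digi_db = client["digivision_db"]["faces"]
```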
49 | 50 | # Demo 51 | [Click here for demo for MAIN_RUN.py](d2.mp4) 52 | -------------------------------------------------------------------------------- /Sample Videos/lol1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/Sample Videos/lol1.mp4 -------------------------------------------------------------------------------- /Sample Videos/lol2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/Sample Videos/lol2.mp4 -------------------------------------------------------------------------------- /Sample Videos/lol3.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/Sample Videos/lol3.avi -------------------------------------------------------------------------------- /Sample Videos/lol4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/Sample Videos/lol4.mp4 -------------------------------------------------------------------------------- /Sample Videos/test.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/Sample Videos/test.mp4 -------------------------------------------------------------------------------- /Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "- Data loaded from cache-file: /mnt/MyDrive/Datasets/image-cap/data/coco/records_train.pkl\n", 13 | "- Data loaded from cache-file: /mnt/MyDrive/Datasets/image-cap/data/coco/records_val.pkl\n", 14 | "Processing 118287 images in training-set. \n", 15 | "- Data loaded from cache-file: /mnt/MyDrive/Datasets/image-cap/data/coco/transfer_values_train.pkl\n", 16 | "Processing 5000 images in validation-set. 
\n", 17 | "- Data loaded from cache-file: /mnt/MyDrive/Datasets/image-cap/data/coco/transfer_values_val.pkl\n", 18 | "Model directory: ./models/20180204-160909/\n", 19 | "Metagraph file: model-20180204-160909.meta\n", 20 | "Checkpoint file: model-20180204-160909.ckpt-266000\n", 21 | "WARNING:tensorflow:The saved meta_graph is possibly from an older release:\n", 22 | "'model_variables' collection should be of type 'byte_list', but instead is of type 'node_list'.\n", 23 | "INFO:tensorflow:Restoring parameters from ./models/20180204-160909/model-20180204-160909.ckpt-266000\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "import cv2 as cv\n", 29 | "import warnings\n", 30 | "warnings.filterwarnings(\"ignore\")\n", 31 | "import p_part\n", 32 | "import f_part\n", 33 | "from caption_tune import modcap, face_found_cap, face_not_found_cap\n", 34 | "from gensound import generate_sound\n", 35 | "import tkinter as tk\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "\n", 45 | "def saveface():\n", 46 | " # generate_sound(\"Tell me the name\")\n", 47 | " x1 = speech(\"Tell me the name\")\n", 48 | " print(x1 + ' face saved')\n", 49 | " cv.imwrite(r\"images//\" +\n", 50 | " str(x1) + \".jpg\", save)\n", 51 | " data = {x1: f_part.img_to_encoding(\n", 52 | " \"images//\" + str(x1) + \".jpg\").tolist()}\n", 53 | " f_part.digi_db.insert_one(data)\n", 54 | "\n", 55 | "\n", 56 | "def ignoreface():\n", 57 | " generate_sound(\"Not saved\")" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "1.1346577982680615,unknown\n", 70 | "Pls say something....\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "%reload_ext autoreload\n", 76 | "%autoreload 2\n", 77 | "from faceadd import addn,speech\n", 78 | "cap = cv.VideoCapture(0)\n", 79 | "\n", 80 | "while True:\n", 81 | " ret, frame = cap.read()\n", 82 | " facedetect = cv.CascadeClassifier(r'haarcascade_frontalface_default.xml')\n", 83 | " if ret:\n", 84 | " # font = cv.FONT_HERSHEY_SIMPLEX\n", 85 | " cv.imshow(\"Video\", frame)\n", 86 | "\n", 87 | " if cv.waitKey(1) == ord('p'):\n", 88 | "\n", 89 | " cv.imwrite('./test.jpg', frame)\n", 90 | " final_caption = p_part.generate_caption(\n", 91 | " './test.jpg') # create caption\n", 92 | " final_caption = modcap(final_caption) # remove tags\n", 93 | " print(final_caption)\n", 94 | " generate_sound(final_caption) # convert to audio\n", 95 | "\n", 96 | " if cv.waitKey(1) == ord('f'):\n", 97 | " gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)\n", 98 | " faces = facedetect.detectMultiScale(gray, 1.3, 5)\n", 99 | " cv.imwrite('./test.jpg', frame)\n", 100 | " known_detected = 0\n", 101 | " unknown_detected = 0\n", 102 | " known_face_list = []\n", 103 | " known_face_dist = []\n", 104 | " try:\n", 105 | " for x, y, w, h in faces:\n", 106 | " #cv2.imwrite(\"dset//User.\"+str(user)+\".\"+str(sample)+\".jpg\",gray[y:y+h,x:x+w])\n", 107 | " save = frame[y:y+h, x:x+w]\n", 108 | " cv.imwrite('./test.jpg', save)\n", 109 | " dis, name = f_part.who_is_it('./test.jpg')\n", 110 | " print(str(dis)+\",\"+name)\n", 111 | " if name != 'unknown':\n", 112 | " known_face_list.append(name)\n", 113 | " known_face_dist.append(dis)\n", 114 | " known_detected += 1\n", 115 | "\n", 116 | " else:\n", 117 | " unknown_detected += 1\n", 118 | "\n", 119 | " if known_detected > 0:\n", 120 | " print(\"known: \" + str(known_detected))\n", 
121 | " for i in range(known_detected):\n", 122 | " print('i=' + str(i))\n", 123 | " print(\n", 124 | " known_face_list[i] + \" at dist of: \" + str(known_face_dist[i]))\n", 125 | " temp = face_found_cap(str(known_face_list[i]))\n", 126 | " generate_sound(temp)\n", 127 | " elif unknown_detected == 1:\n", 128 | " temp = face_not_found_cap()\n", 129 | " generate_sound(temp)\n", 130 | " generate_sound(\"Do you want to add this face in your database\")\n", 131 | " addn(save)\n", 132 | "\n", 133 | " elif known_detected == 0 and unknown_detected == 0:\n", 134 | " print(\"No person found\")\n", 135 | " generate_sound(\"No person found!\")\n", 136 | "\n", 137 | " else:\n", 138 | " print(\"Too many people\")\n", 139 | " generate_sound(\"Too many people.\")\n", 140 | " except Exception as e:\n", 141 | " generate_sound(\"No recognisable face found!\")\n", 142 | " print(e)\n", 143 | "\n", 144 | " if cv.waitKey(1) & 0xFF == 27: # ASCII for Esc Key\n", 145 | " break\n", 146 | " else:\n", 147 | " break\n", 148 | "cap.release()\n", 149 | "cv.destroyAllWindows()\n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 9, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "cap.release()\n", 159 | "cv.destroyAllWindows()" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 6, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | "100,unknown\n", 172 | "Pls say something....\n", 173 | "Google Audio:yes\n", 174 | "Pls say something....\n", 175 | "Google Audio:Mayank\n", 176 | "Mayank face saved\n", 177 | "0.0,Mayank\n", 178 | "known: 1\n", 179 | "i=0\n", 180 | "Mayank at dist of: 0.0\n", 181 | "a person is there in front of you\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "'''This part is for testing only\n", 187 | "I repeat this part is only for testing'''\n", 188 | "\n", 189 | "\n", 190 | "\n", 191 | "\n", 192 | "facedetect = cv.CascadeClassifier(r'haarcascade_frontalface_default.xml')\n", 193 | "frame = cv.imread(r'C:\\Users\\User\\Desktop\\projects\\face-recognition-attendance-system-master\\training-data\\s2\\13.jpg')\n", 194 | " # font = cv.FONT_HERSHEY_SIMPLEX\n", 195 | "while True:\n", 196 | " cv.imshow(\"Video\", frame)\n", 197 | " if cv.waitKey(0) == ord('p'):\n", 198 | "\n", 199 | " cv.imwrite('./test.jpg', frame)\n", 200 | " final_caption = p_part.generate_caption(\n", 201 | " './test.jpg') # create caption\n", 202 | " final_caption = modcap(final_caption) # remove tags\n", 203 | " print(final_caption)\n", 204 | " generate_sound(final_caption) # convert to audio\n", 205 | "\n", 206 | " if cv.waitKey(0) == ord('f'):\n", 207 | " gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)\n", 208 | " faces = facedetect.detectMultiScale(gray, 1.3, 5)\n", 209 | " cv.imwrite('./test.jpg', frame)\n", 210 | " known_detected = 0\n", 211 | " unknown_detected = 0\n", 212 | " known_face_list = []\n", 213 | " known_face_dist = []\n", 214 | " try:\n", 215 | " for x, y, w, h in faces:\n", 216 | " #cv2.imwrite(\"dset//User.\"+str(user)+\".\"+str(sample)+\".jpg\",gray[y:y+h,x:x+w])\n", 217 | " save = frame[y:y+h, x:x+w]\n", 218 | " cv.imwrite('./test.jpg', save)\n", 219 | " dis, name = f_part.who_is_it('./test.jpg')\n", 220 | " print(str(dis)+\",\"+name)\n", 221 | " if name != 'unknown':\n", 222 | " known_face_list.append(name)\n", 223 | " known_face_dist.append(dis)\n", 224 | " known_detected += 1\n", 225 | "\n", 226 | " else:\n", 227 | " unknown_detected += 1\n", 228 | "\n", 229 | " if 
known_detected > 0:\n", 230 | " print(\"known: \" + str(known_detected))\n", 231 | " for i in range(known_detected):\n", 232 | " print('i=' + str(i))\n", 233 | " print(\n", 234 | " known_face_list[i] + \" at dist of: \" + str(known_face_dist[i]))\n", 235 | " temp = face_found_cap(str(known_face_list[i]))\n", 236 | " generate_sound(temp)\n", 237 | " elif unknown_detected == 1:\n", 238 | " temp = face_not_found_cap()\n", 239 | " generate_sound(temp)\n", 240 | " generate_sound(\"Do you want to add this face in your database\")\n", 241 | " addn(save)\n", 242 | "\n", 243 | " elif known_detected == 0 and unknown_detected == 0:\n", 244 | " print(\"No person found\")\n", 245 | " generate_sound(\"No person found!\")\n", 246 | "\n", 247 | " else:\n", 248 | " print(\"Too many people\")\n", 249 | " generate_sound(\"Too many people.\")\n", 250 | " except Exception as e:\n", 251 | " generate_sound(\"No recognisable face found!\")\n", 252 | " print(e)\n", 253 | "\n", 254 | " if cv.waitKey(0) & 0xFF == 27:\n", 255 | " break# ASCII for Esc Key\n", 256 | "cv.destroyAllWindows()\n" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 10, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "# addn()" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 15, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "%reload_ext autoreload\n", 275 | "%autoreload 2\n", 276 | "from faceadd import addn,speech" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | " root = tk.Tk()\n", 286 | " \n", 287 | " large_font = ('Times New Roman', 14)\n", 288 | "\n", 289 | " canvas1 = tk.Canvas(root, width=300, height=200)\n", 290 | " canvas1.pack()\n", 291 | " label = tk.Label(root, text='Enter the Name')\n", 292 | " canvas1.create_window(140, 50, window=label)\n", 293 | " entry1Var = tk.StringVar(value='')\n", 294 | " entry1 = tk.Entry(\n", 295 | " root, textvariable=entry1Var, font=large_font)\n", 296 | " canvas1.create_window(150, 90, window=entry1)\n", 297 | " button1 = tk.Button(text='SAVE', command=saveface)\n", 298 | " button2 = tk.Button(text='IGNORE', command=ignoreface)\n", 299 | " canvas1.create_window(100, 150, window=button1)\n", 300 | " canvas1.create_window(180, 150, window=button2)\n", 301 | "\n", 302 | " root.mainloop()" 303 | ] 304 | } 305 | ], 306 | "metadata": { 307 | "kernelspec": { 308 | "display_name": "Python 3", 309 | "language": "python", 310 | "name": "python3" 311 | }, 312 | "language_info": { 313 | "codemirror_mode": { 314 | "name": "ipython", 315 | "version": 3 316 | }, 317 | "file_extension": ".py", 318 | "mimetype": "text/x-python", 319 | "name": "python", 320 | "nbconvert_exporter": "python", 321 | "pygments_lexer": "ipython3", 322 | "version": "3.6.6" 323 | } 324 | }, 325 | "nbformat": 4, 326 | "nbformat_minor": 2 327 | } 328 | -------------------------------------------------------------------------------- /__pycache__/DetectionToolKit.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/DetectionToolKit.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/DetectionToolKit.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/DetectionToolKit.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/FaceToolKit.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/FaceToolKit.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/FaceToolKit.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/FaceToolKit.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/cache.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/cache.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/cache.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/cache.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/caption_tune.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/caption_tune.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/caption_tune.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/caption_tune.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/coco.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/coco.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/coco.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/coco.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/download.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/download.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/download.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/download.cpython-36.pyc -------------------------------------------------------------------------------- 
/__pycache__/f_part.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/f_part.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/faceadd.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/faceadd.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/fr_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/fr_utils.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/gensound.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/gensound.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/gensound.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/gensound.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/inception_blocks_v2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/inception_blocks_v2.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/p_part.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/__pycache__/p_part.cpython-36.pyc -------------------------------------------------------------------------------- /cache.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import numpy as np 4 | 5 | ######################################################################## 6 | 7 | 8 | def cache(cache_path, fn, *args, **kwargs): 9 | """ 10 | Cache-wrapper for a function or class. If the cache-file exists 11 | then the data is reloaded and returned, otherwise the function 12 | is called and the result is saved to cache. The fn-argument can 13 | also be a class instead, in which case an object-instance is 14 | created and saved to the cache-file. 15 | :param cache_path: 16 | File-path for the cache-file. 17 | :param fn: 18 | Function or class to be called. 19 | :param args: 20 | Arguments to the function or class-init. 21 | :param kwargs: 22 | Keyword arguments to the function or class-init. 23 | :return: 24 | The result of calling the function or creating the object-instance. 25 | """ 26 | 27 | # If the cache-file exists. 28 | if os.path.exists(cache_path): 29 | # Load the cached data from the file. 
30 | with open(cache_path, mode='rb') as file: 31 | obj = pickle.load(file) 32 | 33 | print("- Data loaded from cache-file: " + cache_path) 34 | else: 35 | # The cache-file does not exist. 36 | 37 | # Call the function / class-init with the supplied arguments. 38 | obj = fn(*args, **kwargs) 39 | 40 | # Save the data to a cache-file. 41 | with open(cache_path, mode='wb') as file: 42 | pickle.dump(obj, file) 43 | 44 | print("- Data saved to cache-file: " + cache_path) 45 | 46 | return obj 47 | 48 | 49 | ######################################################################## 50 | 51 | 52 | def convert_numpy2pickle(in_path, out_path): 53 | """ 54 | Convert a numpy-file to pickle-file. 55 | The first version of the cache-function used numpy for saving the data. 56 | Instead of re-calculating all the data, you can just convert the 57 | cache-file using this function. 58 | :param in_path: 59 | Input file in numpy-format written using numpy.save(). 60 | :param out_path: 61 | Output file written as a pickle-file. 62 | :return: 63 | Nothing. 64 | """ 65 | 66 | # Load the data using numpy. 67 | data = np.load(in_path) 68 | 69 | # Save the data using pickle. 70 | with open(out_path, mode='wb') as file: 71 | pickle.dump(data, file) 72 | 73 | 74 | ######################################################################## 75 | 76 | if __name__ == '__main__': 77 | # This is a short example of using a cache-file. 78 | 79 | # This is the function that will only get called if the result 80 | # is not already saved in the cache-file. This would normally 81 | # be a function that takes a long time to compute, or if you 82 | # need persistent data for some other reason. 83 | def expensive_function(a, b): 84 | return a * b 85 | 86 | print('Computing expensive_function() ...') 87 | 88 | # Either load the result from a cache-file if it already exists, 89 | # otherwise calculate expensive_function(a=123, b=456) and 90 | # save the result to the cache-file for next time. 91 | result = cache(cache_path='cache_expensive_function.pkl', 92 | fn=expensive_function, a=123, b=456) 93 | 94 | print('result =', result) 95 | 96 | # Newline. 97 | print() 98 | 99 | # This is another example which saves an object to a cache-file. 100 | 101 | # We want to cache an object-instance of this class. 102 | # The motivation is to do an expensive computation only once, 103 | # or if we need to persist the data for some other reason. 104 | class ExpensiveClass: 105 | def __init__(self, c, d): 106 | self.c = c 107 | self.d = d 108 | self.result = c * d 109 | 110 | def print_result(self): 111 | print('c =', self.c) 112 | print('d =', self.d) 113 | print('result = c * d =', self.result) 114 | 115 | print('Creating object from ExpensiveClass() ...') 116 | 117 | # Either load the object from a cache-file if it already exists, 118 | # otherwise make an object-instance ExpensiveClass(c=123, d=456) 119 | # and save the object to the cache-file for the next time. 120 | obj = cache(cache_path='cache_ExpensiveClass.pkl', 121 | fn=ExpensiveClass, c=123, d=456) 122 | 123 | obj.print_result() 124 | 125 | ######################################################################## -------------------------------------------------------------------------------- /caption_tune.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | def face_found_cap(text): 4 | cap_list = [ 5 | 'Hey, it is ' + text + '. Say Hello!', 6 | 'I see ' + text + '. Go and say Hello!', 7 | 'I see a familiar face. 
It seems to be ' + text 8 | ] 9 | 10 | return random.choice(cap_list) 11 | 12 | def face_not_found_cap(): 13 | cap_list = [ 14 | 'I bet I have never seen this person in my life before', 15 | 'No, I do not know who this person is.', 16 | 'Unknown person alert.' 17 | ] 18 | 19 | return random.choice(cap_list) 20 | 21 | def modcap(text): 22 | text = text[:-4] 23 | text = text.strip() 24 | if text == "a man in a suit and tie holding a glass of wine": 25 | return "a person standing just in front." 26 | else: 27 | return text 28 | -------------------------------------------------------------------------------- /coco.py: -------------------------------------------------------------------------------- 1 | ######################################################################## 2 | # 3 | # Functions for downloading the COCO data-set from the internet 4 | # and loading it into memory. This data-set contains images and 5 | # various associated data such as text-captions describing the images. 6 | # 7 | # http://cocodataset.org 8 | # 9 | # Implemented in Python 3.6 10 | # 11 | # Usage: 12 | # 1) Call set_data_dir() to set the desired storage directory. 13 | # 2) Call maybe_download_and_extract() to download the data-set 14 | # if it is not already located in the given data_dir. 15 | # 3) Call load_records(train=True) and load_records(train=False) 16 | # to load the data-records for the training- and validation sets. 17 | # 5) Use the returned data in your own program. 18 | # 19 | # Format: 20 | # The COCO data-set contains a large number of images and various 21 | # data for each image stored in a JSON-file. 22 | # Functionality is provided for getting a list of image-filenames 23 | # (but not actually loading the images) along with their associated 24 | # data such as text-captions describing the contents of the images. 25 | # 26 | ######################################################################## 27 | # 28 | # This file is part of the TensorFlow Tutorials available at: 29 | # 30 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials 31 | # 32 | # Published under the MIT License. See the file LICENSE for details. 33 | # 34 | # Copyright 2018 by Magnus Erik Hvass Pedersen 35 | # 36 | ######################################################################## 37 | 38 | import json 39 | import os 40 | import download 41 | from cache import cache 42 | 43 | ######################################################################## 44 | 45 | # Directory where you want to download and save the data-set. 46 | # Set this before you start calling any of the functions below. 47 | # Use the function set_data_dir() to also update train_dir and val_dir. 48 | data_dir = "data/coco/" 49 | 50 | # Sub-directories for the training- and validation-sets. 51 | train_dir = "data/coco/train2017" 52 | val_dir = "data/coco/val2017" 53 | 54 | # Base-URL for the data-sets on the internet. 55 | data_url = "http://images.cocodataset.org/" 56 | 57 | 58 | ######################################################################## 59 | # Private helper-functions. 60 | 61 | def _load_records(train=True): 62 | """ 63 | Load the image-filenames and captions 64 | for either the training-set or the validation-set. 65 | """ 66 | 67 | if train: 68 | # Training-set. 69 | filename = "captions_train2017.json" 70 | else: 71 | # Validation-set. 72 | filename = "captions_val2017.json" 73 | 74 | # Full path for the data-file. 75 | path = os.path.join(data_dir, "annotations", filename) 76 | 77 | # Load the file. 
78 | with open(path, "r", encoding="utf-8") as file: 79 | data_raw = json.load(file) 80 | 81 | # Convenience variables. 82 | images = data_raw['images'] 83 | annotations = data_raw['annotations'] 84 | 85 | # Initialize the dict for holding our data. 86 | # The lookup-key is the image-id. 87 | records = dict() 88 | 89 | # Collect all the filenames for the images. 90 | for image in images: 91 | # Get the id and filename for this image. 92 | image_id = image['id'] 93 | filename = image['file_name'] 94 | 95 | # Initialize a new data-record. 96 | record = dict() 97 | 98 | # Set the image-filename in the data-record. 99 | record['filename'] = filename 100 | 101 | # Initialize an empty list of image-captions 102 | # which will be filled further below. 103 | record['captions'] = list() 104 | 105 | # Save the record using the the image-id as the lookup-key. 106 | records[image_id] = record 107 | 108 | # Collect all the captions for the images. 109 | for ann in annotations: 110 | # Get the id and caption for an image. 111 | image_id = ann['image_id'] 112 | caption = ann['caption'] 113 | 114 | # Lookup the data-record for this image-id. 115 | # This data-record should already exist from the loop above. 116 | record = records[image_id] 117 | 118 | # Append the current caption to the list of captions in the 119 | # data-record that was initialized in the loop above. 120 | record['captions'].append(caption) 121 | 122 | # Convert the records-dict to a list of tuples. 123 | records_list = [(key, record['filename'], record['captions']) 124 | for key, record in sorted(records.items())] 125 | 126 | # Convert the list of tuples to separate tuples with the data. 127 | ids, filenames, captions = zip(*records_list) 128 | 129 | return ids, filenames, captions 130 | 131 | 132 | ######################################################################## 133 | # Public functions that you may call to download the data-set from 134 | # the internet and load the data into memory. 135 | 136 | 137 | def set_data_dir(new_data_dir): 138 | """ 139 | Set the base-directory for data-files and then 140 | set the sub-dirs for training and validation data. 141 | """ 142 | 143 | # Ensure we update the global variables. 144 | global data_dir, train_dir, val_dir 145 | 146 | data_dir = new_data_dir 147 | train_dir = os.path.join(new_data_dir, "train2017") 148 | val_dir = os.path.join(new_data_dir, "val2017") 149 | 150 | 151 | def maybe_download_and_extract(): 152 | """ 153 | Download and extract the COCO data-set if the data-files don't 154 | already exist in data_dir. 155 | """ 156 | 157 | # Filenames to download from the internet. 158 | filenames = ["zips/train2017.zip", "zips/val2017.zip", 159 | "annotations/annotations_trainval2017.zip"] 160 | 161 | # Download these files. 162 | for filename in filenames: 163 | # Create the full URL for the given file. 164 | url = data_url + filename 165 | 166 | print("Downloading " + url) 167 | 168 | download.maybe_download_and_extract(url=url, download_dir=data_dir) 169 | 170 | 171 | def load_records(train=True): 172 | """ 173 | Load the data-records for the data-set. This returns the image ids, 174 | filenames and text-captions for either the training-set or validation-set. 175 | 176 | This wraps _load_records() above with a cache, so if the cache-file already 177 | exists then it is loaded instead of processing the original data-file. 178 | 179 | :param train: 180 | Bool whether to load the training-set (True) or validation-set (False). 
181 | :return: 182 | ids, filenames, captions for the images in the data-set. 183 | """ 184 | 185 | if train: 186 | # Cache-file for the training-set data. 187 | cache_filename = "records_train.pkl" 188 | else: 189 | # Cache-file for the validation-set data. 190 | cache_filename = "records_val.pkl" 191 | 192 | # Path for the cache-file. 193 | cache_path = os.path.join(data_dir, cache_filename) 194 | 195 | # If the data-records already exist in a cache-file then load it, 196 | # otherwise call the _load_records() function and save its 197 | # return-values to the cache-file so it can be loaded the next time. 198 | records = cache(cache_path=cache_path, 199 | fn=_load_records, 200 | train=train) 201 | 202 | return records 203 | 204 | ######################################################################## 205 | -------------------------------------------------------------------------------- /d2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/d2.mp4 -------------------------------------------------------------------------------- /detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/detection/__init__.py -------------------------------------------------------------------------------- /detection/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/detection/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /detection/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/detection/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /detection/mtcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/detection/mtcnn/__init__.py -------------------------------------------------------------------------------- /detection/mtcnn/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/detection/mtcnn/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /detection/mtcnn/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/detection/mtcnn/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /detection/mtcnn/__pycache__/detect_face.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/detection/mtcnn/__pycache__/detect_face.cpython-35.pyc 
-------------------------------------------------------------------------------- /detection/mtcnn/__pycache__/detect_face.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/detection/mtcnn/__pycache__/detect_face.cpython-36.pyc -------------------------------------------------------------------------------- /detection/mtcnn/det1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/detection/mtcnn/det1.npy -------------------------------------------------------------------------------- /detection/mtcnn/det2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/detection/mtcnn/det2.npy -------------------------------------------------------------------------------- /detection/mtcnn/det3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/detection/mtcnn/det3.npy -------------------------------------------------------------------------------- /detection/mtcnn/detect_face.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from six import string_types, iteritems 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | #from math import floor 9 | import cv2 10 | import os 11 | 12 | def layer(op): 13 | """Decorator for composable network layers.""" 14 | 15 | def layer_decorated(self, *args, **kwargs): 16 | # Automatically set a name if not provided. 17 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 18 | # Figure out the layer inputs. 19 | if len(self.terminals) == 0: 20 | raise RuntimeError('No input variables found for layer %s.' % name) 21 | elif len(self.terminals) == 1: 22 | layer_input = self.terminals[0] 23 | else: 24 | layer_input = list(self.terminals) 25 | # Perform the operation and get the output. 26 | layer_output = op(self, layer_input, *args, **kwargs) 27 | # Add to layer LUT. 28 | self.layers[name] = layer_output 29 | # This output is now the input for the next layer. 30 | self.feed(layer_output) 31 | # Return self for chained calls. 32 | return self 33 | 34 | return layer_decorated 35 | 36 | class Network(object): 37 | 38 | def __init__(self, inputs, trainable=True): 39 | # The input nodes for this network 40 | self.inputs = inputs 41 | # The current list of terminal nodes 42 | self.terminals = [] 43 | # Mapping from layer names to layers 44 | self.layers = dict(inputs) 45 | # If true, the resulting variables are set as trainable 46 | self.trainable = trainable 47 | 48 | self.setup() 49 | 50 | def setup(self): 51 | """Construct the network. """ 52 | raise NotImplementedError('Must be implemented by the subclass.') 53 | 54 | def load(self, data_path, session, ignore_missing=False): 55 | """Load network weights. 56 | data_path: The path to the numpy-serialized network weights 57 | session: The current TensorFlow session 58 | ignore_missing: If true, serialized weights for missing layers are ignored. 
59 | """ 60 | data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member 61 | 62 | for op_name in data_dict: 63 | with tf.variable_scope(op_name, reuse=True): 64 | for param_name, data in iteritems(data_dict[op_name]): 65 | try: 66 | var = tf.get_variable(param_name) 67 | session.run(var.assign(data)) 68 | except ValueError: 69 | if not ignore_missing: 70 | raise 71 | 72 | def feed(self, *args): 73 | """Set the input(s) for the next operation by replacing the terminal nodes. 74 | The arguments can be either layer names or the actual layers. 75 | """ 76 | assert len(args) != 0 77 | self.terminals = [] 78 | for fed_layer in args: 79 | if isinstance(fed_layer, string_types): 80 | try: 81 | fed_layer = self.layers[fed_layer] 82 | except KeyError: 83 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 84 | self.terminals.append(fed_layer) 85 | return self 86 | 87 | def get_output(self): 88 | """Returns the current network output.""" 89 | return self.terminals[-1] 90 | 91 | def get_unique_name(self, prefix): 92 | """Returns an index-suffixed unique name for the given prefix. 93 | This is used for auto-generating layer names based on the type-prefix. 94 | """ 95 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 96 | return '%s_%d' % (prefix, ident) 97 | 98 | def make_var(self, name, shape): 99 | """Creates a new TensorFlow variable.""" 100 | return tf.get_variable(name, shape, trainable=self.trainable) 101 | 102 | def validate_padding(self, padding): 103 | """Verifies that the padding is one of the supported ones.""" 104 | assert padding in ('SAME', 'VALID') 105 | 106 | @layer 107 | def conv(self, 108 | inp, 109 | k_h, 110 | k_w, 111 | c_o, 112 | s_h, 113 | s_w, 114 | name, 115 | relu=True, 116 | padding='SAME', 117 | group=1, 118 | biased=True): 119 | # Verify that the padding is acceptable 120 | self.validate_padding(padding) 121 | # Get the number of channels in the input 122 | c_i = int(inp.get_shape()[-1]) 123 | # Verify that the grouping parameter is valid 124 | assert c_i % group == 0 125 | assert c_o % group == 0 126 | # Convolution for a given input and kernel 127 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 128 | with tf.variable_scope(name) as scope: 129 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 130 | # This is the common-case. Convolve the input without any further complications. 131 | output = convolve(inp, kernel) 132 | # Add the biases 133 | if biased: 134 | biases = self.make_var('biases', [c_o]) 135 | output = tf.nn.bias_add(output, biases) 136 | if relu: 137 | # ReLU non-linearity 138 | output = tf.nn.relu(output, name=scope.name) 139 | return output 140 | 141 | @layer 142 | def prelu(self, inp, name): 143 | with tf.variable_scope(name): 144 | i = int(inp.get_shape()[-1]) 145 | alpha = self.make_var('alpha', shape=(i,)) 146 | output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) 147 | return output 148 | 149 | @layer 150 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 151 | self.validate_padding(padding) 152 | return tf.nn.max_pool(inp, 153 | ksize=[1, k_h, k_w, 1], 154 | strides=[1, s_h, s_w, 1], 155 | padding=padding, 156 | name=name) 157 | 158 | @layer 159 | def fc(self, inp, num_out, name, relu=True): 160 | with tf.variable_scope(name): 161 | input_shape = inp.get_shape() 162 | if input_shape.ndims == 4: 163 | # The input is spatial. Vectorize it first. 
164 | dim = 1 165 | for d in input_shape[1:].as_list(): 166 | dim *= int(d) 167 | feed_in = tf.reshape(inp, [-1, dim]) 168 | else: 169 | feed_in, dim = (inp, input_shape[-1].value) 170 | weights = self.make_var('weights', shape=[dim, num_out]) 171 | biases = self.make_var('biases', [num_out]) 172 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 173 | fc = op(feed_in, weights, biases, name=name) 174 | return fc 175 | 176 | 177 | """ 178 | Multi dimensional softmax, 179 | refer to https://github.com/tensorflow/tensorflow/issues/210 180 | compute softmax along the dimension of target 181 | the native softmax only supports batch_size x dimension 182 | """ 183 | @layer 184 | def softmax(self, target, axis, name=None): 185 | max_axis = tf.reduce_max(target, axis, keepdims=True) 186 | target_exp = tf.exp(target-max_axis) 187 | normalize = tf.reduce_sum(target_exp, axis, keepdims=True) 188 | softmax = tf.div(target_exp, normalize, name) 189 | return softmax 190 | 191 | class PNet(Network): 192 | def setup(self): 193 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 194 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 195 | .prelu(name='PReLU1') 196 | .max_pool(2, 2, 2, 2, name='pool1') 197 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 198 | .prelu(name='PReLU2') 199 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 200 | .prelu(name='PReLU3') 201 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 202 | .softmax(3,name='prob1')) 203 | 204 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 205 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 206 | 207 | class RNet(Network): 208 | def setup(self): 209 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 210 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 211 | .prelu(name='prelu1') 212 | .max_pool(3, 3, 2, 2, name='pool1') 213 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 214 | .prelu(name='prelu2') 215 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 216 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 217 | .prelu(name='prelu3') 218 | .fc(128, relu=False, name='conv4') 219 | .prelu(name='prelu4') 220 | .fc(2, relu=False, name='conv5-1') 221 | .softmax(1,name='prob1')) 222 | 223 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 224 | .fc(4, relu=False, name='conv5-2')) 225 | 226 | class ONet(Network): 227 | def setup(self): 228 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 229 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 230 | .prelu(name='prelu1') 231 | .max_pool(3, 3, 2, 2, name='pool1') 232 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 233 | .prelu(name='prelu2') 234 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 235 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 236 | .prelu(name='prelu3') 237 | .max_pool(2, 2, 2, 2, name='pool3') 238 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 239 | .prelu(name='prelu4') 240 | .fc(256, relu=False, name='conv5') 241 | .prelu(name='prelu5') 242 | .fc(2, relu=False, name='conv6-1') 243 | .softmax(1, name='prob1')) 244 | 245 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 246 | .fc(4, relu=False, name='conv6-2')) 247 | 248 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 249 | .fc(10, relu=False, name='conv6-3')) 250 | 251 | def create_mtcnn(sess, model_path): 252 | if not 
model_path: 253 | model_path,_ = os.path.split(os.path.realpath(__file__)) 254 | 255 | with tf.variable_scope('pnet'): 256 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 257 | pnet = PNet({'data':data}) 258 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 259 | with tf.variable_scope('rnet'): 260 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 261 | rnet = RNet({'data':data}) 262 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 263 | with tf.variable_scope('onet'): 264 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 265 | onet = ONet({'data':data}) 266 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 267 | 268 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 269 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 270 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 271 | return pnet_fun, rnet_fun, onet_fun 272 | 273 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 274 | """Detects faces in an image, and returns bounding boxes and points for them. 275 | img: input image 276 | minsize: minimum faces' size 277 | pnet, rnet, onet: caffemodel 278 | threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold 279 | factor: the factor used to create a scaling pyramid of face sizes to detect in the image. 280 | """ 281 | factor_count=0 282 | total_boxes=np.empty((0,9)) 283 | points=np.empty(0) 284 | h=img.shape[0] 285 | w=img.shape[1] 286 | minl=np.amin([h, w]) 287 | m=12.0/minsize 288 | minl=minl*m 289 | # create scale pyramid 290 | scales=[] 291 | while minl>=12: 292 | scales += [m*np.power(factor, factor_count)] 293 | minl = minl*factor 294 | factor_count += 1 295 | 296 | # first stage 297 | for scale in scales: 298 | hs=int(np.ceil(h*scale)) 299 | ws=int(np.ceil(w*scale)) 300 | im_data = imresample(img, (hs, ws)) 301 | im_data = (im_data-127.5)*0.0078125 302 | img_x = np.expand_dims(im_data, 0) 303 | img_y = np.transpose(img_x, (0,2,1,3)) 304 | out = pnet(img_y) 305 | out0 = np.transpose(out[0], (0,2,1,3)) 306 | out1 = np.transpose(out[1], (0,2,1,3)) 307 | 308 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 309 | 310 | # inter-scale nms 311 | pick = nms(boxes.copy(), 0.5, 'Union') 312 | if boxes.size>0 and pick.size>0: 313 | boxes = boxes[pick,:] 314 | total_boxes = np.append(total_boxes, boxes, axis=0) 315 | 316 | numbox = total_boxes.shape[0] 317 | if numbox>0: 318 | pick = nms(total_boxes.copy(), 0.7, 'Union') 319 | total_boxes = total_boxes[pick,:] 320 | regw = total_boxes[:,2]-total_boxes[:,0] 321 | regh = total_boxes[:,3]-total_boxes[:,1] 322 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 323 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 324 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 325 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 326 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 327 | total_boxes = rerec(total_boxes.copy()) 328 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 329 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 330 | 331 | numbox = total_boxes.shape[0] 332 | if numbox>0: 333 | # second stage 334 | tempimg = np.zeros((24,24,3,numbox)) 335 | for k in range(0,numbox): 336 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 337 | 
tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 338 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 339 | tempimg[:,:,:,k] = imresample(tmp, (24, 24)) 340 | else: 341 | return np.empty() 342 | tempimg = (tempimg-127.5)*0.0078125 343 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 344 | out = rnet(tempimg1) 345 | out0 = np.transpose(out[0]) 346 | out1 = np.transpose(out[1]) 347 | score = out1[1,:] 348 | ipass = np.where(score>threshold[1]) 349 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 350 | mv = out0[:,ipass[0]] 351 | if total_boxes.shape[0]>0: 352 | pick = nms(total_boxes, 0.7, 'Union') 353 | total_boxes = total_boxes[pick,:] 354 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) 355 | total_boxes = rerec(total_boxes.copy()) 356 | 357 | numbox = total_boxes.shape[0] 358 | if numbox>0: 359 | # third stage 360 | total_boxes = np.fix(total_boxes).astype(np.int32) 361 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 362 | tempimg = np.zeros((48,48,3,numbox)) 363 | for k in range(0,numbox): 364 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 365 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 366 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 367 | tempimg[:,:,:,k] = imresample(tmp, (48, 48)) 368 | else: 369 | return np.empty() 370 | tempimg = (tempimg-127.5)*0.0078125 371 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 372 | out = onet(tempimg1) 373 | out0 = np.transpose(out[0]) 374 | out1 = np.transpose(out[1]) 375 | out2 = np.transpose(out[2]) 376 | score = out2[1,:] 377 | points = out1 378 | ipass = np.where(score>threshold[2]) 379 | points = points[:,ipass[0]] 380 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 381 | mv = out0[:,ipass[0]] 382 | 383 | w = total_boxes[:,2]-total_boxes[:,0]+1 384 | h = total_boxes[:,3]-total_boxes[:,1]+1 385 | points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 386 | points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 387 | if total_boxes.shape[0]>0: 388 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) 389 | pick = nms(total_boxes.copy(), 0.7, 'Min') 390 | total_boxes = total_boxes[pick,:] 391 | points = points[:,pick] 392 | 393 | return total_boxes, points 394 | 395 | 396 | def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor): 397 | """Detects faces in a list of images 398 | images: list containing input images 399 | detection_window_size_ratio: ratio of minimum face size to smallest image dimension 400 | pnet, rnet, onet: caffemodel 401 | threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold [0-1] 402 | factor: the factor used to create a scaling pyramid of face sizes to detect in the image. 
403 | """ 404 | all_scales = [None] * len(images) 405 | images_with_boxes = [None] * len(images) 406 | 407 | for i in range(len(images)): 408 | images_with_boxes[i] = {'total_boxes': np.empty((0, 9))} 409 | 410 | # create scale pyramid 411 | for index, img in enumerate(images): 412 | all_scales[index] = [] 413 | h = img.shape[0] 414 | w = img.shape[1] 415 | minsize = int(detection_window_size_ratio * np.minimum(w, h)) 416 | factor_count = 0 417 | minl = np.amin([h, w]) 418 | if minsize <= 12: 419 | minsize = 12 420 | 421 | m = 12.0 / minsize 422 | minl = minl * m 423 | while minl >= 12: 424 | all_scales[index].append(m * np.power(factor, factor_count)) 425 | minl = minl * factor 426 | factor_count += 1 427 | 428 | # # # # # # # # # # # # # 429 | # first stage - fast proposal network (pnet) to obtain face candidates 430 | # # # # # # # # # # # # # 431 | 432 | images_obj_per_resolution = {} 433 | 434 | # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images 435 | 436 | for index, scales in enumerate(all_scales): 437 | h = images[index].shape[0] 438 | w = images[index].shape[1] 439 | 440 | for scale in scales: 441 | hs = int(np.ceil(h * scale)) 442 | ws = int(np.ceil(w * scale)) 443 | 444 | if (ws, hs) not in images_obj_per_resolution: 445 | images_obj_per_resolution[(ws, hs)] = [] 446 | 447 | im_data = imresample(images[index], (hs, ws)) 448 | im_data = (im_data - 127.5) * 0.0078125 449 | img_y = np.transpose(im_data, (1, 0, 2)) # caffe uses different dimensions ordering 450 | images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index}) 451 | 452 | for resolution in images_obj_per_resolution: 453 | images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]] 454 | outs = pnet(images_per_resolution) 455 | 456 | for index in range(len(outs[0])): 457 | scale = images_obj_per_resolution[resolution][index]['scale'] 458 | image_index = images_obj_per_resolution[resolution][index]['index'] 459 | out0 = np.transpose(outs[0][index], (1, 0, 2)) 460 | out1 = np.transpose(outs[1][index], (1, 0, 2)) 461 | 462 | boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0]) 463 | 464 | # inter-scale nms 465 | pick = nms(boxes.copy(), 0.5, 'Union') 466 | if boxes.size > 0 and pick.size > 0: 467 | boxes = boxes[pick, :] 468 | images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'], 469 | boxes, 470 | axis=0) 471 | 472 | for index, image_obj in enumerate(images_with_boxes): 473 | numbox = image_obj['total_boxes'].shape[0] 474 | if numbox > 0: 475 | h = images[index].shape[0] 476 | w = images[index].shape[1] 477 | pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union') 478 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 479 | regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] 480 | regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] 481 | qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw 482 | qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh 483 | qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw 484 | qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh 485 | image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]])) 486 | image_obj['total_boxes'] = 
rerec(image_obj['total_boxes'].copy()) 487 | image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32) 488 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) 489 | 490 | numbox = image_obj['total_boxes'].shape[0] 491 | tempimg = np.zeros((24, 24, 3, numbox)) 492 | 493 | if numbox > 0: 494 | for k in range(0, numbox): 495 | tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) 496 | tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] 497 | if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: 498 | tempimg[:, :, :, k] = imresample(tmp, (24, 24)) 499 | else: 500 | return np.empty() 501 | 502 | tempimg = (tempimg - 127.5) * 0.0078125 503 | image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) 504 | 505 | # # # # # # # # # # # # # 506 | # second stage - refinement of face candidates with rnet 507 | # # # # # # # # # # # # # 508 | 509 | bulk_rnet_input = np.empty((0, 24, 24, 3)) 510 | for index, image_obj in enumerate(images_with_boxes): 511 | if 'rnet_input' in image_obj: 512 | bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0) 513 | 514 | out = rnet(bulk_rnet_input) 515 | out0 = np.transpose(out[0]) 516 | out1 = np.transpose(out[1]) 517 | score = out1[1, :] 518 | 519 | i = 0 520 | for index, image_obj in enumerate(images_with_boxes): 521 | if 'rnet_input' not in image_obj: 522 | continue 523 | 524 | rnet_input_count = image_obj['rnet_input'].shape[0] 525 | score_per_image = score[i:i + rnet_input_count] 526 | out0_per_image = out0[:, i:i + rnet_input_count] 527 | 528 | ipass = np.where(score_per_image > threshold[1]) 529 | image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), 530 | np.expand_dims(score_per_image[ipass].copy(), 1)]) 531 | 532 | mv = out0_per_image[:, ipass[0]] 533 | 534 | if image_obj['total_boxes'].shape[0] > 0: 535 | h = images[index].shape[0] 536 | w = images[index].shape[1] 537 | pick = nms(image_obj['total_boxes'], 0.7, 'Union') 538 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 539 | image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick])) 540 | image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) 541 | 542 | numbox = image_obj['total_boxes'].shape[0] 543 | 544 | if numbox > 0: 545 | tempimg = np.zeros((48, 48, 3, numbox)) 546 | image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32) 547 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) 548 | 549 | for k in range(0, numbox): 550 | tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) 551 | tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] 552 | if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: 553 | tempimg[:, :, :, k] = imresample(tmp, (48, 48)) 554 | else: 555 | return np.empty() 556 | tempimg = (tempimg - 127.5) * 0.0078125 557 | image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) 558 | 559 | i += rnet_input_count 560 | 561 | # # # # # # # # # # # # # 562 | # third stage - further refinement and facial landmarks positions with onet 563 | # # # # # # # # # # # # # 564 | 565 | bulk_onet_input = np.empty((0, 48, 48, 3)) 566 | for index, image_obj in enumerate(images_with_boxes): 567 | if 'onet_input' in image_obj: 568 | bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0) 569 | 570 | out 
= onet(bulk_onet_input) 571 | 572 | out0 = np.transpose(out[0]) 573 | out1 = np.transpose(out[1]) 574 | out2 = np.transpose(out[2]) 575 | score = out2[1, :] 576 | points = out1 577 | 578 | i = 0 579 | ret = [] 580 | for index, image_obj in enumerate(images_with_boxes): 581 | if 'onet_input' not in image_obj: 582 | ret.append(None) 583 | continue 584 | 585 | onet_input_count = image_obj['onet_input'].shape[0] 586 | 587 | out0_per_image = out0[:, i:i + onet_input_count] 588 | score_per_image = score[i:i + onet_input_count] 589 | points_per_image = points[:, i:i + onet_input_count] 590 | 591 | ipass = np.where(score_per_image > threshold[2]) 592 | points_per_image = points_per_image[:, ipass[0]] 593 | 594 | image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), 595 | np.expand_dims(score_per_image[ipass].copy(), 1)]) 596 | mv = out0_per_image[:, ipass[0]] 597 | 598 | w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1 599 | h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1 600 | points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile( 601 | image_obj['total_boxes'][:, 0], (5, 1)) - 1 602 | points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile( 603 | image_obj['total_boxes'][:, 1], (5, 1)) - 1 604 | 605 | if image_obj['total_boxes'].shape[0] > 0: 606 | image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv)) 607 | pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min') 608 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 609 | points_per_image = points_per_image[:, pick] 610 | 611 | ret.append((image_obj['total_boxes'], points_per_image)) 612 | else: 613 | ret.append(None) 614 | 615 | i += onet_input_count 616 | 617 | return ret 618 | 619 | 620 | # function [boundingbox] = bbreg(boundingbox,reg) 621 | def bbreg(boundingbox,reg): 622 | """Calibrate bounding boxes""" 623 | if reg.shape[1]==1: 624 | reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) 625 | 626 | w = boundingbox[:,2]-boundingbox[:,0]+1 627 | h = boundingbox[:,3]-boundingbox[:,1]+1 628 | b1 = boundingbox[:,0]+reg[:,0]*w 629 | b2 = boundingbox[:,1]+reg[:,1]*h 630 | b3 = boundingbox[:,2]+reg[:,2]*w 631 | b4 = boundingbox[:,3]+reg[:,3]*h 632 | boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) 633 | return boundingbox 634 | 635 | def generateBoundingBox(imap, reg, scale, t): 636 | """Use heatmap to generate bounding boxes""" 637 | stride=2 638 | cellsize=12 639 | 640 | imap = np.transpose(imap) 641 | dx1 = np.transpose(reg[:,:,0]) 642 | dy1 = np.transpose(reg[:,:,1]) 643 | dx2 = np.transpose(reg[:,:,2]) 644 | dy2 = np.transpose(reg[:,:,3]) 645 | y, x = np.where(imap >= t) 646 | if y.shape[0]==1: 647 | dx1 = np.flipud(dx1) 648 | dy1 = np.flipud(dy1) 649 | dx2 = np.flipud(dx2) 650 | dy2 = np.flipud(dy2) 651 | score = imap[(y,x)] 652 | reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) 653 | if reg.size==0: 654 | reg = np.empty((0,3)) 655 | bb = np.transpose(np.vstack([y,x])) 656 | q1 = np.fix((stride*bb+1)/scale) 657 | q2 = np.fix((stride*bb+cellsize-1+1)/scale) 658 | boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) 659 | return boundingbox, reg 660 | 661 | # function pick = nms(boxes,threshold,type) 662 | def nms(boxes, threshold, method): 663 | if boxes.size==0: 664 | return np.empty((0,3)) 665 | x1 = boxes[:,0] 666 | y1 = boxes[:,1] 667 | x2 = boxes[:,2] 668 | y2 = boxes[:,3] 669 | s = boxes[:,4] 670 | area 
= (x2-x1+1) * (y2-y1+1) 671 | I = np.argsort(s) 672 | pick = np.zeros_like(s, dtype=np.int16) 673 | counter = 0 674 | while I.size>0: 675 | i = I[-1] 676 | pick[counter] = i 677 | counter += 1 678 | idx = I[0:-1] 679 | xx1 = np.maximum(x1[i], x1[idx]) 680 | yy1 = np.maximum(y1[i], y1[idx]) 681 | xx2 = np.minimum(x2[i], x2[idx]) 682 | yy2 = np.minimum(y2[i], y2[idx]) 683 | w = np.maximum(0.0, xx2-xx1+1) 684 | h = np.maximum(0.0, yy2-yy1+1) 685 | inter = w * h 686 | if method is 'Min': 687 | o = inter / np.minimum(area[i], area[idx]) 688 | else: 689 | o = inter / (area[i] + area[idx] - inter) 690 | I = I[np.where(o<=threshold)] 691 | pick = pick[0:counter] 692 | return pick 693 | 694 | # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) 695 | def pad(total_boxes, w, h): 696 | """Compute the padding coordinates (pad the bounding boxes to square)""" 697 | tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) 698 | tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) 699 | numbox = total_boxes.shape[0] 700 | 701 | dx = np.ones((numbox), dtype=np.int32) 702 | dy = np.ones((numbox), dtype=np.int32) 703 | edx = tmpw.copy().astype(np.int32) 704 | edy = tmph.copy().astype(np.int32) 705 | 706 | x = total_boxes[:,0].copy().astype(np.int32) 707 | y = total_boxes[:,1].copy().astype(np.int32) 708 | ex = total_boxes[:,2].copy().astype(np.int32) 709 | ey = total_boxes[:,3].copy().astype(np.int32) 710 | 711 | tmp = np.where(ex>w) 712 | edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) 713 | ex[tmp] = w 714 | 715 | tmp = np.where(ey>h) 716 | edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) 717 | ey[tmp] = h 718 | 719 | tmp = np.where(x<1) 720 | dx.flat[tmp] = np.expand_dims(2-x[tmp],1) 721 | x[tmp] = 1 722 | 723 | tmp = np.where(y<1) 724 | dy.flat[tmp] = np.expand_dims(2-y[tmp],1) 725 | y[tmp] = 1 726 | 727 | return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph 728 | 729 | # function [bboxA] = rerec(bboxA) 730 | def rerec(bboxA): 731 | """Convert bboxA to square.""" 732 | h = bboxA[:,3]-bboxA[:,1] 733 | w = bboxA[:,2]-bboxA[:,0] 734 | l = np.maximum(w, h) 735 | bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 736 | bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 737 | bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) 738 | return bboxA 739 | 740 | def imresample(img, sz): 741 | im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable 742 | return im_data 743 | 744 | # This method is kept for debugging purpose 745 | # h=img.shape[0] 746 | # w=img.shape[1] 747 | # hs, ws = sz 748 | # dx = float(w) / ws 749 | # dy = float(h) / hs 750 | # im_data = np.zeros((hs,ws,3)) 751 | # for a1 in range(0,hs): 752 | # for a2 in range(0,ws): 753 | # for a3 in range(0,3): 754 | # im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3] 755 | # return im_data 756 | 757 | -------------------------------------------------------------------------------- /digivision.py: -------------------------------------------------------------------------------- 1 | import cv2 as cv 2 | import warnings 3 | warnings.filterwarnings("ignore") 4 | import p_part 5 | import f_part 6 | from caption_tune import modcap, face_found_cap, face_not_found_cap 7 | from gensoundgtts import generate_sound 8 | import tkinter as tk 9 | 10 | def saveface(): 11 | x1 = entry1.get() 12 | print(x1 + ' face saved') 13 | root.destroy() 14 | cv.imwrite(r"images//" + 15 | str(x1) + ".jpg", save) 16 | data = {x1: f_part.img_to_encoding( 17 | "images//" + str(x1) + ".jpg").tolist()} 18 | 
f_part.digi_db.insert_one(data) 19 | 20 | 21 | def ignoreface(): 22 | print("Not saved") 23 | root.destroy() 24 | 25 | 26 | cap = cv.VideoCapture('Sample Videos/test.mp4') 27 | 28 | while True: 29 | ret, frame = cap.read() 30 | facedetect = cv.CascadeClassifier(r'haarcascade_frontalface_default.xml') 31 | if ret: 32 | # font = cv.FONT_HERSHEY_SIMPLEX 33 | cv.imshow("Video", frame) 34 | 35 | if cv.waitKey(5) == ord('p'): 36 | 37 | cv.imwrite('./test.jpg', frame) 38 | final_caption = p_part.generate_caption( 39 | './test.jpg') # create caption 40 | final_caption = modcap(final_caption) # remove tags 41 | print(final_caption) 42 | generate_sound(final_caption) # convert to audio 43 | 44 | if cv.waitKey(5) == ord('f'): 45 | gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY) 46 | faces = facedetect.detectMultiScale(gray, 1.3, 5) 47 | cv.imwrite('./test.jpg', frame) 48 | known_detected = 0 49 | unknown_detected = 0 50 | known_face_list = [] 51 | known_face_dist = [] 52 | try: 53 | for x, y, w, h in faces: 54 | #cv2.imwrite("dset//User."+str(user)+"."+str(sample)+".jpg",gray[y:y+h,x:x+w]) 55 | save = frame[y:y+h, x:x+w] 56 | cv.imwrite('./test.jpg', save) 57 | dis, name = f_part.who_is_it('./test.jpg') 58 | print(str(dis)+","+name) 59 | if name != 'unknown': 60 | known_face_list.append(name) 61 | known_face_dist.append(dis) 62 | known_detected += 1 63 | 64 | else: 65 | unknown_detected += 1 66 | 67 | if known_detected > 0: 68 | print("known: " + str(known_detected)) 69 | for i in range(known_detected): 70 | print('i=' + str(i)) 71 | print( 72 | known_face_list[i] + " at dist of: " + str(known_face_dist[i])) 73 | temp = face_found_cap(str(known_face_list[i])) 74 | generate_sound(temp) 75 | elif unknown_detected == 1: 76 | temp = face_not_found_cap() 77 | generate_sound(temp) 78 | 79 | root = tk.Tk() 80 | 81 | large_font = ('Times New Roman', 14) 82 | 83 | canvas1 = tk.Canvas(root, width=300, height=200) 84 | canvas1.pack() 85 | label = tk.Label(root, text='Enter the Name') 86 | canvas1.create_window(140, 50, window=label) 87 | entry1Var = tk.StringVar(value='') 88 | entry1 = tk.Entry( 89 | root, textvariable=entry1Var, font=large_font) 90 | canvas1.create_window(150, 90, window=entry1) 91 | button1 = tk.Button(text='SAVE', command=saveface) 92 | button2 = tk.Button(text='IGNORE', command=ignoreface) 93 | canvas1.create_window(100, 150, window=button1) 94 | canvas1.create_window(180, 150, window=button2) 95 | 96 | root.mainloop() 97 | 98 | elif known_detected == 0 and unknown_detected == 0: 99 | print("No person found") 100 | generate_sound("No person found!") 101 | 102 | else: 103 | print("Too many unknown people") 104 | generate_sound("Too many unknown people.") 105 | except: 106 | generate_sound("No recognisable face found!") 107 | 108 | if cv.waitKey(1) & 0xFF == 27: # ASCII for Esc Key 109 | break 110 | else: 111 | break 112 | cap.release() 113 | cv.destroyAllWindows() 114 | -------------------------------------------------------------------------------- /digivision2.py: -------------------------------------------------------------------------------- 1 | import cv2 as cv 2 | import warnings 3 | warnings.filterwarnings("ignore") 4 | import p_part 5 | import f_part 6 | from caption_tune import modcap, face_found_cap, face_not_found_cap 7 | from gensound import generate_sound 8 | import tkinter as tk 9 | from faceadd import addn,speech 10 | 11 | 12 | def saveface(): 13 | # generate_sound("Tell me the name") 14 | x1 = speech("What is this human called?") 15 | print(x1 + ' face saved') 16 | 
cv.imwrite(r"images//" + 17 | str(x1) + ".jpg", save) 18 | data = {x1: f_part.img_to_encoding( 19 | "images//" + str(x1) + ".jpg").tolist()} 20 | f_part.digi_db.insert_one(data) 21 | 22 | 23 | def ignoreface(): 24 | generate_sound("Not saved") 25 | 26 | 27 | cap = cv.VideoCapture(0) 28 | 29 | while True: 30 | ret, frame = cap.read() 31 | facedetect = cv.CascadeClassifier(r'haarcascade_frontalface_default.xml') 32 | if ret: 33 | # font = cv.FONT_HERSHEY_SIMPLEX 34 | cv.imshow("Video", frame) 35 | 36 | if cv.waitKey(1) == ord('p'): 37 | 38 | cv.imwrite('./test.jpg', frame) 39 | final_caption = p_part.generate_caption( 40 | './test.jpg') # create caption 41 | final_caption = modcap(final_caption) # remove tags 42 | print(final_caption) 43 | generate_sound(final_caption) # convert to audio 44 | 45 | if cv.waitKey(1) == ord('f'): 46 | gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY) 47 | faces = facedetect.detectMultiScale(gray, 1.3, 5) 48 | cv.imwrite('./test.jpg', frame) 49 | known_detected = 0 50 | unknown_detected = 0 51 | known_face_list = [] 52 | known_face_dist = [] 53 | try: 54 | for x, y, w, h in faces: 55 | #cv2.imwrite("dset//User."+str(user)+"."+str(sample)+".jpg",gray[y:y+h,x:x+w]) 56 | save = frame[y:y+h, x:x+w] 57 | cv.imwrite('./test.jpg', save) 58 | dis, name = f_part.who_is_it('./test.jpg') 59 | print(str(dis)+","+name) 60 | if name != 'unknown': 61 | known_face_list.append(name) 62 | known_face_dist.append(dis) 63 | known_detected += 1 64 | 65 | else: 66 | unknown_detected += 1 67 | 68 | if known_detected > 0: 69 | print("known: " + str(known_detected)) 70 | for i in range(known_detected): 71 | print('i=' + str(i)) 72 | print( 73 | known_face_list[i] + " at dist of: " + str(known_face_dist[i])) 74 | temp = face_found_cap(str(known_face_list[i])) 75 | generate_sound(temp) 76 | elif unknown_detected == 1: 77 | temp = face_not_found_cap() 78 | generate_sound(temp) 79 | generate_sound("Do you want to add this face in your database") 80 | addn(save) 81 | 82 | elif known_detected == 0 and unknown_detected == 0: 83 | print("No person found") 84 | generate_sound("No person found!") 85 | 86 | else: 87 | print("Too many people") 88 | generate_sound("Too many people.") 89 | except Exception as e: 90 | generate_sound("No recognisable face found!") 91 | print(e) 92 | 93 | if cv.waitKey(1) & 0xFF == 27: # ASCII for Esc Key 94 | break 95 | else: 96 | break 97 | cap.release() 98 | cv.destroyAllWindows() -------------------------------------------------------------------------------- /download.py: -------------------------------------------------------------------------------- 1 | ######################################################################## 2 | # 3 | # Functions for downloading and extracting data-files from the internet. 4 | # 5 | # Implemented in Python 3.5 6 | # 7 | ######################################################################## 8 | # 9 | # This file is part of the TensorFlow Tutorials available at: 10 | # 11 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials 12 | # 13 | # Published under the MIT License. See the file LICENSE for details. 
14 | # 15 | # Copyright 2016 by Magnus Erik Hvass Pedersen 16 | # 17 | ######################################################################## 18 | 19 | import sys 20 | import os 21 | import urllib.request 22 | import tarfile 23 | import zipfile 24 | 25 | ######################################################################## 26 | 27 | 28 | def _print_download_progress(count, block_size, total_size): 29 | """ 30 | Function used for printing the download progress. 31 | Used as a call-back function in maybe_download_and_extract(). 32 | """ 33 | 34 | # Percentage completion. 35 | pct_complete = float(count * block_size) / total_size 36 | 37 | # Limit it because rounding errors may cause it to exceed 100%. 38 | pct_complete = min(1.0, pct_complete) 39 | 40 | # Status-message. Note the \r which means the line should overwrite itself. 41 | msg = "\r- Download progress: {0:.1%}".format(pct_complete) 42 | 43 | # Print it. 44 | sys.stdout.write(msg) 45 | sys.stdout.flush() 46 | 47 | 48 | ######################################################################## 49 | 50 | def download(base_url, filename, download_dir): 51 | """ 52 | Download the given file if it does not already exist in the download_dir. 53 | :param base_url: The internet URL without the filename. 54 | :param filename: The filename that will be added to the base_url. 55 | :param download_dir: Local directory for storing the file. 56 | :return: Nothing. 57 | """ 58 | 59 | # Path for local file. 60 | save_path = os.path.join(download_dir, filename) 61 | 62 | # Check if the file already exists, otherwise we need to download it now. 63 | if not os.path.exists(save_path): 64 | # Check if the download directory exists, otherwise create it. 65 | if not os.path.exists(download_dir): 66 | os.makedirs(download_dir) 67 | 68 | print("Downloading", filename, "...") 69 | 70 | # Download the file from the internet. 71 | url = base_url + filename 72 | file_path, _ = urllib.request.urlretrieve(url=url, 73 | filename=save_path, 74 | reporthook=_print_download_progress) 75 | 76 | print(" Done!") 77 | 78 | 79 | def maybe_download_and_extract(url, download_dir): 80 | """ 81 | Download and extract the data if it doesn't already exist. 82 | Assumes the url is a tar-ball file. 83 | :param url: 84 | Internet URL for the tar-file to download. 85 | Example: "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 86 | :param download_dir: 87 | Directory where the downloaded file is saved. 88 | Example: "data/CIFAR-10/" 89 | :return: 90 | Nothing. 91 | """ 92 | 93 | # Filename for saving the file downloaded from the internet. 94 | # Use the filename from the URL and add it to the download_dir. 95 | filename = url.split('/')[-1] 96 | file_path = os.path.join(download_dir, filename) 97 | 98 | # Check if the file already exists. 99 | # If it exists then we assume it has also been extracted, 100 | # otherwise we need to download and extract it now. 101 | if not os.path.exists(file_path): 102 | # Check if the download directory exists, otherwise create it. 103 | if not os.path.exists(download_dir): 104 | os.makedirs(download_dir) 105 | 106 | # Download the file from the internet. 107 | file_path, _ = urllib.request.urlretrieve(url=url, 108 | filename=file_path, 109 | reporthook=_print_download_progress) 110 | 111 | print() 112 | print("Download finished. Extracting files.") 113 | 114 | if file_path.endswith(".zip"): 115 | # Unpack the zip-file. 
116 | zipfile.ZipFile(file=file_path, mode="r").extractall(download_dir) 117 | elif file_path.endswith((".tar.gz", ".tgz")): 118 | # Unpack the tar-ball. 119 | tarfile.open(name=file_path, mode="r:gz").extractall(download_dir) 120 | 121 | print("Done.") 122 | else: 123 | print("Data has apparently already been downloaded and unpacked.") 124 | 125 | 126 | ######################################################################## 127 | -------------------------------------------------------------------------------- /f_part.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from PIL import Image 3 | import coco 4 | from cache import cache 5 | import cv2 as cv 6 | from gtts import gTTS 7 | import matplotlib.pyplot as plt 8 | import tensorflow as tf 9 | import numpy as np 10 | import FaceToolKit as ftk 11 | import DetectionToolKit as dtk 12 | warnings.filterwarnings("ignore") 13 | from pymongo import MongoClient 14 | 15 | 16 | MONGODB_URI = "mongodb+srv://digivision:digivision@cluster0-3yht7.mongodb.net/test?retryWrites=true" 17 | client = MongoClient(MONGODB_URI) 18 | db = client.get_database("people") 19 | digi_db = db.trusted_people 20 | 21 | 22 | 23 | verification_threshhold = 0.600 24 | image_size = 160 25 | v = ftk.Verification() 26 | # Pre-load model for Verification 27 | v.load_model("./models/20180204-160909/") 28 | v.initial_input_output_tensors() 29 | 30 | d = dtk.Detection() 31 | 32 | def img_to_encoding(img): 33 | image = plt.imread(img) 34 | aligned = d.align(image, False)[0] 35 | return v.img_to_encoding(aligned, image_size) 36 | 37 | def distance(emb1, emb2): 38 | diff = np.subtract(emb1, emb2) 39 | return np.sum(np.square(diff)) 40 | 41 | def who_is_it(image_path): 42 | 43 | # Compute the target "encoding" for the image. Use img_to_encoding() 44 | encoding = img_to_encoding(image_path) 45 | 46 | # Find the closest encoding ## 47 | 48 | # Initialize "min_dist" to a large value, say 1000 49 | min_dist = 1000 50 | # Loop over the database dictionary's names and encodings. 51 | data = digi_db.find() 52 | for i in data: 53 | if list(i.keys())[0] != '_id': 54 | name = list(i.keys())[0] 55 | else: 56 | name = list(i.keys())[1] 57 | db_enc = np.array(i[name]) 58 | 59 | # Compute L2 distance between the target "encoding" and the current "emb" from the database. (≈ 1 line) 60 | dist = distance(encoding, db_enc) 61 | 62 | # If this distance is less than the min_dist, then set min_dist to dist, and identity to name. 
(≈ 3 lines) 63 | if min_dist > dist: 64 | min_dist = dist 65 | identity = name 66 | 67 | if min_dist > verification_threshhold: 68 | return min_dist, 'unknown' 69 | # else: 70 | # print ("it's " + str(identity) + ", the distance is " + str(min_dist)) 71 | 72 | return min_dist, identity 73 | -------------------------------------------------------------------------------- /faceadd.py: -------------------------------------------------------------------------------- 1 | import speech_recognition as sr 2 | from gensound import generate_sound 3 | import f_part 4 | import cv2 as cv 5 | def speech(abc): 6 | 7 | # obtain audio from the microphone 8 | r = sr.Recognizer() 9 | with sr.Microphone() as source: 10 | print("Pls say something....") 11 | generate_sound(abc) 12 | audio = r.listen(source) 13 | 14 | # recognize speech 15 | try: 16 | print("Google Audio:" + r.recognize_google(audio)) 17 | return(r.recognize_google(audio)) 18 | # print("Sphinx:" + r.recognize_sphinx(audio)) 19 | except sr.UnknownValueError: 20 | generate_sound("Could not understand your response Speak again") 21 | speech(abc) 22 | except sr.RequestError as e: 23 | print("error: {0}".format(e)) 24 | generate_sound("Connection Error") 25 | 26 | 27 | def addn(save): 28 | ans = str(speech("Yes or No")) 29 | if(ans == "yes" or ans == "Yes"): 30 | saveface(save) 31 | elif(ans == "no" or ans == "No"): 32 | ignoreface() 33 | else: 34 | generate_sound("Could not understand your response Answer again") 35 | addn(save) 36 | 37 | def saveface(save): 38 | # generate_sound("Tell me the name") 39 | x1 = speech("Tell me the name") 40 | print(x1 + ' face saved') 41 | cv.imwrite(r"images//" + 42 | str(x1) + ".jpg", save) 43 | data = {x1: f_part.img_to_encoding( 44 | "images//" + str(x1) + ".jpg").tolist()} 45 | f_part.digi_db.insert_one(data) 46 | 47 | 48 | def ignoreface(): 49 | generate_sound("Not saved") -------------------------------------------------------------------------------- /facenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/facenet/__init__.py -------------------------------------------------------------------------------- /facenet/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/facenet/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /facenet/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/facenet/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /facenet/__pycache__/face.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/facenet/__pycache__/face.cpython-35.pyc -------------------------------------------------------------------------------- /facenet/__pycache__/face.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/facenet/__pycache__/face.cpython-36.pyc 
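
Before the lower-level facenet/face.py wrapper below, a short note on how the recognition helpers above are used together: f_part.img_to_encoding() aligns the face via DetectionToolKit and produces an embedding with the pre-loaded verification model, the embedding is stored in the trusted_people MongoDB collection under the person's name, and f_part.who_is_it() later compares a query embedding against every stored document, returning 'unknown' when the closest distance exceeds the verification threshold. A minimal sketch of that round trip (the name and image paths are illustrative; enrolment in the application itself goes through faceadd.saveface()):

import f_part

# Enrol a face: compute its embedding and store it under a name,
# much as saveface() does after asking for the name by voice.
name = "Andrew"                                   # illustrative name
encoding = f_part.img_to_encoding("images/Andrew.jpg")
f_part.digi_db.insert_one({name: encoding.tolist()})

# Recognise a face: who_is_it() returns the distance to the closest
# stored embedding and the matching name, or 'unknown' if that
# distance is above verification_threshhold (0.600).
dist, identity = f_part.who_is_it("./test.jpg")
print(identity, "at dist of:", dist)
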
-------------------------------------------------------------------------------- /facenet/face.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from tensorflow.python.platform import gfile 4 | import tensorflow as tf 5 | import re 6 | 7 | def prewhiten(x): 8 | mean = np.mean(x) 9 | std = np.std(x) 10 | std_adj = np.maximum(std, 1.0 / np.sqrt(x.size)) 11 | y = np.multiply(np.subtract(x, mean), 1 / std_adj) 12 | return y 13 | 14 | 15 | def to_rgb(img): 16 | w, h = img.shape 17 | ret = np.empty((w, h, 3), dtype=np.uint8) 18 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 19 | return ret 20 | 21 | 22 | def get_model_filenames(model_dir): 23 | files = os.listdir(model_dir) 24 | meta_files = [s for s in files if s.endswith('.meta')] 25 | if len(meta_files)==0: 26 | raise ValueError('No meta file found in the model directory (%s)' % model_dir) 27 | elif len(meta_files)>1: 28 | raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir) 29 | meta_file = meta_files[0] 30 | meta_files = [s for s in files if '.ckpt' in s] 31 | max_step = -1 32 | for f in files: 33 | step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f) 34 | if step_str is not None and len(step_str.groups())>=2: 35 | step = int(step_str.groups()[1]) 36 | if step > max_step: 37 | max_step = step 38 | ckpt_file = step_str.groups()[0] 39 | return meta_file, ckpt_file 40 | 41 | def make_image_tensor(img, image_size, do_prewhiten=True): 42 | image = np.zeros((1, image_size, image_size, 3)) 43 | if img.ndim == 2: 44 | img = to_rgb(img) 45 | if do_prewhiten: 46 | img = prewhiten(img) 47 | image[0, :, :, :] = img 48 | return image 49 | 50 | def make_images_tensor(img1,img2,image_size, do_prewhiten=True): 51 | images = np.zeros((2, image_size, image_size, 3)) 52 | for i,img in enumerate([img1,img2]): 53 | if img.ndim == 2: 54 | img = to_rgb(img) 55 | if do_prewhiten: 56 | img = prewhiten(img) 57 | images[i, :, :, :] = img 58 | return images 59 | 60 | def load_model(model,session): 61 | # Check if the model is a model directory (containing a metagraph and a checkpoint file) 62 | # or if it is a protobuf file with a frozen graph 63 | model_exp = os.path.expanduser(model) 64 | if os.path.isfile(model_exp): 65 | print('Model filename: %s' % model_exp) 66 | with gfile.FastGFile(model_exp,'rb') as f: 67 | graph_def = tf.GraphDef() 68 | graph_def.ParseFromString(f.read()) 69 | tf.import_graph_def(graph_def, name='') 70 | else: 71 | print('Model directory: %s' % model_exp) 72 | meta_file, ckpt_file = get_model_filenames(model_exp) 73 | 74 | print('Metagraph file: %s' % meta_file) 75 | print('Checkpoint file: %s' % ckpt_file) 76 | 77 | saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file)) 78 | saver.restore(session, os.path.join(model_exp, ckpt_file)) -------------------------------------------------------------------------------- /gensound.py: -------------------------------------------------------------------------------- 1 | import pyttsx3 2 | 3 | def generate_sound(text): 4 | ''' 5 | Converts text to audio and plays it. 
6 | ''' 7 | engine = pyttsx3.init() 8 | engine.say(text) 9 | engine.runAndWait() -------------------------------------------------------------------------------- /gensoundgtts.py: -------------------------------------------------------------------------------- 1 | from pygame import mixer 2 | from tempfile import TemporaryFile 3 | from gtts import gTTS 4 | 5 | 6 | def generate_sound(text): 7 | ''' 8 | Converts text to audio and plays it. 9 | ''' 10 | language = 'en' 11 | myobj = gTTS(text=text, lang=language, slow=False) 12 | # slow = False for high speed 13 | sf = TemporaryFile() 14 | myobj.write_to_fp(sf) 15 | sf.seek(0) 16 | mixer.init() 17 | mixer.music.load(sf) 18 | mixer.music.play() 19 | -------------------------------------------------------------------------------- /images/Andrew.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/images/Andrew.jpg -------------------------------------------------------------------------------- /images/Capture.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/images/Capture.JPG -------------------------------------------------------------------------------- /images/Capture1.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/images/Capture1.JPG -------------------------------------------------------------------------------- /images/Capture2.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/images/Capture2.JPG -------------------------------------------------------------------------------- /images/andrew.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/images/andrew.jpg -------------------------------------------------------------------------------- /models/20180204-160909/20180204-16090.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/models/20180204-160909/20180204-16090.pb -------------------------------------------------------------------------------- /models/20180204-160909/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "/Users/yipsangleung/facenet/models/20180204-160909/model-20180204-160909.ckpt-266000" 2 | all_model_checkpoint_paths: "/Users/yipsangleung/facenet/models/20180204-160909/model-20180204-160909.ckpt-264000" 3 | all_model_checkpoint_paths: "/Users/yipsangleung/facenet/models/20180204-160909/model-20180204-160909.ckpt-265000" 4 | all_model_checkpoint_paths: "/Users/yipsangleung/facenet/models/20180204-160909/model-20180204-160909.ckpt-266000" 5 | -------------------------------------------------------------------------------- /models/20180204-160909/model-20180204-160909.ckpt-264000.data-00000-of-00001: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/models/20180204-160909/model-20180204-160909.ckpt-264000.data-00000-of-00001 -------------------------------------------------------------------------------- /models/20180204-160909/model-20180204-160909.ckpt-264000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/models/20180204-160909/model-20180204-160909.ckpt-264000.index -------------------------------------------------------------------------------- /models/20180204-160909/model-20180204-160909.ckpt-265000.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/models/20180204-160909/model-20180204-160909.ckpt-265000.data-00000-of-00001 -------------------------------------------------------------------------------- /models/20180204-160909/model-20180204-160909.ckpt-265000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/models/20180204-160909/model-20180204-160909.ckpt-265000.index -------------------------------------------------------------------------------- /models/20180204-160909/model-20180204-160909.ckpt-266000.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/models/20180204-160909/model-20180204-160909.ckpt-266000.data-00000-of-00001 -------------------------------------------------------------------------------- /models/20180204-160909/model-20180204-160909.ckpt-266000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/models/20180204-160909/model-20180204-160909.ckpt-266000.index -------------------------------------------------------------------------------- /models/20180204-160909/model-20180204-160909.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fazeVaib/DigiVision/ca8d4d08552bf9b36755f11be7393428ee83a3ea/models/20180204-160909/model-20180204-160909.meta -------------------------------------------------------------------------------- /p_part.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import tensorflow as tf 3 | import numpy as np 4 | import sys 5 | import os 6 | from PIL import Image 7 | import coco 8 | from cache import cache 9 | import cv2 as cv 10 | from gtts import gTTS 11 | from datetime import datetime, timedelta 12 | from tensorflow.python.keras import backend as K 13 | from tensorflow.python.keras.models import Model 14 | from tensorflow.python.keras.layers import GRU, Embedding, Dense, Input 15 | from tensorflow.python.keras.applications import VGG16 16 | from tensorflow.python.keras.optimizers import RMSprop 17 | from tensorflow.python.keras.callbacks import ModelCheckpoint, TensorBoard 18 | from tensorflow.python.keras.preprocessing.text import Tokenizer 19 | from tensorflow.python.keras.preprocessing.sequence import pad_sequences 20 | 21 | 22 | def load_image(path, size=None): 23 | 
""" 24 | Load the image from the given file-path and resize it 25 | to the given size if not None. 26 | """ 27 | 28 | img = Image.open(path) # loading image using PIL 29 | 30 | if not size is None: 31 | img = img.resize(size=size, resample=Image.LANCZOS) 32 | 33 | img = np.array(img) # img to numpy array 34 | 35 | img = img/255.0 # scaling them so they fall between 0 and 1 36 | 37 | # Convert 2-dim gray-scale array to 3-dim RGB array. 38 | if len(img.shape) == 2: 39 | img = np.repeat(img[:, :, np.newaxis], 3, axis=2) 40 | 41 | return img 42 | 43 | 44 | def show_image(idx, train): 45 | """ 46 | Load and plot an image from the training- or validation-set 47 | with the given index. 48 | """ 49 | 50 | if train: # uses image from training set 51 | dir = coco.train_dir 52 | filename = filenames_train[idx] 53 | captions = captions_train[idx] 54 | 55 | else: # uses image from validation set 56 | dir = coco.val_dir 57 | filename = filenames_val[idx] 58 | captions = captions_val[idx] 59 | 60 | # path for the image file 61 | path = os.path.join(dir, filename) 62 | 63 | # printing the captions for this image 64 | for caption in captions: 65 | print(caption) 66 | 67 | # load the image & plot it 68 | image = load_image(path) 69 | plt.imshow(image) 70 | plt.show() 71 | 72 | 73 | def generate_caption(image_path, max_tokens=30): 74 | """ 75 | Generate a caption for the image in the given path. 76 | The caption is limited to the given number of tokens (words). 77 | """ 78 | 79 | # Load and resize the image. 80 | image = load_image(image_path, size=img_size) 81 | 82 | # Expand the 3-dim numpy array to 4-dim 83 | # because the image-model expects a whole batch as input, 84 | # so we give it a batch with just one image. 85 | image_batch = np.expand_dims(image, axis=0) 86 | 87 | transfer_values = image_model_transfer.predict(image_batch) 88 | 89 | shape = (1, max_tokens) 90 | decoder_input_data = np.zeros(shape=shape, dtype=np.int) 91 | 92 | token_int = token_start 93 | 94 | output_text = '' 95 | 96 | count_tokens = 0 97 | 98 | while token_int != token_end and count_tokens < max_tokens: 99 | 100 | decoder_input_data[0, count_tokens] = token_int 101 | 102 | x_data = { 103 | 'transfer_values_input': transfer_values, 104 | 'decoder_input': decoder_input_data 105 | } 106 | 107 | # Input this data to the decoder and get the predicted output. 108 | decoder_output = decoder_model.predict(x_data) 109 | 110 | token_onehot = decoder_output[0, count_tokens, :] 111 | 112 | token_int = np.argmax(token_onehot) 113 | 114 | sampled_word = tokenizer.token_to_word(token_int) 115 | 116 | output_text += " " + sampled_word 117 | 118 | # Increment the token-counter. 119 | count_tokens += 1 120 | 121 | output_tokens = decoder_input_data[0] 122 | return output_text 123 | 124 | 125 | def print_progress(count, max_count): 126 | # Percentage Completion 127 | pct_complete = count/max_count 128 | 129 | # Status-message. Note the \r which means the line should overwrite itself 130 | msg = '\r- Progress: {0:.1%}'.format(pct_complete) 131 | 132 | sys.stdout.write(msg) 133 | sys.stdout.flush() 134 | 135 | 136 | def process_images(data_dir, filenames, batch_size=32): 137 | """ 138 | Process all the given files in the given data_dir using the 139 | pre-trained image-model and return their transfer-values. 140 | 141 | Note that we process the images in batches to save 142 | memory and improve efficiency on the GPU. 143 | """ 144 | 145 | # Number of images to process. 
146 | num_images = len(filenames) 147 | 148 | # Pre-allocate input-batch-array for images. 149 | shape = (batch_size,) + img_size + (3,) 150 | image_batch = np.zeros(shape=shape, dtype=np.float16) 151 | 152 | # Pre-allocate output-array for transfer-values. 153 | # Note that we use 16-bit floating-points to save memory. 154 | shape = (num_images, transfer_values_size) 155 | transfer_values = np.zeros(shape=shape, dtype=np.float16) 156 | 157 | # Initialize index into the filenames. 158 | start_index = 0 159 | 160 | # Process batches of image-files. 161 | while start_index < num_images: 162 | # Print the percentage-progress. 163 | print_progress(count=start_index, max_count=num_images) 164 | 165 | # End-index for this batch. 166 | end_index = start_index + batch_size 167 | 168 | # Ensure end-index is within bounds. 169 | if end_index > num_images: 170 | end_index = num_images 171 | 172 | # The last batch may have a different batch-size. 173 | current_batch_size = end_index - start_index 174 | 175 | # Load all the images in the batch. 176 | for i, filename in enumerate(filenames[start_index:end_index]): 177 | # Path for the image-file. 178 | path = os.path.join(data_dir, filename) 179 | 180 | # Load and resize the image. 181 | # This returns the image as a numpy-array. 182 | img = load_image(path, size=img_size) 183 | 184 | # Save the image for later use. 185 | image_batch[i] = img 186 | 187 | # Use the pre-trained image-model to process the image. 188 | # Note that the last batch may have a different size, 189 | # so we only use the relevant images. 190 | transfer_values_batch = image_model_transfer.predict( 191 | image_batch[0:current_batch_size]) 192 | 193 | # Save the transfer-values in the pre-allocated array. 194 | transfer_values[start_index:end_index] = transfer_values_batch[0:current_batch_size] 195 | 196 | # Increase the index for the next loop-iteration. 197 | start_index = end_index 198 | 199 | # Print newline. 200 | print() 201 | 202 | return transfer_values 203 | 204 | 205 | def process_images_train(): 206 | print( 207 | "Processing {0} images in training-set. ".format(len(filenames_train))) 208 | 209 | # path for cache file 210 | cache_path = os.path.join(coco.data_dir, "transfer_values_train.pkl") 211 | 212 | # If the cache-file already exists then reload it, 213 | # otherwise process all images and save their transfer-values 214 | # to the cache-file so it can be reloaded quickly. 215 | transfer_values = cache(cache_path=cache_path, fn=process_images, 216 | data_dir=coco.train_dir, filenames=filenames_train) 217 | return transfer_values 218 | 219 | 220 | def process_images_val(): 221 | print( 222 | "Processing {0} images in validation-set. ".format(len(filenames_val))) 223 | 224 | # path for cache file 225 | cache_path = os.path.join(coco.data_dir, "transfer_values_val.pkl") 226 | 227 | # If the cache-file already exists then reload it, 228 | # otherwise process all images and save their transfer-values 229 | # to the cache-file so it can be reloaded quickly. 
230 | transfer_values = cache(cache_path=cache_path, fn=process_images, 231 | data_dir=coco.val_dir, filenames=filenames_val) 232 | return transfer_values 233 | 234 | 235 | def mark_captions(multi_cap_list): 236 | captions_marked = [ 237 | [mark_start + caption + mark_end for caption in cap_list] 238 | for cap_list in multi_cap_list] 239 | return captions_marked 240 | 241 | 242 | def flatten(multi_cap_list): 243 | captions_list = [caption 244 | for cap_list in multi_cap_list 245 | for caption in cap_list] 246 | return captions_list 247 | 248 | 249 | class TokenizerWrap(Tokenizer): 250 | """Wrap the Tokenizer-class from Keras with more functionality.""" 251 | 252 | def __init__(self, texts, num_words=None): 253 | """ 254 | :param texts: List of strings with the data-set. 255 | :param num_words: Max number of words to use. 256 | """ 257 | 258 | Tokenizer.__init__(self, num_words=num_words) 259 | 260 | # Create the vocabulary from the texts. 261 | self.fit_on_texts(texts) 262 | 263 | # Create inverse lookup from integer-tokens to words. 264 | self.index_to_word = dict(zip(self.word_index.values(), 265 | self.word_index.keys())) 266 | 267 | def token_to_word(self, token): 268 | """Look up a single word from an integer-token.""" 269 | 270 | word = " " if token == 0 else self.index_to_word[token] 271 | return word 272 | 273 | def tokens_to_string(self, tokens): 274 | """Convert a list of integer-tokens to a string.""" 275 | 276 | # Create a list of the individual words. 277 | words = [self.index_to_word[token] 278 | for token in tokens 279 | if token != 0] 280 | 281 | # Concatenate the words to a single string 282 | # with space between all the words. 283 | text = " ".join(words) 284 | 285 | return text 286 | 287 | def captions_to_tokens(self, captions_listlist): 288 | """ 289 | Convert a list-of-list with text-captions to 290 | a list-of-list of integer-tokens. 291 | """ 292 | 293 | # Note that texts_to_sequences() takes a list of texts. 294 | tokens = [self.texts_to_sequences(captions_list) 295 | for captions_list in captions_listlist] 296 | 297 | return tokens 298 | 299 | 300 | def get_random_cap_tokens(idx): 301 | """ 302 | Given a list of indices for images in the training-set, 303 | select a token-sequence for a random caption, 304 | and return a list of all these token-sequences. 305 | """ 306 | 307 | result = [] # empty list for result 308 | 309 | # for each of the indices 310 | for i in idx: 311 | j = np.random.choice(len(tokens_train[i])) 312 | 313 | # get jth token-seq for image i 314 | tokens = tokens_train[i][j] 315 | 316 | result.append(tokens) 317 | 318 | return result 319 | 320 | 321 | def batch_generator(batch_size): 322 | """ 323 | Generator function for creating random batches of training-data. 324 | """ 325 | 326 | # Infinite loop 327 | while True: 328 | # returns list of randomly selected indices 329 | idx = np.random.randint(num_train_img, size=batch_size) 330 | 331 | # Get the pre-computed transfer-values for those images. 332 | # These are the outputs of the pre-trained image-model. 
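    # For example, with the VGG16 'fc2' layer used as the transfer-layer later in this file,
    # each row of this array is a 4096-dimensional float16 vector, one per sampled image.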
333 | transfer_values = transfer_values_train[idx] 334 | 335 | # get a random caption's token-sequence for each of the randomly chosen images 336 | tokens = get_random_cap_tokens(idx) 337 | 338 | # number of tokens in each token-sequence 339 | num_tokens = [len(t) for t in tokens] 340 | 341 | # max number of tokens 342 | max_tokens = np.max(num_tokens) 343 | 344 | # pad all token-sequences to the same length so they can be fed to the neural network 345 | tokens_padded = pad_sequences( 346 | tokens, maxlen=max_tokens, padding='post', truncating='post') 347 | 348 | # the decoder will try to map each token-sequence to itself shifted one time-step 349 | decoder_input_data = tokens_padded[:, 0:-1] 350 | decoder_output_data = tokens_padded[:, 1:] 351 | 352 | # dict for the input data; since there are several inputs, a named dict ensures the data is assigned correctly 353 | x_data = { 354 | 'decoder_input': decoder_input_data, 355 | 'transfer_values_input': transfer_values 356 | } 357 | 358 | # dict for the output data 359 | y_data = { 360 | 'decoder_output': decoder_output_data 361 | } 362 | 363 | yield (x_data, y_data) 364 | 365 | 366 | def connect_decoder(transfer_values): 367 | # Map the transfer-values so the dimensionality matches the internal state of the GRU layers. This means 368 | # we can use the mapped transfer-values as the initial state of the GRU layers. 369 | 370 | initial_state = decoder_transfer_map(transfer_values) 371 | 372 | # start the decoder network with the input layer 373 | net = decoder_input 374 | 375 | # connect the embedding layer 376 | net = decoder_embedding(net) 377 | 378 | # connect all GRU layers 379 | net = decoder_gru1(net, initial_state=initial_state) 380 | net = decoder_gru2(net, initial_state=initial_state) 381 | net = decoder_gru3(net, initial_state=initial_state) 382 | 383 | # connect the final dense layer that outputs logits over the vocabulary 384 | decoder_output = decoder_dense(net) 385 | 386 | return decoder_output 387 | 388 | 389 | def sparse_cross_entropy(y_true, y_pred): 390 | """ 391 | Calculate the cross-entropy loss between y_true and y_pred. 392 | 393 | y_true is a 2-rank tensor with the desired output. 394 | The shape is [batch_size, sequence_length] and it 395 | contains sequences of integer-tokens. 396 | 397 | y_pred is the decoder's output which is a 3-rank tensor 398 | with shape [batch_size, sequence_length, num_words] 399 | so that for each sequence in the batch there is a one-hot 400 | encoded array of length num_words. 401 | """ 402 | 403 | # Calculate the loss. This outputs a 2-rank tensor of shape [batch_size, seq_length] 404 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 405 | labels=y_true, logits=y_pred) 406 | 407 | # Keras may reduce this loss across the first axis (the batch), but the semantics are unclear, 408 | # so to be sure we reduce the entire 2-rank tensor to a single scalar with the mean function. 
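    # For example (illustrative shapes, assuming the batch_size of 256 set below and a
    # padded sequence length of 20): `loss` is then a [256, 19] tensor, since the input and
    # output sequences are shifted by one step, and taking the mean collapses it to one scalar.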
409 | loss_mean = tf.reduce_mean(loss) 410 | 411 | return loss_mean 412 | 413 | # ENTER YOUR CUSTOM PATH WHERE COCO DATASET IS STORED 414 | coco.set_data_dir("/mnt/MyDrive/Datasets/image-cap/data/coco") 415 | 416 | _, filenames_train, captions_train = coco.load_records(train=True) 417 | 418 | num_train_img = len(filenames_train) 419 | 420 | _, filenames_val, captions_val = coco.load_records(train=False) 421 | 422 | num_val_img = len(filenames_val) 423 | 424 | image_model = VGG16(include_top=True, weights='imagenet') 425 | 426 | transfer_layer = image_model.get_layer('fc2') 427 | 428 | image_model_transfer = Model( 429 | inputs=image_model.input, outputs=transfer_layer.output) 430 | 431 | img_size = K.int_shape(image_model.input)[1:3] 432 | # print(img_size) 433 | 434 | transfer_values_size = K.int_shape(transfer_layer.output)[1] 435 | 436 | transfer_values_train = process_images_train() 437 | 438 | transfer_values_val = process_images_val() 439 | 440 | mark_start = 'ssss ' 441 | mark_end = ' eeee' 442 | 443 | captions_train_marked = mark_captions(captions_train) 444 | 445 | captions_train_flat = flatten(captions_train_marked) 446 | 447 | num_words = 10000 448 | 449 | tokenizer = TokenizerWrap(texts=captions_train_flat, num_words=num_words) 450 | 451 | token_start = tokenizer.word_index[mark_start.strip()] 452 | 453 | token_end = tokenizer.word_index[mark_end.strip()] 454 | 455 | tokens_train = tokenizer.captions_to_tokens(captions_train_marked) 456 | 457 | batch_size = 256 458 | 459 | generator = batch_generator(batch_size=batch_size) 460 | 461 | batch = next(generator) 462 | batch_x = batch[0] 463 | batch_y = batch[1] 464 | 465 | num_cap_train = [len(cap) for cap in captions_train] 466 | 467 | total_num_cap_train = np.sum(num_cap_train) 468 | 469 | steps_per_epoch = int(total_num_cap_train / batch_size) 470 | 471 | state_size = 512 472 | 473 | embedding_size = 128 474 | 475 | transfer_values_input = Input( 476 | shape=(transfer_values_size,), name='transfer_values_input') 477 | 478 | decoder_transfer_map = Dense( 479 | state_size, activation='tanh', name='decoder_transfer_map') 480 | 481 | decoder_input = Input(shape=(None,), name='decoder_input') 482 | 483 | decoder_embedding = Embedding( 484 | input_dim=num_words, output_dim=embedding_size, name='decoder_embedding') 485 | 486 | decoder_gru1 = GRU(state_size, name='decoder_gru1', return_sequences=True) 487 | decoder_gru2 = GRU(state_size, name='decoder_gru2', return_sequences=True) 488 | decoder_gru3 = GRU(state_size, name='decoder_gru3', return_sequences=True) 489 | 490 | decoder_dense = Dense(num_words, activation='linear', name='decoder_output') 491 | 492 | decoder_output = connect_decoder(transfer_values=transfer_values_input) 493 | decoder_model = Model( 494 | inputs=[transfer_values_input, decoder_input], outputs=[decoder_output]) 495 | 496 | optimizer = RMSprop(lr=1e-3) 497 | 498 | decoder_target = tf.placeholder(dtype='int32', shape=(None, None)) 499 | 500 | decoder_model.compile( 501 | optimizer=optimizer, loss=sparse_cross_entropy, target_tensors=[decoder_target]) 502 | 503 | path_checkpoint = './IC_checkpoints.keras' 504 | callback_checkpoints = ModelCheckpoint( 505 | filepath=path_checkpoint, verbose=1, save_weights_only=True) 506 | 507 | callback_tensorboard = TensorBoard( 508 | log_dir='./IC_logs/', histogram_freq=0, write_graph=False) 509 | 510 | callbacks = [callback_checkpoints, callback_tensorboard] 511 | 512 | try: 513 | decoder_model.load_weights(path_checkpoint) 514 | except Exception as error: 515 | print('Error 
trying to load checkpoint') 516 | print(error) 517 | --------------------------------------------------------------------------------
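A minimal end-to-end usage sketch, assuming the COCO data directory and the IC_checkpoints.keras weights referenced above are in place (the image path below is a placeholder):

import p_part                            # module-level code loads VGG16, the COCO records and the decoder weights
from gensoundgtts import generate_sound  # gTTS + pygame text-to-speech helper

caption = p_part.generate_caption('./sample.jpg', max_tokens=30)  # placeholder image path
print(caption)                           # note: the raw caption may still end with the ' eeee' end-marker
generate_sound(caption)                  # speak the caption aloud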