├── App.py
├── README.md
├── encod_list.data
├── face_list.data
├── facecam.py
├── object_camera.py
├── requirements.txt
└── templates
    ├── face_index.html
    ├── face_index2.html
    ├── object_index.html
    └── object_index2.html

--------------------------------------------------------------------------------
/App.py:
--------------------------------------------------------------------------------
from flask import Flask, render_template, Response, redirect
from object_camera import VideoCamera
from facecam import Facecamera

app = Flask(__name__)

video_stream = VideoCamera()
face_video_stream = Facecamera()


@app.route('/')
def index():
    return render_template('object_index.html')

def gen(camera):
    # Multipart MJPEG generator: the first frame is requested with flag=1 so the
    # camera announces itself and (re)opens the device; later frames use flag=0.
    flag = 1
    while True:
        frame = camera.get_frame(flag)
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')
        flag = 0

@app.route('/object_start_video')
def object_start_video():
    return Response(gen(video_stream),
                    mimetype='multipart/x-mixed-replace; boundary=frame')

@app.route('/object_close_video')
def object_close_video():
    video_stream.close()
    return render_template('object_index2.html')

############### connectivity

@app.route('/face_recog')
def face_recog():
    video_stream.switching()
    return render_template('face_index.html')

################ face

def gen_face(camera):
    flag = 1
    while True:
        frame = camera.Detection(flag)
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')
        flag = 0

@app.route('/start_video')
def start_video():
    return Response(gen_face(face_video_stream),
                    mimetype='multipart/x-mixed-replace; boundary=frame')

@app.route('/Add_face')
def Add_face():
    face_video_stream.AddNewFace()
    return redirect('/face_recog')

@app.route('/close_video')
def close_video():
    face_video_stream.close()
    return render_template('face_index2.html')


if __name__ == '__main__':
    app.run(threaded=False)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Virtual Assistant
A virtual assistant for the visually impaired that combines face recognition, object detection, text-to-speech, and speech recognition. The idea behind this project is to host the website and use it from a mobile phone, so the UI is designed for mobile screens.

### Requirements and usage
* Clone this repository: `git clone https://github.com/Deimos-M/DL-Virtual-Assistant.git`
* Install the dependencies: `pip install -r requirements.txt`
* Download the Tiny YOLOv3 pre-trained model (`yolo-tiny.h5`) from here: **Download**
* Run the application: `python App.py`
* Once the link appears, open it in your web browser and switch to the mobile view:
  * Press `ctrl+shift+M` in Mozilla Firefox.
  * Press `F12`, then `ctrl+shift+M`, in Chrome.

Note: this website runs on the local machine and is not yet hosted on the internet.
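Before launching, a quick pre-flight check can save a debugging round trip. This is a convenience sketch, not part of the app; it assumes the weights file sits in the project root and the webcam is device 0:

```python
import os
import cv2

# Pre-flight check before `python App.py` (convenience sketch, not part of the repo)
assert os.path.exists("yolo-tiny.h5"), "download yolo-tiny.h5 into the project root first"
cap = cv2.VideoCapture(0)                  # the app reads from the default webcam
assert cap.isOpened(), "no webcam found on device 0"
cap.release()
print("OK: model file and webcam available")
```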
--------------------------------------------------------------------------------
/encod_list.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Deimos-M/DL-Virtual-Assistant/615bc4eeb95b5e3b47a74ca52c8524212bdf4f53/encod_list.data
--------------------------------------------------------------------------------
/face_list.data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Deimos-M/DL-Virtual-Assistant/615bc4eeb95b5e3b47a74ca52c8524212bdf4f53/face_list.data
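The two `.data` files are pickled, parallel lists: `encod_list.data` holds 128-dimensional face encodings (NumPy arrays produced by `face_recognition`) and `face_list.data` holds the matching names in the same order, as `facecam.py` below shows. A quick inspection sketch, not part of the app:

```python
import pickle

# Peek at the pickled face database: two lists kept in the same order.
with open("encod_list.data", "rb") as fh:
    encodings = pickle.load(fh)   # list of 128-d numpy arrays
with open("face_list.data", "rb") as fh:
    names = pickle.load(fh)       # list of names, one per encoding

print(len(encodings), "encodings for:", names)
```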
--------------------------------------------------------------------------------
/facecam.py:
--------------------------------------------------------------------------------
import face_recognition
import cv2
import numpy as np
import pickle

import pyttsx3
import threading

import speech_recognition as sr
import os
os.chdir(r'C:\Users\***\Desktop\web app\finalapp')  # machine-specific path so the pickled data files resolve
import nest_asyncio
nest_asyncio.apply()  # allow nested event loops (e.g. when run from a notebook)

class Facecamera(object):

    # Initialize some variables
    face_locations = []
    face_encodings = []
    face_names = []
    process_this_frame = True
    new_frame = None

    # Load the known-face database (parallel lists of encodings and names)
    with open('encod_list.data', 'rb') as filehandle:
        known_face_encodings = pickle.load(filehandle)
    with open('face_list.data', 'rb') as filehandle1:
        known_face_names = pickle.load(filehandle1)

    # Buffer of recently announced names; a slot resets to 0 after 20 seconds
    # so the same person is not announced on every frame.
    All_faces = [0, 0, 0, 0, 0, 0]

    def second(self, name):
        self.All_faces[self.All_faces.index(name)] = 0

    def __init__(self):
        self.video_capture = cv2.VideoCapture(0)

    def __del__(self):
        self.video_capture.release()

    # Add a new face to the database
    def AddNewFace(self):
        if "Unknown" in self.All_faces:
            # new_frame is RGB; flip back to BGR for OpenCV and upscale before saving
            newadd_frame = self.new_frame[:, :, ::-1]
            newadd_frame = cv2.resize(newadd_frame, (0, 0), fx=3, fy=3)

            cv2.imwrite("NewPicture.jpg", newadd_frame)

            engine = pyttsx3.init()
            engine.say("Adding new face..")
            engine.runAndWait()

            # Ask for the person's name, then file the snapshot under it
            new = self.speech()
            dst = new + ".jpg"
            os.rename("NewPicture.jpg", dst)

            new_image = face_recognition.load_image_file(dst)
            new_face_encoding = face_recognition.face_encodings(new_image)[0]
            self.known_face_encodings.append(new_face_encoding)
            self.known_face_names.append(new)

            engine = pyttsx3.init()
            engine.say("The face has been added successfully.")
            engine.runAndWait()

        else:
            engine = pyttsx3.init()
            engine.say("No unknown face detected.")
            engine.runAndWait()

    # Speech to text: ask for and return the new person's name
    def speech(self):
        # get audio from the microphone
        r = sr.Recognizer()
        engine = pyttsx3.init()

        with sr.Microphone() as source:
            engine.say("Please wait. Calibrating microphone...")
            engine.runAndWait()
            # listen for 3 seconds to measure the ambient noise energy level
            r.adjust_for_ambient_noise(source, duration=3)

            engine.say("Please speak the full name of the person")
            engine.runAndWait()

            audio = r.listen(source)

        try:
            name = r.recognize_google(audio)
            engine.say("You said " + name)
            engine.runAndWait()

        except sr.UnknownValueError:
            engine.say("Could not understand audio")
            engine.runAndWait()
            return self.speech()

        except sr.RequestError:
            engine.say("Check internet")
            engine.runAndWait()
            return self.speech()

        return name

    def Detection(self, flagrun):
        self.process_this_frame = True
        flag = 1

        if flagrun == 1:
            engine = pyttsx3.init()
            engine.say('Starting face recognition live stream.')
            engine.runAndWait()
            self.video_capture = cv2.VideoCapture(0)

        while flag == 1:
            # Grab a single frame of video
            ret, frame = self.video_capture.read()

            # Resize the frame to 1/4 size for faster face recognition processing
            small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

            # Convert from BGR (which OpenCV uses) to RGB (which face_recognition uses)
            rgb_small_frame = small_frame[:, :, ::-1]

            self.new_frame = rgb_small_frame  # keep a copy for AddNewFace()

            # Only process every other frame of video to save time
            if self.process_this_frame:
                # Find all the faces and face encodings in the current frame
                self.face_locations = face_recognition.face_locations(rgb_small_frame)
                self.face_encodings = face_recognition.face_encodings(rgb_small_frame, self.face_locations)

                self.face_names = []
                for face_encoding in self.face_encodings:
                    matches = face_recognition.compare_faces(self.known_face_encodings, face_encoding)
                    name = "Unknown"

                    # Use the known face with the smallest distance to the new face
                    face_distances = face_recognition.face_distance(self.known_face_encodings, face_encoding)
                    best_match_index = np.argmin(face_distances)
                    if matches[best_match_index]:
                        name = self.known_face_names[best_match_index]
                    self.face_names.append(name)

                    # Announce the name once, then suppress it for 20 seconds
                    if name not in self.All_faces:
                        if self.All_faces[0] != 0:
                            for i in range(5, 0, -1):
                                self.All_faces[i] = self.All_faces[i - 1]
                        self.All_faces[0] = name

                        timer = threading.Timer(20, self.second, args=[name])
                        timer.start()

                        engine = pyttsx3.init()
                        engine.say('I see ' + name)
                        engine.runAndWait()

            self.process_this_frame = not self.process_this_frame

            # Display the results
            for (top, right, bottom, left), name in zip(self.face_locations, self.face_names):
                # Scale face locations back up since the detection frame was 1/4 size
                top *= 4
                right *= 4
                bottom *= 4
                left *= 4

                # Draw a box around the face
                cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

                # Draw a label with the name below the face
                cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
                font = cv2.FONT_HERSHEY_DUPLEX
                cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)

            ret, jpeg = cv2.imencode('.jpg', frame)
            return jpeg.tobytes()

    # Closing: persist the (possibly extended) face database and stop the camera
    def close(self):
        # saving the lists (TODO: how/whether to save the final unknown face)
        with open('encod_list.data', 'wb') as filehandle:
            pickle.dump(self.known_face_encodings, filehandle)
        with open('face_list.data', 'wb') as filehandle1:
            pickle.dump(self.known_face_names, filehandle1)

        engine = pyttsx3.init()
        engine.say('Stopping live stream.')
        engine.runAndWait()
        self.video_capture.release()
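For reference, the matching inside `Detection()` reduces to a nearest-neighbour lookup over the stored encodings. A standalone sketch of the same logic (`best_match` is an illustrative name, not a function in this repo; 0.6 is `face_recognition`'s default tolerance):

```python
import face_recognition
import numpy as np

def best_match(known_encodings, known_names, face_encoding, tolerance=0.6):
    # Distance of the new encoding to every known one; smaller means more similar.
    distances = face_recognition.face_distance(known_encodings, face_encoding)
    best = int(np.argmin(distances))
    return known_names[best] if distances[best] <= tolerance else "Unknown"
```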
--------------------------------------------------------------------------------
/object_camera.py:
--------------------------------------------------------------------------------
import cv2
# AI libraries
from imageai.Detection import ObjectDetection
import os

import pyttsx3
import threading

class VideoCamera(object):
    execution_path = os.getcwd()
    detector = ObjectDetection()
    detector.setModelTypeAsTinyYOLOv3()
    detector.setModelPath(os.path.join(execution_path, "yolo-tiny.h5"))
    detector.loadModel(detection_speed="flash")

    # Buffer of recently announced object names; a slot resets to 0 after
    # 20 seconds so the same object is not announced on every frame.
    All_faces = [0, 0, 0, 0, 0, 0]

    def __init__(self):
        self.video = cv2.VideoCapture(0)

    def __del__(self):
        self.video.release()

    # Free a buffer slot once its 20-second timer fires
    def second(self, name):
        self.All_faces[self.All_faces.index(name)] = 0

    # Text-to-speech: announce each newly seen object once
    def speak(self, detections):
        for eachObject in detections:
            name = eachObject["name"]
            if name not in self.All_faces:
                if self.All_faces[0] != 0:
                    for i in range(5, 0, -1):
                        self.All_faces[i] = self.All_faces[i - 1]
                self.All_faces[0] = name

                timer = threading.Timer(20, self.second, args=[name])
                timer.start()

                engine = pyttsx3.init()
                engine.say('I see ' + name)
                engine.runAndWait()

    # Detection: grab one frame, run TinyYOLOv3 on it, return it as JPEG bytes
    def get_frame(self, flag):
        if flag == 1:
            engine = pyttsx3.init()
            engine.say('Starting object detection live stream.')
            engine.runAndWait()
            self.video = cv2.VideoCapture(0)

        ret, frame = self.video.read()

        detected_image_array, detections = self.detector.detectObjectsFromImage(
            input_type="array", input_image=frame, output_type="array",
            minimum_percentage_probability=30)

        self.speak(detections)

        ret, jpeg = cv2.imencode('.jpg', detected_image_array)
        return jpeg.tobytes()

    # Release the camera before handing over to face recognition
    def switching(self):
        if self.video.isOpened():
            engine = pyttsx3.init()
            engine.say('Switching to face recognition')
            engine.runAndWait()
            self.video.release()

    # Closing
    def close(self):
        engine = pyttsx3.init()
        engine.say('Stopping live stream.')
        engine.runAndWait()
        self.video.release()
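The same detector can be exercised outside the webcam loop, which is handy for checking the model file before wiring up the stream. A sketch using the same ImageAI calls as `VideoCamera`, but on a single image file (`sample.jpg` and `detected.jpg` are placeholder names):

```python
from imageai.Detection import ObjectDetection
import os

# One-shot TinyYOLOv3 detection on a still image, mirroring VideoCamera's setup.
detector = ObjectDetection()
detector.setModelTypeAsTinyYOLOv3()
detector.setModelPath(os.path.join(os.getcwd(), "yolo-tiny.h5"))
detector.loadModel(detection_speed="flash")

detections = detector.detectObjectsFromImage(
    input_image="sample.jpg", output_image_path="detected.jpg",
    minimum_percentage_probability=30)
for obj in detections:
    print(obj["name"], obj["percentage_probability"])
```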
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
absl-py==0.9.0
astor==0.8.1
cachetools==4.1.0
certifi==2020.4.5.1
chardet==3.0.4
click==7.1.1
cycler==0.10.0
dlib==19.19.0
face-recognition==1.3.0
face-recognition-models==0.3.0
Flask==1.1.2
gast==0.2.2
google-auth==1.14.1
google-auth-oauthlib==0.4.1
google-pasta==0.2.0
grpcio==1.28.1
gunicorn==20.0.4
h5py==2.10.0
idna==2.9
imageai==2.1.5
itsdangerous==1.1.0
Jinja2==2.11.1
Keras==2.3.1
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.0
kiwisolver==1.2.0
Markdown==3.2.1
MarkupSafe==1.1.1
matplotlib==3.2.1
nest-asyncio==1.3.2
numpy==1.18.3
oauthlib==3.1.0
opencv-python==4.2.0.34
opt-einsum==3.2.1
Pillow==7.1.2
protobuf==3.11.3
pyasn1==0.4.8
pyasn1-modules==0.2.8
PyAudio==0.2.11
pyparsing==2.4.7
pypiwin32==223
python-dateutil==2.8.1
pyttsx3==2.71
pywin32==227
PyYAML==5.3.1
requests==2.23.0
requests-oauthlib==1.3.0
rsa==4.0
scipy==1.4.1
six==1.14.0
SpeechRecognition==3.8.1
tensorboard==1.14.0
tensorflow==1.14.0
tensorflow-estimator==1.14.0
tensorflow-gpu==1.14.0
termcolor==1.1.0
urllib3==1.25.9
Werkzeug==1.0.1
wrapt==1.12.1
--------------------------------------------------------------------------------
/templates/face_index.html:
--------------------------------------------------------------------------------
(template markup lost in extraction; page title: "Video Stream")
--------------------------------------------------------------------------------
/templates/face_index2.html:
--------------------------------------------------------------------------------
(template markup lost in extraction; page title: "Video Stream")
--------------------------------------------------------------------------------
/templates/object_index.html:
--------------------------------------------------------------------------------
(template markup lost in extraction; page title: "Video Stream")
--------------------------------------------------------------------------------
/templates/object_index2.html:
--------------------------------------------------------------------------------
(template markup lost in extraction; page title: "Video Stream")
--------------------------------------------------------------------------------
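Since the templates are not recoverable here, the stream can still be verified directly against the Flask routes. A client-side sketch (not part of the repo) that reads the multipart MJPEG stream `gen()` emits and saves the first frame, assuming `App.py` is running on Flask's default port 5000:

```python
import requests

# Pull frames from /object_start_video and save the first complete JPEG.
stream = requests.get("http://127.0.0.1:5000/object_start_video",
                      stream=True, timeout=10)
buf = b""
for chunk in stream.iter_content(chunk_size=4096):
    buf += chunk
    start = buf.find(b"\xff\xd8")            # JPEG start-of-image marker
    end = buf.find(b"\xff\xd9", start + 2)   # JPEG end-of-image marker
    if start != -1 and end != -1:
        with open("first_frame.jpg", "wb") as f:
            f.write(buf[start:end + 2])
        break
stream.close()
```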