├── Code ├── Install_Packages.txt ├── Install_Packages_gpu.txt ├── Rotate_images.py ├── cnn_model_train.py ├── create_gestures.py ├── display_gestures.py ├── final.py ├── gesture_db.db ├── hist ├── load_images.py └── set_hand_histogram.py ├── FUNDING.yml ├── LICENSE ├── README.md ├── Team Linear Digressors-Presentation.pdf └── img ├── Capture.PNG ├── Capture1.PNG ├── demo.gif ├── demo2.gif ├── demo3.gif └── demo4.gif /Code/Install_Packages.txt: -------------------------------------------------------------------------------- 1 | h5py 2 | numpy 3 | scikit-learn 4 | sklearn 5 | keras 6 | opencv-python 7 | pyttsx3 8 | -------------------------------------------------------------------------------- /Code/Install_Packages_gpu.txt: -------------------------------------------------------------------------------- 1 | h5py 2 | numpy 3 | scikit-learn 4 | sklearn 5 | tensorflow-gpu 6 | keras 7 | opencv-python 8 | pyttsx3 9 | -------------------------------------------------------------------------------- /Code/Rotate_images.py: -------------------------------------------------------------------------------- 1 | import cv2, os 2 | 3 | def flip_images(): 4 | gest_folder = "gestures" 5 | images_labels = [] 6 | images = [] 7 | labels = [] 8 | for g_id in os.listdir(gest_folder): 9 | for i in range(1200): 10 | path = gest_folder+"/"+g_id+"/"+str(i+1)+".jpg" 11 | new_path = gest_folder+"/"+g_id+"/"+str(i+1+1200)+".jpg" 12 | print(path) 13 | img = cv2.imread(path, 0) 14 | img = cv2.flip(img, 1) 15 | cv2.imwrite(new_path, img) 16 | 17 | flip_images() 18 | -------------------------------------------------------------------------------- /Code/cnn_model_train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | import cv2, os 4 | from glob import glob 5 | from keras import optimizers 6 | from keras.models import Sequential 7 | from keras.layers import Dense 8 | from keras.layers import Dropout 9 | from keras.layers import Flatten 10 | from keras.layers.convolutional import Conv2D 11 | from keras.layers.convolutional import MaxPooling2D 12 | from keras.utils import np_utils 13 | from keras.callbacks import ModelCheckpoint 14 | from keras import backend as K 15 | K.set_image_dim_ordering('tf') 16 | 17 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 18 | 19 | def get_image_size(): 20 | img = cv2.imread('gestures/1/100.jpg', 0) 21 | return img.shape 22 | 23 | def get_num_of_classes(): 24 | return len(glob('gestures/*')) 25 | 26 | image_x, image_y = get_image_size() 27 | 28 | def cnn_model(): 29 | num_of_classes = get_num_of_classes() 30 | model = Sequential() 31 | model.add(Conv2D(16, (2,2), input_shape=(image_x, image_y, 1), activation='relu')) 32 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')) 33 | model.add(Conv2D(32, (3,3), activation='relu')) 34 | model.add(MaxPooling2D(pool_size=(3, 3), strides=(3, 3), padding='same')) 35 | model.add(Conv2D(64, (5,5), activation='relu')) 36 | model.add(MaxPooling2D(pool_size=(5, 5), strides=(5, 5), padding='same')) 37 | model.add(Flatten()) 38 | model.add(Dense(128, activation='relu')) 39 | model.add(Dropout(0.2)) 40 | model.add(Dense(num_of_classes, activation='softmax')) 41 | sgd = optimizers.SGD(lr=1e-2) 42 | model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) 43 | filepath="cnn_model_keras2.h5" 44 | checkpoint1 = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') 45 | callbacks_list = [checkpoint1] 46 | 
#from keras.utils import plot_model 47 | #plot_model(model, to_file='model.png', show_shapes=True) 48 | return model, callbacks_list 49 | 50 | def train(): 51 | with open("train_images", "rb") as f: 52 | train_images = np.array(pickle.load(f)) 53 | with open("train_labels", "rb") as f: 54 | train_labels = np.array(pickle.load(f), dtype=np.int32) 55 | 56 | with open("val_images", "rb") as f: 57 | val_images = np.array(pickle.load(f)) 58 | with open("val_labels", "rb") as f: 59 | val_labels = np.array(pickle.load(f), dtype=np.int32) 60 | 61 | train_images = np.reshape(train_images, (train_images.shape[0], image_x, image_y, 1)) 62 | val_images = np.reshape(val_images, (val_images.shape[0], image_x, image_y, 1)) 63 | train_labels = np_utils.to_categorical(train_labels) 64 | val_labels = np_utils.to_categorical(val_labels) 65 | 66 | print(val_labels.shape) 67 | 68 | model, callbacks_list = cnn_model() 69 | model.summary() 70 | model.fit(train_images, train_labels, validation_data=(val_images, val_labels), epochs=15, batch_size=500, callbacks=callbacks_list) 71 | scores = model.evaluate(val_images, val_labels, verbose=0) 72 | print("CNN Error: %.2f%%" % (100-scores[1]*100)) 73 | #model.save('cnn_model_keras2.h5') 74 | 75 | train() 76 | K.clear_session(); 77 | -------------------------------------------------------------------------------- /Code/create_gestures.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import pickle, os, sqlite3, random 4 | 5 | image_x, image_y = 50, 50 6 | 7 | def get_hand_hist(): 8 | with open("hist", "rb") as f: 9 | hist = pickle.load(f) 10 | return hist 11 | 12 | def init_create_folder_database(): 13 | # create the folder and database if not exist 14 | if not os.path.exists("gestures"): 15 | os.mkdir("gestures") 16 | if not os.path.exists("gesture_db.db"): 17 | conn = sqlite3.connect("gesture_db.db") 18 | create_table_cmd = "CREATE TABLE gesture ( g_id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE, g_name TEXT NOT NULL )" 19 | conn.execute(create_table_cmd) 20 | conn.commit() 21 | 22 | def create_folder(folder_name): 23 | if not os.path.exists(folder_name): 24 | os.mkdir(folder_name) 25 | 26 | def store_in_db(g_id, g_name): 27 | conn = sqlite3.connect("gesture_db.db") 28 | cmd = "INSERT INTO gesture (g_id, g_name) VALUES (%s, \'%s\')" % (g_id, g_name) 29 | try: 30 | conn.execute(cmd) 31 | except sqlite3.IntegrityError: 32 | choice = input("g_id already exists. Want to change the record? 
(y/n): ") 33 | if choice.lower() == 'y': 34 | cmd = "UPDATE gesture SET g_name = \'%s\' WHERE g_id = %s" % (g_name, g_id) 35 | conn.execute(cmd) 36 | else: 37 | print("Doing nothing...") 38 | return 39 | conn.commit() 40 | 41 | def store_images(g_id): 42 | total_pics = 1200 43 | hist = get_hand_hist() 44 | cam = cv2.VideoCapture(1) 45 | if cam.read()[0]==False: 46 | cam = cv2.VideoCapture(0) 47 | x, y, w, h = 300, 100, 300, 300 48 | 49 | create_folder("gestures/"+str(g_id)) 50 | pic_no = 0 51 | flag_start_capturing = False 52 | frames = 0 53 | 54 | while True: 55 | img = cam.read()[1] 56 | img = cv2.flip(img, 1) 57 | imgHSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) 58 | dst = cv2.calcBackProject([imgHSV], [0, 1], hist, [0, 180, 0, 256], 1) 59 | disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(10,10)) 60 | cv2.filter2D(dst,-1,disc,dst) 61 | blur = cv2.GaussianBlur(dst, (11,11), 0) 62 | blur = cv2.medianBlur(blur, 15) 63 | thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1] 64 | thresh = cv2.merge((thresh,thresh,thresh)) 65 | thresh = cv2.cvtColor(thresh, cv2.COLOR_BGR2GRAY) 66 | thresh = thresh[y:y+h, x:x+w] 67 | contours = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[1] 68 | 69 | if len(contours) > 0: 70 | contour = max(contours, key = cv2.contourArea) 71 | if cv2.contourArea(contour) > 10000 and frames > 50: 72 | x1, y1, w1, h1 = cv2.boundingRect(contour) 73 | pic_no += 1 74 | save_img = thresh[y1:y1+h1, x1:x1+w1] 75 | if w1 > h1: 76 | save_img = cv2.copyMakeBorder(save_img, int((w1-h1)/2) , int((w1-h1)/2) , 0, 0, cv2.BORDER_CONSTANT, (0, 0, 0)) 77 | elif h1 > w1: 78 | save_img = cv2.copyMakeBorder(save_img, 0, 0, int((h1-w1)/2) , int((h1-w1)/2) , cv2.BORDER_CONSTANT, (0, 0, 0)) 79 | save_img = cv2.resize(save_img, (image_x, image_y)) 80 | rand = random.randint(0, 10) 81 | if rand % 2 == 0: 82 | save_img = cv2.flip(save_img, 1) 83 | cv2.putText(img, "Capturing...", (30, 60), cv2.FONT_HERSHEY_TRIPLEX, 2, (127, 255, 255)) 84 | cv2.imwrite("gestures/"+str(g_id)+"/"+str(pic_no)+".jpg", save_img) 85 | 86 | cv2.rectangle(img, (x,y), (x+w, y+h), (0,255,0), 2) 87 | cv2.putText(img, str(pic_no), (30, 400), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (127, 127, 255)) 88 | cv2.imshow("Capturing gesture", img) 89 | cv2.imshow("thresh", thresh) 90 | keypress = cv2.waitKey(1) 91 | if keypress == ord('c'): 92 | if flag_start_capturing == False: 93 | flag_start_capturing = True 94 | else: 95 | flag_start_capturing = False 96 | frames = 0 97 | if flag_start_capturing == True: 98 | frames += 1 99 | if pic_no == total_pics: 100 | break 101 | 102 | init_create_folder_database() 103 | g_id = input("Enter gesture no.: ") 104 | g_name = input("Enter gesture name/text: ") 105 | store_in_db(g_id, g_name) 106 | store_images(g_id) -------------------------------------------------------------------------------- /Code/display_gestures.py: -------------------------------------------------------------------------------- 1 | import cv2, os, random 2 | import numpy as np 3 | 4 | def get_image_size(): 5 | img = cv2.imread('gestures/0/100.jpg', 0) 6 | return img.shape 7 | 8 | gestures = os.listdir('gestures/') 9 | gestures.sort(key = int) 10 | begin_index = 0 11 | end_index = 5 12 | image_x, image_y = get_image_size() 13 | 14 | if len(gestures)%5 != 0: 15 | rows = int(len(gestures)/5)+1 16 | else: 17 | rows = int(len(gestures)/5) 18 | 19 | full_img = None 20 | for i in range(rows): 21 | col_img = None 22 | for j in range(begin_index, end_index): 23 | img_path = "gestures/%s/%d.jpg" % (j, 
random.randint(1, 1200)) 24 | img = cv2.imread(img_path, 0) 25 | if np.any(img == None): 26 | img = np.zeros((image_y, image_x), dtype = np.uint8) 27 | if np.any(col_img == None): 28 | col_img = img 29 | else: 30 | col_img = np.hstack((col_img, img)) 31 | 32 | begin_index += 5 33 | end_index += 5 34 | if np.any(full_img == None): 35 | full_img = col_img 36 | else: 37 | full_img = np.vstack((full_img, col_img)) 38 | 39 | 40 | cv2.imshow("gestures", full_img) 41 | cv2.imwrite('full_img.jpg', full_img) 42 | cv2.waitKey(0) 43 | -------------------------------------------------------------------------------- /Code/final.py: -------------------------------------------------------------------------------- 1 | import cv2, pickle 2 | import numpy as np 3 | import tensorflow as tf 4 | from cnn_tf import cnn_model_fn 5 | import os 6 | import sqlite3, pyttsx3 7 | from keras.models import load_model 8 | from threading import Thread 9 | 10 | engine = pyttsx3.init() 11 | engine.setProperty('rate', 150) 12 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 13 | model = load_model('cnn_model_keras2.h5') 14 | 15 | def get_hand_hist(): 16 | with open("hist", "rb") as f: 17 | hist = pickle.load(f) 18 | return hist 19 | 20 | def get_image_size(): 21 | img = cv2.imread('gestures/0/100.jpg', 0) 22 | return img.shape 23 | 24 | image_x, image_y = get_image_size() 25 | 26 | def keras_process_image(img): 27 | img = cv2.resize(img, (image_x, image_y)) 28 | img = np.array(img, dtype=np.float32) 29 | img = np.reshape(img, (1, image_x, image_y, 1)) 30 | return img 31 | 32 | def keras_predict(model, image): 33 | processed = keras_process_image(image) 34 | pred_probab = model.predict(processed)[0] 35 | pred_class = list(pred_probab).index(max(pred_probab)) 36 | return max(pred_probab), pred_class 37 | 38 | def get_pred_text_from_db(pred_class): 39 | conn = sqlite3.connect("gesture_db.db") 40 | cmd = "SELECT g_name FROM gesture WHERE g_id="+str(pred_class) 41 | cursor = conn.execute(cmd) 42 | for row in cursor: 43 | return row[0] 44 | 45 | def get_pred_from_contour(contour, thresh): 46 | x1, y1, w1, h1 = cv2.boundingRect(contour) 47 | save_img = thresh[y1:y1+h1, x1:x1+w1] 48 | text = "" 49 | if w1 > h1: 50 | save_img = cv2.copyMakeBorder(save_img, int((w1-h1)/2) , int((w1-h1)/2) , 0, 0, cv2.BORDER_CONSTANT, (0, 0, 0)) 51 | elif h1 > w1: 52 | save_img = cv2.copyMakeBorder(save_img, 0, 0, int((h1-w1)/2) , int((h1-w1)/2) , cv2.BORDER_CONSTANT, (0, 0, 0)) 53 | pred_probab, pred_class = keras_predict(model, save_img) 54 | if pred_probab*100 > 70: 55 | text = get_pred_text_from_db(pred_class) 56 | return text 57 | 58 | def get_operator(pred_text): 59 | try: 60 | pred_text = int(pred_text) 61 | except: 62 | return "" 63 | operator = "" 64 | if pred_text == 1: 65 | operator = "+" 66 | elif pred_text == 2: 67 | operator = "-" 68 | elif pred_text == 3: 69 | operator = "*" 70 | elif pred_text == 4: 71 | operator = "/" 72 | elif pred_text == 5: 73 | operator = "%" 74 | elif pred_text == 6: 75 | operator = "**" 76 | elif pred_text == 7: 77 | operator = ">>" 78 | elif pred_text == 8: 79 | operator = "<<" 80 | elif pred_text == 9: 81 | operator = "&" 82 | elif pred_text == 0: 83 | operator = "|" 84 | return operator 85 | 86 | hist = get_hand_hist() 87 | x, y, w, h = 300, 100, 300, 300 88 | is_voice_on = True 89 | 90 | def get_img_contour_thresh(img): 91 | img = cv2.flip(img, 1) 92 | imgHSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) 93 | dst = cv2.calcBackProject([imgHSV], [0, 1], hist, [0, 180, 0, 256], 1) 94 | disc = 
cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(10,10)) 95 | cv2.filter2D(dst,-1,disc,dst) 96 | blur = cv2.GaussianBlur(dst, (11,11), 0) 97 | blur = cv2.medianBlur(blur, 15) 98 | thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1] 99 | thresh = cv2.merge((thresh,thresh,thresh)) 100 | thresh = cv2.cvtColor(thresh, cv2.COLOR_BGR2GRAY) 101 | thresh = thresh[y:y+h, x:x+w] 102 | contours = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0] 103 | return img, contours, thresh 104 | 105 | def say_text(text): 106 | if not is_voice_on: 107 | return 108 | while engine._inLoop: 109 | pass 110 | engine.say(text) 111 | engine.runAndWait() 112 | 113 | def calculator_mode(cam): 114 | global is_voice_on 115 | flag = {"first": False, "operator": False, "second": False, "clear": False} 116 | count_same_frames = 0 117 | first, operator, second = "", "", "" 118 | pred_text = "" 119 | calc_text = "" 120 | info = "Enter first number" 121 | Thread(target=say_text, args=(info,)).start() 122 | count_clear_frames = 0 123 | while True: 124 | img = cam.read()[1] 125 | img = cv2.resize(img, (640, 480)) 126 | img, contours, thresh = get_img_contour_thresh(img) 127 | old_pred_text = pred_text 128 | if len(contours) > 0: 129 | contour = max(contours, key = cv2.contourArea) 130 | if cv2.contourArea(contour) > 10000: 131 | pred_text = get_pred_from_contour(contour, thresh) 132 | if old_pred_text == pred_text: 133 | count_same_frames += 1 134 | else: 135 | count_same_frames = 0 136 | 137 | if pred_text == "C": 138 | if count_same_frames > 5: 139 | count_same_frames = 0 140 | first, second, operator, pred_text, calc_text = '', '', '', '', '' 141 | flag['first'], flag['operator'], flag['second'], flag['clear'] = False, False, False, False 142 | info = "Enter first number" 143 | Thread(target=say_text, args=(info,)).start() 144 | 145 | elif pred_text == "Best of Luck " and count_same_frames > 15: 146 | count_same_frames = 0 147 | if flag['clear']: 148 | first, second, operator, pred_text, calc_text = '', '', '', '', '' 149 | flag['first'], flag['operator'], flag['second'], flag['clear'] = False, False, False, False 150 | info = "Enter first number" 151 | Thread(target=say_text, args=(info,)).start() 152 | elif second != '': 153 | flag['second'] = True 154 | info = "Clear screen" 155 | #Thread(target=say_text, args=(info,)).start() 156 | second = '' 157 | flag['clear'] = True 158 | try: 159 | calc_text += "= "+str(eval(calc_text)) 160 | except: 161 | calc_text = "Invalid operation" 162 | if is_voice_on: 163 | speech = calc_text 164 | speech = speech.replace('-', ' minus ') 165 | speech = speech.replace('/', ' divided by ') 166 | speech = speech.replace('**', ' raised to the power ') 167 | speech = speech.replace('*', ' multiplied by ') 168 | speech = speech.replace('%', ' mod ') 169 | speech = speech.replace('>>', ' bitwise right shift ') 170 | speech = speech.replace('<<', ' bitwise leftt shift ') 171 | speech = speech.replace('&', ' bitwise and ') 172 | speech = speech.replace('|', ' bitwise or ') 173 | Thread(target=say_text, args=(speech,)).start() 174 | elif first != '': 175 | flag['first'] = True 176 | info = "Enter operator" 177 | Thread(target=say_text, args=(info,)).start() 178 | first = '' 179 | 180 | elif pred_text != "Best of Luck " and pred_text.isnumeric(): 181 | if flag['first'] == False: 182 | if count_same_frames > 15: 183 | count_same_frames = 0 184 | Thread(target=say_text, args=(pred_text,)).start() 185 | first += pred_text 186 | calc_text += pred_text 187 | elif 
flag['operator'] == False: 188 | operator = get_operator(pred_text) 189 | if count_same_frames > 15: 190 | count_same_frames = 0 191 | flag['operator'] = True 192 | calc_text += operator 193 | info = "Enter second number" 194 | Thread(target=say_text, args=(info,)).start() 195 | operator = '' 196 | elif flag['second'] == False: 197 | if count_same_frames > 15: 198 | Thread(target=say_text, args=(pred_text,)).start() 199 | second += pred_text 200 | calc_text += pred_text 201 | count_same_frames = 0 202 | 203 | if count_clear_frames == 30: 204 | first, second, operator, pred_text, calc_text = '', '', '', '', '' 205 | flag['first'], flag['operator'], flag['second'], flag['clear'] = False, False, False, False 206 | info = "Enter first number" 207 | Thread(target=say_text, args=(info,)).start() 208 | count_clear_frames = 0 209 | 210 | blackboard = np.zeros((480, 640, 3), dtype=np.uint8) 211 | cv2.putText(blackboard, "Calculator Mode", (100, 50), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (255, 0,0)) 212 | cv2.putText(blackboard, "Predicted text- " + pred_text, (30, 100), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 0)) 213 | cv2.putText(blackboard, "Operator " + operator, (30, 140), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 127)) 214 | cv2.putText(blackboard, calc_text, (30, 240), cv2.FONT_HERSHEY_TRIPLEX, 2, (255, 255, 255)) 215 | cv2.putText(blackboard, info, (30, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 255, 255) ) 216 | if is_voice_on: 217 | cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0)) 218 | else: 219 | cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0)) 220 | cv2.rectangle(img, (x,y), (x+w, y+h), (0,255,0), 2) 221 | res = np.hstack((img, blackboard)) 222 | cv2.imshow("Recognizing gesture", res) 223 | cv2.imshow("thresh", thresh) 224 | keypress = cv2.waitKey(1) 225 | if keypress == ord('q') or keypress == ord('t'): 226 | break 227 | if keypress == ord('v') and is_voice_on: 228 | is_voice_on = False 229 | elif keypress == ord('v') and not is_voice_on: 230 | is_voice_on = True 231 | 232 | if keypress == ord('t'): 233 | return 1 234 | else: 235 | return 0 236 | 237 | def text_mode(cam): 238 | global is_voice_on 239 | text = "" 240 | word = "" 241 | count_same_frame = 0 242 | while True: 243 | img = cam.read()[1] 244 | img = cv2.resize(img, (640, 480)) 245 | img, contours, thresh = get_img_contour_thresh(img) 246 | old_text = text 247 | if len(contours) > 0: 248 | contour = max(contours, key = cv2.contourArea) 249 | if cv2.contourArea(contour) > 10000: 250 | text = get_pred_from_contour(contour, thresh) 251 | if old_text == text: 252 | count_same_frame += 1 253 | else: 254 | count_same_frame = 0 255 | 256 | if count_same_frame > 20: 257 | if len(text) == 1: 258 | Thread(target=say_text, args=(text, )).start() 259 | word = word + text 260 | if word.startswith('I/Me '): 261 | word = word.replace('I/Me ', 'I ') 262 | elif word.endswith('I/Me '): 263 | word = word.replace('I/Me ', 'me ') 264 | count_same_frame = 0 265 | 266 | elif cv2.contourArea(contour) < 1000: 267 | if word != '': 268 | #print('yolo') 269 | #say_text(text) 270 | Thread(target=say_text, args=(word, )).start() 271 | text = "" 272 | word = "" 273 | else: 274 | if word != '': 275 | #print('yolo1') 276 | #say_text(text) 277 | Thread(target=say_text, args=(word, )).start() 278 | text = "" 279 | word = "" 280 | blackboard = np.zeros((480, 640, 3), dtype=np.uint8) 281 | cv2.putText(blackboard, " ", (180, 50), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (255, 0,0)) 282 | cv2.putText(blackboard, 
"Predicted text- " + text, (30, 100), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 0)) 283 | cv2.putText(blackboard, word, (30, 240), cv2.FONT_HERSHEY_TRIPLEX, 2, (255, 255, 255)) 284 | if is_voice_on: 285 | cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0)) 286 | else: 287 | cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0)) 288 | cv2.rectangle(img, (x,y), (x+w, y+h), (0,255,0), 2) 289 | res = np.hstack((img, blackboard)) 290 | cv2.imshow("Recognizing gesture", res) 291 | cv2.imshow("thresh", thresh) 292 | keypress = cv2.waitKey(1) 293 | if keypress == ord('q') or keypress == ord('c'): 294 | break 295 | if keypress == ord('v') and is_voice_on: 296 | is_voice_on = False 297 | elif keypress == ord('v') and not is_voice_on: 298 | is_voice_on = True 299 | 300 | if keypress == ord('c'): 301 | return 2 302 | else: 303 | return 0 304 | 305 | def recognize(): 306 | cam = cv2.VideoCapture(1) 307 | if cam.read()[0]==False: 308 | cam = cv2.VideoCapture(0) 309 | text = "" 310 | word = "" 311 | count_same_frame = 0 312 | keypress = 1 313 | while True: 314 | if keypress == 1: 315 | keypress = text_mode(cam) 316 | elif keypress == 2: 317 | keypress = calculator_mode(cam) 318 | else: 319 | break 320 | 321 | keras_predict(model, np.zeros((50, 50), dtype = np.uint8)) 322 | recognize() 323 | -------------------------------------------------------------------------------- /Code/gesture_db.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/Code/gesture_db.db -------------------------------------------------------------------------------- /Code/hist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/Code/hist -------------------------------------------------------------------------------- /Code/load_images.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from glob import glob 3 | import numpy as np 4 | import random 5 | from sklearn.utils import shuffle 6 | import pickle 7 | import os 8 | 9 | def pickle_images_labels(): 10 | images_labels = [] 11 | images = glob("gestures/*/*.jpg") 12 | images.sort() 13 | for image in images: 14 | print(image) 15 | label = image[image.find(os.sep)+1: image.rfind(os.sep)] 16 | img = cv2.imread(image, 0) 17 | images_labels.append((np.array(img, dtype=np.uint8), int(label))) 18 | return images_labels 19 | 20 | images_labels = pickle_images_labels() 21 | images_labels = shuffle(shuffle(shuffle(shuffle(images_labels)))) 22 | images, labels = zip(*images_labels) 23 | print("Length of images_labels", len(images_labels)) 24 | 25 | train_images = images[:int(5/6*len(images))] 26 | print("Length of train_images", len(train_images)) 27 | with open("train_images", "wb") as f: 28 | pickle.dump(train_images, f) 29 | del train_images 30 | 31 | train_labels = labels[:int(5/6*len(labels))] 32 | print("Length of train_labels", len(train_labels)) 33 | with open("train_labels", "wb") as f: 34 | pickle.dump(train_labels, f) 35 | del train_labels 36 | 37 | test_images = images[int(5/6*len(images)):int(11/12*len(images))] 38 | print("Length of test_images", len(test_images)) 39 | with open("test_images", "wb") as f: 40 | pickle.dump(test_images, f) 41 
| del test_images 42 | 43 | test_labels = labels[int(5/6*len(labels)):int(11/12*len(images))] 44 | print("Length of test_labels", len(test_labels)) 45 | with open("test_labels", "wb") as f: 46 | pickle.dump(test_labels, f) 47 | del test_labels 48 | 49 | val_images = images[int(11/12*len(images)):] 50 | print("Length of test_images", len(val_images)) 51 | with open("val_images", "wb") as f: 52 | pickle.dump(val_images, f) 53 | del val_images 54 | 55 | val_labels = labels[int(11/12*len(labels)):] 56 | print("Length of val_labels", len(val_labels)) 57 | with open("val_labels", "wb") as f: 58 | pickle.dump(val_labels, f) 59 | del val_labels 60 | -------------------------------------------------------------------------------- /Code/set_hand_histogram.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import pickle 4 | 5 | def build_squares(img): 6 | x, y, w, h = 420, 140, 10, 10 7 | d = 10 8 | imgCrop = None 9 | crop = None 10 | for i in range(10): 11 | for j in range(5): 12 | if np.any(imgCrop == None): 13 | imgCrop = img[y:y+h, x:x+w] 14 | else: 15 | imgCrop = np.hstack((imgCrop, img[y:y+h, x:x+w])) 16 | #print(imgCrop.shape) 17 | cv2.rectangle(img, (x,y), (x+w, y+h), (0,255,0), 1) 18 | x+=w+d 19 | if np.any(crop == None): 20 | crop = imgCrop 21 | else: 22 | crop = np.vstack((crop, imgCrop)) 23 | imgCrop = None 24 | x = 420 25 | y+=h+d 26 | return crop 27 | 28 | def get_hand_hist(): 29 | cam = cv2.VideoCapture(1) 30 | if cam.read()[0]==False: 31 | cam = cv2.VideoCapture(0) 32 | x, y, w, h = 300, 100, 300, 300 33 | flagPressedC, flagPressedS = False, False 34 | imgCrop = None 35 | while True: 36 | img = cam.read()[1] 37 | img = cv2.flip(img, 1) 38 | img = cv2.resize(img, (640, 480)) 39 | hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) 40 | 41 | keypress = cv2.waitKey(1) 42 | if keypress == ord('c'): 43 | hsvCrop = cv2.cvtColor(imgCrop, cv2.COLOR_BGR2HSV) 44 | flagPressedC = True 45 | hist = cv2.calcHist([hsvCrop], [0, 1], None, [180, 256], [0, 180, 0, 256]) 46 | cv2.normalize(hist, hist, 0, 255, cv2.NORM_MINMAX) 47 | elif keypress == ord('s'): 48 | flagPressedS = True 49 | break 50 | if flagPressedC: 51 | dst = cv2.calcBackProject([hsv], [0, 1], hist, [0, 180, 0, 256], 1) 52 | dst1 = dst.copy() 53 | disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(10,10)) 54 | cv2.filter2D(dst,-1,disc,dst) 55 | blur = cv2.GaussianBlur(dst, (11,11), 0) 56 | blur = cv2.medianBlur(blur, 15) 57 | ret,thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU) 58 | thresh = cv2.merge((thresh,thresh,thresh)) 59 | #cv2.imshow("res", res) 60 | cv2.imshow("Thresh", thresh) 61 | if not flagPressedS: 62 | imgCrop = build_squares(img) 63 | #cv2.rectangle(img, (x,y), (x+w, y+h), (0,255,0), 2) 64 | cv2.imshow("Set hand histogram", img) 65 | cam.release() 66 | cv2.destroyAllWindows() 67 | with open("hist", "wb") as f: 68 | pickle.dump(hist, f) 69 | 70 | 71 | get_hand_hist() 72 | -------------------------------------------------------------------------------- /FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: 4 | patreon: # 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | 
liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Harsh Gupta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Stars](https://img.shields.io/github/stars/harshbg/Sign-Language-Interpreter-using-Deep-Learning.svg?style=social) 2 | ![Forks](https://img.shields.io/github/forks/harshbg/Sign-Language-Interpreter-using-Deep-Learning.svg?style=social) 3 | ![GitHub contributors](https://img.shields.io/github/contributors/harshbg/Sign-Language-Interpreter-using-Deep-Learning.svg) 4 | ![Language](https://img.shields.io/github/languages/top/harshbg/Sign-Language-Interpreter-using-Deep-Learning.svg) 5 | [![GitHub](https://img.shields.io/github/license/harshbg/Sign-Language-Interpreter-using-Deep-Learning.svg)](https://choosealicense.com/licenses/mit) 6 | [![HitCount](http://hits.dwyl.io/harshbg/Sign-Language-Interpreter-using-Deep-Learning.svg)](http://hits.dwyl.io/harshbg/Sign-Language-Interpreter-using-Deep-Learning) 7 | 8 | 9 | # Sign Language Interpreter using Deep Learning 10 | > A sign language interpreter using live video feed from the camera. 11 | The project was completed in 24 hours as part of HackUNT-19, the University of North Texas's annual Hackathon. You can view the project demo on [YouTube](https://link.harshgupta.com/acd72). 12 | 13 | ## Table of contents 14 | * [General info](#general-info) 15 | * [Screenshots](#screenshots) 16 | * [Demo](#demo) 17 | * [Technologies and Tools](#technologies-and-tools) 18 | * [Setup](#setup) 19 | * [Process](#process) 20 | * [Code Examples](#code-examples) 21 | * [Features](#features) 22 | * [Status](#status) 23 | * [Contact](#contact) 24 | 25 | ## General info 26 | 27 | The theme at HACK UNT 19 was to use technology to improve accessibility by finding a creative solution to benefit the lives of those with a disability. 
28 | We wanted to make it easy for the 70 million deaf people across the world to be independent of translators for their daily communication needs, so we designed the app to work as a personal translator, available 24*7, for deaf people.
29 | 
30 | ## Demo
31 | ![Example screenshot](./img/demo4.gif)
32 | 
33 | 
34 | 
35 | ![Example screenshot](./img/demo2.gif)
36 | 
37 | 
38 | 
39 | ![Example screenshot](./img/demo3.gif)
40 | 
41 | 
42 | **The entire demo of the project can be found on [YouTube](https://link.harshgupta.com/acd72).**
43 | 
44 | 
45 | ## Screenshots
46 | 
47 | ![Example screenshot](./img/Capture1.PNG)
48 | ![Example screenshot](./img/Capture.PNG)
49 | 
50 | ## Technologies and Tools
51 | * Python
52 | * TensorFlow
53 | * Keras
54 | * OpenCV
55 | 
56 | ## Setup
57 | 
58 | * Use the command prompt to set up the environment with the Install_Packages.txt file (or Install_Packages_gpu.txt if you have a GPU).
59 | 
60 | `python -m pip install -r Install_Packages.txt`
61 | 
62 | This installs all the libraries required for the project.
63 | 
64 | ## Process
65 | 
66 | * Run `set_hand_histogram.py` to set the hand histogram used for creating gestures.
67 | * Once you get a good histogram, save it in the Code folder, or use the histogram created by us, which can be found [here](https://github.com/harshbg/Sign-Language-Interpreter-using-Deep-Learning/blob/master/Code/hist).
68 | * Add gestures and label them by running `create_gestures.py`, which captures them from the webcam feed with OpenCV and stores the labels in a database (a sketch of the histogram-based hand segmentation these scripts share is given below). Alternatively, you can use the gestures created by us [here](https://github.com/harshbg/Sign-Language-Interpreter-using-Deep-Learning/tree/master/Code).
69 | * Add variation to the captured gestures by flipping all the images with `Rotate_images.py`.
70 | * Run `load_images.py` to split all the captured gestures into training, validation, and test sets.
71 | * To view all the gestures, run `display_gestures.py`.
72 | * Train the model using Keras by running `cnn_model_train.py`.
73 | * Run `final.py`. This opens the gesture-recognition window, which uses your webcam to interpret the trained American Sign Language gestures.
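Several of the scripts above (`create_gestures.py` and `final.py` in particular) share one key step: they back-project the saved hand histogram onto each frame and threshold the result to isolate the hand before a gesture is stored or classified. The snippet below is a minimal, self-contained sketch of that step written for this README — it is not one of the repository files. It assumes the histogram pickled by `set_hand_histogram.py` is available as `hist` in the working directory and that a webcam can be opened at index 0.

````python
# Sketch only: mirrors the segmentation logic in create_gestures.py / final.py.
import cv2
import pickle

def load_hand_hist(path="hist"):
    # Histogram pickled by set_hand_histogram.py (HSV, H and S channels).
    with open(path, "rb") as f:
        return pickle.load(f)

def segment_hand(frame, hist, roi=(300, 100, 300, 300)):
    """Return a binary mask of skin-coloured pixels inside the capture box."""
    x, y, w, h = roi
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # Back-project the stored skin histogram onto the current frame.
    dst = cv2.calcBackProject([hsv], [0, 1], hist, [0, 180, 0, 256], 1)
    disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
    cv2.filter2D(dst, -1, disc, dst)  # smooth the probability map
    blur = cv2.medianBlur(cv2.GaussianBlur(dst, (11, 11), 0), 15)
    # Otsu thresholding turns the probability map into a clean binary mask.
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    return thresh[y:y + h, x:x + w]

if __name__ == "__main__":
    hist = load_hand_hist()
    cam = cv2.VideoCapture(0)
    ok, frame = cam.read()
    if ok:
        mask = segment_hand(cv2.flip(frame, 1), hist)
        cv2.imshow("hand mask", mask)
        cv2.waitKey(0)
    cam.release()
    cv2.destroyAllWindows()
````

In `create_gestures.py` the resulting mask is cropped to the bounding box of the largest contour and resized to 50x50 before being written to disk, and `final.py` feeds the same kind of crop to the trained CNN; the Otsu step is what lets one histogram cope with varying lighting, since the binarisation cut-off is picked from the back-projected map itself.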
74 | 75 | ## Code Examples 76 | 77 | ```` 78 | # Model Traiining using CNN 79 | 80 | import numpy as np 81 | import pickle 82 | import cv2, os 83 | from glob import glob 84 | from keras import optimizers 85 | from keras.models import Sequential 86 | from keras.layers import Dense 87 | from keras.layers import Dropout 88 | from keras.layers import Flatten 89 | from keras.layers.convolutional import Conv2D 90 | from keras.layers.convolutional import MaxPooling2D 91 | from keras.utils import np_utils 92 | from keras.callbacks import ModelCheckpoint 93 | from keras import backend as K 94 | K.set_image_dim_ordering('tf') 95 | 96 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 97 | 98 | def get_image_size(): 99 | img = cv2.imread('gestures/1/100.jpg', 0) 100 | return img.shape 101 | 102 | def get_num_of_classes(): 103 | return len(glob('gestures/*')) 104 | 105 | image_x, image_y = get_image_size() 106 | 107 | def cnn_model(): 108 | num_of_classes = get_num_of_classes() 109 | model = Sequential() 110 | model.add(Conv2D(16, (2,2), input_shape=(image_x, image_y, 1), activation='relu')) 111 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')) 112 | model.add(Conv2D(32, (3,3), activation='relu')) 113 | model.add(MaxPooling2D(pool_size=(3, 3), strides=(3, 3), padding='same')) 114 | model.add(Conv2D(64, (5,5), activation='relu')) 115 | model.add(MaxPooling2D(pool_size=(5, 5), strides=(5, 5), padding='same')) 116 | model.add(Flatten()) 117 | model.add(Dense(128, activation='relu')) 118 | model.add(Dropout(0.2)) 119 | model.add(Dense(num_of_classes, activation='softmax')) 120 | sgd = optimizers.SGD(lr=1e-2) 121 | model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) 122 | filepath="cnn_model_keras2.h5" 123 | checkpoint1 = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') 124 | callbacks_list = [checkpoint1] 125 | #from keras.utils import plot_model 126 | #plot_model(model, to_file='model.png', show_shapes=True) 127 | return model, callbacks_list 128 | 129 | def train(): 130 | with open("train_images", "rb") as f: 131 | train_images = np.array(pickle.load(f)) 132 | with open("train_labels", "rb") as f: 133 | train_labels = np.array(pickle.load(f), dtype=np.int32) 134 | 135 | with open("val_images", "rb") as f: 136 | val_images = np.array(pickle.load(f)) 137 | with open("val_labels", "rb") as f: 138 | val_labels = np.array(pickle.load(f), dtype=np.int32) 139 | 140 | train_images = np.reshape(train_images, (train_images.shape[0], image_x, image_y, 1)) 141 | val_images = np.reshape(val_images, (val_images.shape[0], image_x, image_y, 1)) 142 | train_labels = np_utils.to_categorical(train_labels) 143 | val_labels = np_utils.to_categorical(val_labels) 144 | 145 | print(val_labels.shape) 146 | 147 | model, callbacks_list = cnn_model() 148 | model.summary() 149 | model.fit(train_images, train_labels, validation_data=(val_images, val_labels), epochs=15, batch_size=500, callbacks=callbacks_list) 150 | scores = model.evaluate(val_images, val_labels, verbose=0) 151 | print("CNN Error: %.2f%%" % (100-scores[1]*100)) 152 | #model.save('cnn_model_keras2.h5') 153 | 154 | train() 155 | K.clear_session(); 156 | 157 | ```` 158 | 159 | ## Features 160 | Our model was able to predict the 44 characters in the ASL with a prediction accuracy >95%. 161 | 162 | Features that can be added: 163 | * Deploy the project on cloud and create an API for using it. 
164 | * Increase the vocabulary of our model
165 | * Incorporate a feedback mechanism to make the model more robust
166 | * Add more sign languages
167 | 
168 | ## Status
169 | Project is: _finished_. Our team was the winner of the UNT Hackathon 2019. You can find our final submission post on [devpost](https://rebrand.ly/754c5). If you would like us to implement the project end-to-end for you, please book a [session](https://link.harshgupta.com/5e580).
170 | 
171 | ## Contact
172 | Created by me with my teammates [Siddharth Oza](https://github.com/siddharthoza), [Ashish Sharma](https://github.com/ashish1993utd), and [Manish Shukla](https://github.com/Manishms18).
173 | 
174 | If you loved what you read here and feel like we can collaborate to produce some exciting stuff, or if you
175 | just want to shoot a question, please feel free to connect with me on email,
176 | LinkedIn, or
177 | Twitter.
178 | My other projects can be found [here](https://link.harshgupta.com/85f2e).
179 | 
180 | [![GitHub](https://img.shields.io/github/followers/harshbg.svg?style=social)](https://link.harshgupta.com/e144a)
181 | [![Twitter](https://img.shields.io/twitter/follow/harshbg.svg?style=social)](https://link.harshgupta.com/34c63)
182 | 
183 | 
--------------------------------------------------------------------------------
/Team Linear Digressors-Presentation.pdf:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/Team Linear Digressors-Presentation.pdf
--------------------------------------------------------------------------------
/img/Capture.PNG:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/Capture.PNG
--------------------------------------------------------------------------------
/img/Capture1.PNG:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/Capture1.PNG
--------------------------------------------------------------------------------
/img/demo.gif:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/demo.gif
--------------------------------------------------------------------------------
/img/demo2.gif:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/demo2.gif
--------------------------------------------------------------------------------
/img/demo3.gif:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/demo3.gif
--------------------------------------------------------------------------------
/img/demo4.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/demo4.gif --------------------------------------------------------------------------------