├── Code
│   ├── Install_Packages.txt
│   ├── Install_Packages_gpu.txt
│   ├── Rotate_images.py
│   ├── cnn_model_train.py
│   ├── create_gestures.py
│   ├── display_gestures.py
│   ├── final.py
│   ├── gesture_db.db
│   ├── hist
│   ├── load_images.py
│   └── set_hand_histogram.py
├── FUNDING.yml
├── LICENSE
├── README.md
├── Team Linear Digressors-Presentation.pdf
└── img
    ├── Capture.PNG
    ├── Capture1.PNG
    ├── demo.gif
    ├── demo2.gif
    ├── demo3.gif
    └── demo4.gif
/Code/Install_Packages.txt:
--------------------------------------------------------------------------------
1 | h5py
2 | numpy
3 | scikit-learn
4 | sklearn
5 | keras
6 | opencv-python
7 | pyttsx3
8 |
--------------------------------------------------------------------------------
/Code/Install_Packages_gpu.txt:
--------------------------------------------------------------------------------
1 | h5py
2 | numpy
3 | scikit-learn
4 | sklearn
5 | tensorflow-gpu
6 | keras
7 | opencv-python
8 | pyttsx3
9 |
--------------------------------------------------------------------------------
/Code/Rotate_images.py:
--------------------------------------------------------------------------------
1 | import cv2, os
2 |
3 | def flip_images():
4 | gest_folder = "gestures"
5 | images_labels = []
6 | images = []
7 | labels = []
8 | for g_id in os.listdir(gest_folder):
9 | for i in range(1200):
10 | path = gest_folder+"/"+g_id+"/"+str(i+1)+".jpg"
11 | new_path = gest_folder+"/"+g_id+"/"+str(i+1+1200)+".jpg"
12 | print(path)
13 | img = cv2.imread(path, 0)
14 | img = cv2.flip(img, 1)
15 | cv2.imwrite(new_path, img)
16 |
17 | flip_images()
18 |
--------------------------------------------------------------------------------
/Code/cnn_model_train.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pickle
3 | import cv2, os
4 | from glob import glob
5 | from keras import optimizers
6 | from keras.models import Sequential
7 | from keras.layers import Dense
8 | from keras.layers import Dropout
9 | from keras.layers import Flatten
10 | from keras.layers.convolutional import Conv2D
11 | from keras.layers.convolutional import MaxPooling2D
12 | from keras.utils import np_utils
13 | from keras.callbacks import ModelCheckpoint
14 | from keras import backend as K
15 | K.set_image_dim_ordering('tf')
16 |
17 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
18 |
19 | def get_image_size():
20 | img = cv2.imread('gestures/1/100.jpg', 0)
21 | return img.shape
22 |
23 | def get_num_of_classes():
24 | return len(glob('gestures/*'))
25 |
26 | image_x, image_y = get_image_size()
27 |
28 | def cnn_model():
29 | num_of_classes = get_num_of_classes()
30 | model = Sequential()
31 | model.add(Conv2D(16, (2,2), input_shape=(image_x, image_y, 1), activation='relu'))
32 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
33 | model.add(Conv2D(32, (3,3), activation='relu'))
34 | model.add(MaxPooling2D(pool_size=(3, 3), strides=(3, 3), padding='same'))
35 | model.add(Conv2D(64, (5,5), activation='relu'))
36 | model.add(MaxPooling2D(pool_size=(5, 5), strides=(5, 5), padding='same'))
37 | model.add(Flatten())
38 | model.add(Dense(128, activation='relu'))
39 | model.add(Dropout(0.2))
40 | model.add(Dense(num_of_classes, activation='softmax'))
41 | sgd = optimizers.SGD(lr=1e-2)
42 | model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
43 | filepath="cnn_model_keras2.h5"
44 | checkpoint1 = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
45 | callbacks_list = [checkpoint1]
46 | #from keras.utils import plot_model
47 | #plot_model(model, to_file='model.png', show_shapes=True)
48 | return model, callbacks_list
49 |
50 | def train():
51 | with open("train_images", "rb") as f:
52 | train_images = np.array(pickle.load(f))
53 | with open("train_labels", "rb") as f:
54 | train_labels = np.array(pickle.load(f), dtype=np.int32)
55 |
56 | with open("val_images", "rb") as f:
57 | val_images = np.array(pickle.load(f))
58 | with open("val_labels", "rb") as f:
59 | val_labels = np.array(pickle.load(f), dtype=np.int32)
60 |
61 | train_images = np.reshape(train_images, (train_images.shape[0], image_x, image_y, 1))
62 | val_images = np.reshape(val_images, (val_images.shape[0], image_x, image_y, 1))
63 | train_labels = np_utils.to_categorical(train_labels)
64 | val_labels = np_utils.to_categorical(val_labels)
65 |
66 | print(val_labels.shape)
67 |
68 | model, callbacks_list = cnn_model()
69 | model.summary()
70 | model.fit(train_images, train_labels, validation_data=(val_images, val_labels), epochs=15, batch_size=500, callbacks=callbacks_list)
71 | scores = model.evaluate(val_images, val_labels, verbose=0)
72 | print("CNN Error: %.2f%%" % (100-scores[1]*100))
73 | #model.save('cnn_model_keras2.h5')
74 |
75 | train()
76 | K.clear_session();
77 |
--------------------------------------------------------------------------------
/Code/create_gestures.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import pickle, os, sqlite3, random
4 |
5 | image_x, image_y = 50, 50
6 |
7 | def get_hand_hist():
8 | with open("hist", "rb") as f:
9 | hist = pickle.load(f)
10 | return hist
11 |
12 | def init_create_folder_database():
13 | # create the folder and database if not exist
14 | if not os.path.exists("gestures"):
15 | os.mkdir("gestures")
16 | if not os.path.exists("gesture_db.db"):
17 | conn = sqlite3.connect("gesture_db.db")
18 | create_table_cmd = "CREATE TABLE gesture ( g_id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE, g_name TEXT NOT NULL )"
19 | conn.execute(create_table_cmd)
20 | conn.commit()
21 |
22 | def create_folder(folder_name):
23 | if not os.path.exists(folder_name):
24 | os.mkdir(folder_name)
25 |
26 | def store_in_db(g_id, g_name):
27 | conn = sqlite3.connect("gesture_db.db")
28 | cmd = "INSERT INTO gesture (g_id, g_name) VALUES (%s, \'%s\')" % (g_id, g_name)
29 | try:
30 | conn.execute(cmd)
31 | except sqlite3.IntegrityError:
32 | choice = input("g_id already exists. Want to change the record? (y/n): ")
33 | if choice.lower() == 'y':
34 | cmd = "UPDATE gesture SET g_name = \'%s\' WHERE g_id = %s" % (g_name, g_id)
35 | conn.execute(cmd)
36 | else:
37 | print("Doing nothing...")
38 | return
39 | conn.commit()
40 |
41 | def store_images(g_id):
42 | total_pics = 1200
43 | hist = get_hand_hist()
44 | cam = cv2.VideoCapture(1)
45 | if cam.read()[0]==False:
46 | cam = cv2.VideoCapture(0)
47 | x, y, w, h = 300, 100, 300, 300
48 |
49 | create_folder("gestures/"+str(g_id))
50 | pic_no = 0
51 | flag_start_capturing = False
52 | frames = 0
53 |
54 | while True:
55 | img = cam.read()[1]
56 | img = cv2.flip(img, 1)
57 | imgHSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
58 | dst = cv2.calcBackProject([imgHSV], [0, 1], hist, [0, 180, 0, 256], 1)
59 | disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(10,10))
60 | cv2.filter2D(dst,-1,disc,dst)
61 | blur = cv2.GaussianBlur(dst, (11,11), 0)
62 | blur = cv2.medianBlur(blur, 15)
63 | thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
64 | thresh = cv2.merge((thresh,thresh,thresh))
65 | thresh = cv2.cvtColor(thresh, cv2.COLOR_BGR2GRAY)
66 | thresh = thresh[y:y+h, x:x+w]
67 | contours = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[1]
68 |
69 | if len(contours) > 0:
70 | contour = max(contours, key = cv2.contourArea)
71 | if cv2.contourArea(contour) > 10000 and frames > 50:
72 | x1, y1, w1, h1 = cv2.boundingRect(contour)
73 | pic_no += 1
74 | save_img = thresh[y1:y1+h1, x1:x1+w1]
75 | if w1 > h1:
76 | save_img = cv2.copyMakeBorder(save_img, int((w1-h1)/2) , int((w1-h1)/2) , 0, 0, cv2.BORDER_CONSTANT, (0, 0, 0))
77 | elif h1 > w1:
78 | save_img = cv2.copyMakeBorder(save_img, 0, 0, int((h1-w1)/2) , int((h1-w1)/2) , cv2.BORDER_CONSTANT, (0, 0, 0))
79 | save_img = cv2.resize(save_img, (image_x, image_y))
80 | rand = random.randint(0, 10)
81 | if rand % 2 == 0:
82 | save_img = cv2.flip(save_img, 1)
83 | cv2.putText(img, "Capturing...", (30, 60), cv2.FONT_HERSHEY_TRIPLEX, 2, (127, 255, 255))
84 | cv2.imwrite("gestures/"+str(g_id)+"/"+str(pic_no)+".jpg", save_img)
85 |
86 | cv2.rectangle(img, (x,y), (x+w, y+h), (0,255,0), 2)
87 | cv2.putText(img, str(pic_no), (30, 400), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (127, 127, 255))
88 | cv2.imshow("Capturing gesture", img)
89 | cv2.imshow("thresh", thresh)
90 | keypress = cv2.waitKey(1)
91 | if keypress == ord('c'):
92 | if flag_start_capturing == False:
93 | flag_start_capturing = True
94 | else:
95 | flag_start_capturing = False
96 | frames = 0
97 | if flag_start_capturing == True:
98 | frames += 1
99 | if pic_no == total_pics:
100 | break
101 |
102 | init_create_folder_database()
103 | g_id = input("Enter gesture no.: ")
104 | g_name = input("Enter gesture name/text: ")
105 | store_in_db(g_id, g_name)
106 | store_images(g_id)
--------------------------------------------------------------------------------
/Code/display_gestures.py:
--------------------------------------------------------------------------------
1 | import cv2, os, random
2 | import numpy as np
3 |
4 | def get_image_size():
5 | img = cv2.imread('gestures/0/100.jpg', 0)
6 | return img.shape
7 |
8 | gestures = os.listdir('gestures/')
9 | gestures.sort(key = int)
10 | begin_index = 0
11 | end_index = 5
12 | image_x, image_y = get_image_size()
13 |
14 | if len(gestures)%5 != 0:
15 | rows = int(len(gestures)/5)+1
16 | else:
17 | rows = int(len(gestures)/5)
18 |
19 | full_img = None
20 | for i in range(rows):
21 | col_img = None
22 | for j in range(begin_index, end_index):
23 | img_path = "gestures/%s/%d.jpg" % (j, random.randint(1, 1200))
24 | img = cv2.imread(img_path, 0)
25 | if np.any(img == None):
26 | img = np.zeros((image_y, image_x), dtype = np.uint8)
27 | if np.any(col_img == None):
28 | col_img = img
29 | else:
30 | col_img = np.hstack((col_img, img))
31 |
32 | begin_index += 5
33 | end_index += 5
34 | if np.any(full_img == None):
35 | full_img = col_img
36 | else:
37 | full_img = np.vstack((full_img, col_img))
38 |
39 |
40 | cv2.imshow("gestures", full_img)
41 | cv2.imwrite('full_img.jpg', full_img)
42 | cv2.waitKey(0)
43 |
--------------------------------------------------------------------------------
/Code/final.py:
--------------------------------------------------------------------------------
1 | import cv2, pickle
2 | import numpy as np
3 | import tensorflow as tf
4 | # from cnn_tf import cnn_model_fn  # unused import; cnn_tf.py is not included in this repository
5 | import os
6 | import sqlite3, pyttsx3
7 | from keras.models import load_model
8 | from threading import Thread
9 |
10 | engine = pyttsx3.init()
11 | engine.setProperty('rate', 150)
12 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
13 | model = load_model('cnn_model_keras2.h5')
14 |
15 | def get_hand_hist():
16 | with open("hist", "rb") as f:
17 | hist = pickle.load(f)
18 | return hist
19 |
20 | def get_image_size():
21 | img = cv2.imread('gestures/0/100.jpg', 0)
22 | return img.shape
23 |
24 | image_x, image_y = get_image_size()
25 |
26 | def keras_process_image(img):
27 | img = cv2.resize(img, (image_x, image_y))
28 | img = np.array(img, dtype=np.float32)
29 | img = np.reshape(img, (1, image_x, image_y, 1))
30 | return img
31 |
32 | def keras_predict(model, image):
33 | processed = keras_process_image(image)
34 | pred_probab = model.predict(processed)[0]
35 | pred_class = list(pred_probab).index(max(pred_probab))
36 | return max(pred_probab), pred_class
37 |
38 | def get_pred_text_from_db(pred_class):
39 | conn = sqlite3.connect("gesture_db.db")
40 | cmd = "SELECT g_name FROM gesture WHERE g_id="+str(pred_class)
41 | cursor = conn.execute(cmd)
42 | for row in cursor:
43 | return row[0]
44 |
45 | def get_pred_from_contour(contour, thresh):
46 | x1, y1, w1, h1 = cv2.boundingRect(contour)
47 | save_img = thresh[y1:y1+h1, x1:x1+w1]
48 | text = ""
49 | if w1 > h1:
50 | save_img = cv2.copyMakeBorder(save_img, int((w1-h1)/2) , int((w1-h1)/2) , 0, 0, cv2.BORDER_CONSTANT, (0, 0, 0))
51 | elif h1 > w1:
52 | save_img = cv2.copyMakeBorder(save_img, 0, 0, int((h1-w1)/2) , int((h1-w1)/2) , cv2.BORDER_CONSTANT, (0, 0, 0))
53 | pred_probab, pred_class = keras_predict(model, save_img)
54 | if pred_probab*100 > 70:
55 | text = get_pred_text_from_db(pred_class)
56 | return text
57 |
58 | def get_operator(pred_text):
59 | try:
60 | pred_text = int(pred_text)
61 | except:
62 | return ""
63 | operator = ""
64 | if pred_text == 1:
65 | operator = "+"
66 | elif pred_text == 2:
67 | operator = "-"
68 | elif pred_text == 3:
69 | operator = "*"
70 | elif pred_text == 4:
71 | operator = "/"
72 | elif pred_text == 5:
73 | operator = "%"
74 | elif pred_text == 6:
75 | operator = "**"
76 | elif pred_text == 7:
77 | operator = ">>"
78 | elif pred_text == 8:
79 | operator = "<<"
80 | elif pred_text == 9:
81 | operator = "&"
82 | elif pred_text == 0:
83 | operator = "|"
84 | return operator
85 |
86 | hist = get_hand_hist()
87 | x, y, w, h = 300, 100, 300, 300
88 | is_voice_on = True
89 |
90 | def get_img_contour_thresh(img):
91 | img = cv2.flip(img, 1)
92 | imgHSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
93 | dst = cv2.calcBackProject([imgHSV], [0, 1], hist, [0, 180, 0, 256], 1)
94 | disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(10,10))
95 | cv2.filter2D(dst,-1,disc,dst)
96 | blur = cv2.GaussianBlur(dst, (11,11), 0)
97 | blur = cv2.medianBlur(blur, 15)
98 | thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
99 | thresh = cv2.merge((thresh,thresh,thresh))
100 | thresh = cv2.cvtColor(thresh, cv2.COLOR_BGR2GRAY)
101 | thresh = thresh[y:y+h, x:x+w]
102 | contours = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]
103 | return img, contours, thresh
104 |
105 | def say_text(text):
106 | if not is_voice_on:
107 | return
108 | while engine._inLoop:
109 | pass
110 | engine.say(text)
111 | engine.runAndWait()
112 |
113 | def calculator_mode(cam):
114 | global is_voice_on
115 | flag = {"first": False, "operator": False, "second": False, "clear": False}
116 | count_same_frames = 0
117 | first, operator, second = "", "", ""
118 | pred_text = ""
119 | calc_text = ""
120 | info = "Enter first number"
121 | Thread(target=say_text, args=(info,)).start()
122 | count_clear_frames = 0
123 | while True:
124 | img = cam.read()[1]
125 | img = cv2.resize(img, (640, 480))
126 | img, contours, thresh = get_img_contour_thresh(img)
127 | old_pred_text = pred_text
128 | if len(contours) > 0:
129 | contour = max(contours, key = cv2.contourArea)
130 | if cv2.contourArea(contour) > 10000:
131 | pred_text = get_pred_from_contour(contour, thresh)
132 | if old_pred_text == pred_text:
133 | count_same_frames += 1
134 | else:
135 | count_same_frames = 0
136 |
137 | if pred_text == "C":
138 | if count_same_frames > 5:
139 | count_same_frames = 0
140 | first, second, operator, pred_text, calc_text = '', '', '', '', ''
141 | flag['first'], flag['operator'], flag['second'], flag['clear'] = False, False, False, False
142 | info = "Enter first number"
143 | Thread(target=say_text, args=(info,)).start()
144 |
145 | elif pred_text == "Best of Luck " and count_same_frames > 15:
146 | count_same_frames = 0
147 | if flag['clear']:
148 | first, second, operator, pred_text, calc_text = '', '', '', '', ''
149 | flag['first'], flag['operator'], flag['second'], flag['clear'] = False, False, False, False
150 | info = "Enter first number"
151 | Thread(target=say_text, args=(info,)).start()
152 | elif second != '':
153 | flag['second'] = True
154 | info = "Clear screen"
155 | #Thread(target=say_text, args=(info,)).start()
156 | second = ''
157 | flag['clear'] = True
158 | try:
159 | calc_text += "= "+str(eval(calc_text))
160 | except:
161 | calc_text = "Invalid operation"
162 | if is_voice_on:
163 | speech = calc_text
164 | speech = speech.replace('-', ' minus ')
165 | speech = speech.replace('/', ' divided by ')
166 | speech = speech.replace('**', ' raised to the power ')
167 | speech = speech.replace('*', ' multiplied by ')
168 | speech = speech.replace('%', ' mod ')
169 | speech = speech.replace('>>', ' bitwise right shift ')
170 | speech = speech.replace('<<', ' bitwise leftt shift ')
171 | speech = speech.replace('&', ' bitwise and ')
172 | speech = speech.replace('|', ' bitwise or ')
173 | Thread(target=say_text, args=(speech,)).start()
174 | elif first != '':
175 | flag['first'] = True
176 | info = "Enter operator"
177 | Thread(target=say_text, args=(info,)).start()
178 | first = ''
179 |
180 | elif pred_text != "Best of Luck " and pred_text.isnumeric():
181 | if flag['first'] == False:
182 | if count_same_frames > 15:
183 | count_same_frames = 0
184 | Thread(target=say_text, args=(pred_text,)).start()
185 | first += pred_text
186 | calc_text += pred_text
187 | elif flag['operator'] == False:
188 | operator = get_operator(pred_text)
189 | if count_same_frames > 15:
190 | count_same_frames = 0
191 | flag['operator'] = True
192 | calc_text += operator
193 | info = "Enter second number"
194 | Thread(target=say_text, args=(info,)).start()
195 | operator = ''
196 | elif flag['second'] == False:
197 | if count_same_frames > 15:
198 | Thread(target=say_text, args=(pred_text,)).start()
199 | second += pred_text
200 | calc_text += pred_text
201 | count_same_frames = 0
202 |
203 | if count_clear_frames == 30:
204 | first, second, operator, pred_text, calc_text = '', '', '', '', ''
205 | flag['first'], flag['operator'], flag['second'], flag['clear'] = False, False, False, False
206 | info = "Enter first number"
207 | Thread(target=say_text, args=(info,)).start()
208 | count_clear_frames = 0
209 |
210 | blackboard = np.zeros((480, 640, 3), dtype=np.uint8)
211 | cv2.putText(blackboard, "Calculator Mode", (100, 50), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (255, 0,0))
212 | cv2.putText(blackboard, "Predicted text- " + pred_text, (30, 100), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 0))
213 | cv2.putText(blackboard, "Operator " + operator, (30, 140), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 127))
214 | cv2.putText(blackboard, calc_text, (30, 240), cv2.FONT_HERSHEY_TRIPLEX, 2, (255, 255, 255))
215 | cv2.putText(blackboard, info, (30, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 255, 255) )
216 | if is_voice_on:
217 | cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0))
218 | else:
219 | cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0))
220 | cv2.rectangle(img, (x,y), (x+w, y+h), (0,255,0), 2)
221 | res = np.hstack((img, blackboard))
222 | cv2.imshow("Recognizing gesture", res)
223 | cv2.imshow("thresh", thresh)
224 | keypress = cv2.waitKey(1)
225 | if keypress == ord('q') or keypress == ord('t'):
226 | break
227 | if keypress == ord('v') and is_voice_on:
228 | is_voice_on = False
229 | elif keypress == ord('v') and not is_voice_on:
230 | is_voice_on = True
231 |
232 | if keypress == ord('t'):
233 | return 1
234 | else:
235 | return 0
236 |
237 | def text_mode(cam):
238 | global is_voice_on
239 | text = ""
240 | word = ""
241 | count_same_frame = 0
242 | while True:
243 | img = cam.read()[1]
244 | img = cv2.resize(img, (640, 480))
245 | img, contours, thresh = get_img_contour_thresh(img)
246 | old_text = text
247 | if len(contours) > 0:
248 | contour = max(contours, key = cv2.contourArea)
249 | if cv2.contourArea(contour) > 10000:
250 | text = get_pred_from_contour(contour, thresh)
251 | if old_text == text:
252 | count_same_frame += 1
253 | else:
254 | count_same_frame = 0
255 |
256 | if count_same_frame > 20:
257 | if len(text) == 1:
258 | Thread(target=say_text, args=(text, )).start()
259 | word = word + text
260 | if word.startswith('I/Me '):
261 | word = word.replace('I/Me ', 'I ')
262 | elif word.endswith('I/Me '):
263 | word = word.replace('I/Me ', 'me ')
264 | count_same_frame = 0
265 |
266 | elif cv2.contourArea(contour) < 1000:
267 | if word != '':
268 | #print('yolo')
269 | #say_text(text)
270 | Thread(target=say_text, args=(word, )).start()
271 | text = ""
272 | word = ""
273 | else:
274 | if word != '':
275 | #print('yolo1')
276 | #say_text(text)
277 | Thread(target=say_text, args=(word, )).start()
278 | text = ""
279 | word = ""
280 | blackboard = np.zeros((480, 640, 3), dtype=np.uint8)
281 | cv2.putText(blackboard, " ", (180, 50), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (255, 0,0))
282 | cv2.putText(blackboard, "Predicted text- " + text, (30, 100), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 0))
283 | cv2.putText(blackboard, word, (30, 240), cv2.FONT_HERSHEY_TRIPLEX, 2, (255, 255, 255))
284 | if is_voice_on:
285 | cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0))
286 | else:
287 | cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0))
288 | cv2.rectangle(img, (x,y), (x+w, y+h), (0,255,0), 2)
289 | res = np.hstack((img, blackboard))
290 | cv2.imshow("Recognizing gesture", res)
291 | cv2.imshow("thresh", thresh)
292 | keypress = cv2.waitKey(1)
293 | if keypress == ord('q') or keypress == ord('c'):
294 | break
295 | if keypress == ord('v') and is_voice_on:
296 | is_voice_on = False
297 | elif keypress == ord('v') and not is_voice_on:
298 | is_voice_on = True
299 |
300 | if keypress == ord('c'):
301 | return 2
302 | else:
303 | return 0
304 |
305 | def recognize():
306 | cam = cv2.VideoCapture(1)
307 | if cam.read()[0]==False:
308 | cam = cv2.VideoCapture(0)
309 | text = ""
310 | word = ""
311 | count_same_frame = 0
312 | keypress = 1
313 | while True:
314 | if keypress == 1:
315 | keypress = text_mode(cam)
316 | elif keypress == 2:
317 | keypress = calculator_mode(cam)
318 | else:
319 | break
320 |
321 | keras_predict(model, np.zeros((50, 50), dtype = np.uint8))
322 | recognize()
323 |
--------------------------------------------------------------------------------
/Code/gesture_db.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/Code/gesture_db.db
--------------------------------------------------------------------------------
/Code/hist:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/Code/hist
--------------------------------------------------------------------------------
/Code/load_images.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | from glob import glob
3 | import numpy as np
4 | import random
5 | from sklearn.utils import shuffle
6 | import pickle
7 | import os
8 |
9 | def pickle_images_labels():
10 | images_labels = []
11 | images = glob("gestures/*/*.jpg")
12 | images.sort()
13 | for image in images:
14 | print(image)
15 | label = image[image.find(os.sep)+1: image.rfind(os.sep)]
16 | img = cv2.imread(image, 0)
17 | images_labels.append((np.array(img, dtype=np.uint8), int(label)))
18 | return images_labels
19 |
20 | images_labels = pickle_images_labels()
21 | images_labels = shuffle(shuffle(shuffle(shuffle(images_labels))))
22 | images, labels = zip(*images_labels)
23 | print("Length of images_labels", len(images_labels))
24 |
25 | train_images = images[:int(5/6*len(images))]
26 | print("Length of train_images", len(train_images))
27 | with open("train_images", "wb") as f:
28 | pickle.dump(train_images, f)
29 | del train_images
30 |
31 | train_labels = labels[:int(5/6*len(labels))]
32 | print("Length of train_labels", len(train_labels))
33 | with open("train_labels", "wb") as f:
34 | pickle.dump(train_labels, f)
35 | del train_labels
36 |
37 | test_images = images[int(5/6*len(images)):int(11/12*len(images))]
38 | print("Length of test_images", len(test_images))
39 | with open("test_images", "wb") as f:
40 | pickle.dump(test_images, f)
41 | del test_images
42 |
43 | test_labels = labels[int(5/6*len(labels)):int(11/12*len(images))]
44 | print("Length of test_labels", len(test_labels))
45 | with open("test_labels", "wb") as f:
46 | pickle.dump(test_labels, f)
47 | del test_labels
48 |
49 | val_images = images[int(11/12*len(images)):]
50 | print("Length of test_images", len(val_images))
51 | with open("val_images", "wb") as f:
52 | pickle.dump(val_images, f)
53 | del val_images
54 |
55 | val_labels = labels[int(11/12*len(labels)):]
56 | print("Length of val_labels", len(val_labels))
57 | with open("val_labels", "wb") as f:
58 | pickle.dump(val_labels, f)
59 | del val_labels
60 |
--------------------------------------------------------------------------------
/Code/set_hand_histogram.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import pickle
4 |
5 | def build_squares(img):
6 | x, y, w, h = 420, 140, 10, 10
7 | d = 10
8 | imgCrop = None
9 | crop = None
10 | for i in range(10):
11 | for j in range(5):
12 | if np.any(imgCrop == None):
13 | imgCrop = img[y:y+h, x:x+w]
14 | else:
15 | imgCrop = np.hstack((imgCrop, img[y:y+h, x:x+w]))
16 | #print(imgCrop.shape)
17 | cv2.rectangle(img, (x,y), (x+w, y+h), (0,255,0), 1)
18 | x+=w+d
19 | if np.any(crop == None):
20 | crop = imgCrop
21 | else:
22 | crop = np.vstack((crop, imgCrop))
23 | imgCrop = None
24 | x = 420
25 | y+=h+d
26 | return crop
27 |
28 | def get_hand_hist():
29 | cam = cv2.VideoCapture(1)
30 | if cam.read()[0]==False:
31 | cam = cv2.VideoCapture(0)
32 | x, y, w, h = 300, 100, 300, 300
33 | flagPressedC, flagPressedS = False, False
34 | imgCrop = None
35 | while True:
36 | img = cam.read()[1]
37 | img = cv2.flip(img, 1)
38 | img = cv2.resize(img, (640, 480))
39 | hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
40 |
41 | keypress = cv2.waitKey(1)
42 | if keypress == ord('c'):
43 | hsvCrop = cv2.cvtColor(imgCrop, cv2.COLOR_BGR2HSV)
44 | flagPressedC = True
45 | hist = cv2.calcHist([hsvCrop], [0, 1], None, [180, 256], [0, 180, 0, 256])
46 | cv2.normalize(hist, hist, 0, 255, cv2.NORM_MINMAX)
47 | elif keypress == ord('s'):
48 | flagPressedS = True
49 | break
50 | if flagPressedC:
51 | dst = cv2.calcBackProject([hsv], [0, 1], hist, [0, 180, 0, 256], 1)
52 | dst1 = dst.copy()
53 | disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(10,10))
54 | cv2.filter2D(dst,-1,disc,dst)
55 | blur = cv2.GaussianBlur(dst, (11,11), 0)
56 | blur = cv2.medianBlur(blur, 15)
57 | ret,thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
58 | thresh = cv2.merge((thresh,thresh,thresh))
59 | #cv2.imshow("res", res)
60 | cv2.imshow("Thresh", thresh)
61 | if not flagPressedS:
62 | imgCrop = build_squares(img)
63 | #cv2.rectangle(img, (x,y), (x+w, y+h), (0,255,0), 2)
64 | cv2.imshow("Set hand histogram", img)
65 | cam.release()
66 | cv2.destroyAllWindows()
67 | with open("hist", "wb") as f:
68 | pickle.dump(hist, f)
69 |
70 |
71 | get_hand_hist()
72 |
--------------------------------------------------------------------------------
/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github:
4 | patreon: #
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom:
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Harsh Gupta
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | [](https://choosealicense.com/licenses/mit)
6 | [](http://hits.dwyl.io/harshbg/Sign-Language-Interpreter-using-Deep-Learning)
7 |
8 |
9 | # Sign Language Interpreter using Deep Learning
10 | > A sign language interpreter using live video feed from the camera.
11 | The project was completed in 24 hours as part of HackUNT-19, the University of North Texas's annual Hackathon. You can view the project demo on [YouTube](https://link.harshgupta.com/acd72).
12 |
13 | ## Table of contents
14 | * [General info](#general-info)
15 | * [Screenshots](#screenshots)
16 | * [Demo](#demo)
17 | * [Technologies and Tools](#technologies-and-tools)
18 | * [Setup](#setup)
19 | * [Process](#process)
20 | * [Code Examples](#code-examples)
21 | * [Features](#features)
22 | * [Status](#status)
23 | * [Contact](#contact)
24 |
25 | ## General info
26 |
27 | The theme at HACK UNT 19 was to use technology to improve accessibility by finding a creative solution to benefit the lives of those with a disability.
28 | We wanted to make it easy for the 70 million deaf people across the world to be independent of translators for their daily communication needs, so we designed the app to work as a personal translator, available 24/7.
29 |
30 | ## Demo
31 | 
32 |
33 |
34 |
35 | 
36 |
37 |
38 |
39 | 
40 |
41 |
42 | **The entire demo of the project can be found on [YouTube](https://link.harshgupta.com/acd72).**
43 |
44 |
45 | ## Screenshots
46 |
47 | 
48 | 
49 |
50 | ## Technologies and Tools
51 | * Python
52 | * TensorFlow
53 | * Keras
54 | * OpenCV
55 |
56 | ## Setup
57 |
58 | * Use the command prompt to set up the environment using the Install_Packages.txt and Install_Packages_gpu.txt files.
59 |
60 | `python -m pip install -r Install_Packages.txt`
61 |
62 | This installs all the libraries required for the project. For a GPU setup, install from the GPU file instead: `python -m pip install -r Install_Packages_gpu.txt`
63 |
64 | ## Process
65 |
66 | * Run `set_hand_histogram.py` to set the hand histogram for creating gestures.
67 | * Once you get a good histogram, save it in the code folder, or you can use the histogram created by us that can be found [here](https://github.com/harshbg/Sign-Language-Interpreter-using-Deep-Learning/blob/master/Code/hist).
68 | * Add gestures and label them through the OpenCV webcam feed by running `create_gestures.py`, which stores them in a database. Alternatively, you can use the gestures created by us [here](https://github.com/harshbg/Sign-Language-Interpreter-using-Deep-Learning/tree/master/Code).
69 | * Add variation to the captured gestures by flipping all the images with `Rotate_images.py`.
70 | * Run `load_images.py` to split all the captured gestures into training, validation, and test sets.
71 | * To view all the gestures, run `display_gestures.py`.
72 | * Train the model using Keras by running `cnn_model_train.py`.
73 | * Run `final.py`. This will open the gesture-recognition window, which uses your webcam to interpret the trained American Sign Language gestures. The full run order is summarized below.
74 |
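The steps above, condensed into the order in which the scripts are typically run (a sketch; it assumes the commands are executed from inside the Code folder):

````
python set_hand_histogram.py   # set and save the hand histogram ("hist")
python create_gestures.py      # capture and label gesture images
python Rotate_images.py        # augment the dataset by flipping the captured images
python load_images.py          # split the images into train/validation/test sets
python display_gestures.py     # (optional) preview the captured gestures
python cnn_model_train.py      # train the CNN and save cnn_model_keras2.h5
python final.py                # run the live interpreter
````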
75 | ## Code Examples
76 |
77 | ````
78 | # Model training using CNN
79 |
80 | import numpy as np
81 | import pickle
82 | import cv2, os
83 | from glob import glob
84 | from keras import optimizers
85 | from keras.models import Sequential
86 | from keras.layers import Dense
87 | from keras.layers import Dropout
88 | from keras.layers import Flatten
89 | from keras.layers.convolutional import Conv2D
90 | from keras.layers.convolutional import MaxPooling2D
91 | from keras.utils import np_utils
92 | from keras.callbacks import ModelCheckpoint
93 | from keras import backend as K
94 | K.set_image_dim_ordering('tf')
95 |
96 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
97 |
98 | def get_image_size():
99 | img = cv2.imread('gestures/1/100.jpg', 0)
100 | return img.shape
101 |
102 | def get_num_of_classes():
103 | return len(glob('gestures/*'))
104 |
105 | image_x, image_y = get_image_size()
106 |
107 | def cnn_model():
108 | num_of_classes = get_num_of_classes()
109 | model = Sequential()
110 | model.add(Conv2D(16, (2,2), input_shape=(image_x, image_y, 1), activation='relu'))
111 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
112 | model.add(Conv2D(32, (3,3), activation='relu'))
113 | model.add(MaxPooling2D(pool_size=(3, 3), strides=(3, 3), padding='same'))
114 | model.add(Conv2D(64, (5,5), activation='relu'))
115 | model.add(MaxPooling2D(pool_size=(5, 5), strides=(5, 5), padding='same'))
116 | model.add(Flatten())
117 | model.add(Dense(128, activation='relu'))
118 | model.add(Dropout(0.2))
119 | model.add(Dense(num_of_classes, activation='softmax'))
120 | sgd = optimizers.SGD(lr=1e-2)
121 | model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
122 | filepath="cnn_model_keras2.h5"
123 | checkpoint1 = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
124 | callbacks_list = [checkpoint1]
125 | #from keras.utils import plot_model
126 | #plot_model(model, to_file='model.png', show_shapes=True)
127 | return model, callbacks_list
128 |
129 | def train():
130 | with open("train_images", "rb") as f:
131 | train_images = np.array(pickle.load(f))
132 | with open("train_labels", "rb") as f:
133 | train_labels = np.array(pickle.load(f), dtype=np.int32)
134 |
135 | with open("val_images", "rb") as f:
136 | val_images = np.array(pickle.load(f))
137 | with open("val_labels", "rb") as f:
138 | val_labels = np.array(pickle.load(f), dtype=np.int32)
139 |
140 | train_images = np.reshape(train_images, (train_images.shape[0], image_x, image_y, 1))
141 | val_images = np.reshape(val_images, (val_images.shape[0], image_x, image_y, 1))
142 | train_labels = np_utils.to_categorical(train_labels)
143 | val_labels = np_utils.to_categorical(val_labels)
144 |
145 | print(val_labels.shape)
146 |
147 | model, callbacks_list = cnn_model()
148 | model.summary()
149 | model.fit(train_images, train_labels, validation_data=(val_images, val_labels), epochs=15, batch_size=500, callbacks=callbacks_list)
150 | scores = model.evaluate(val_images, val_labels, verbose=0)
151 | print("CNN Error: %.2f%%" % (100-scores[1]*100))
152 | #model.save('cnn_model_keras2.h5')
153 |
154 | train()
155 | K.clear_session();
156 |
157 | ````
158 |
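Once training has saved `cnn_model_keras2.h5`, a minimal inference sketch (assuming the model file and a 50x50 grayscale gesture image such as `gestures/1/100.jpg` are available, as in `final.py`) looks like this:

````
import cv2
import numpy as np
from keras.models import load_model

# Load the model produced by cnn_model_train.py (assumed to be in the working directory).
model = load_model('cnn_model_keras2.h5')

# Read one saved gesture image in grayscale and reshape it to the
# (1, height, width, 1) input the network expects.
img = cv2.imread('gestures/1/100.jpg', 0)
img = np.reshape(img, (1, img.shape[0], img.shape[1], 1)).astype(np.float32)

# Predict and report the most likely gesture class and its probability.
probabilities = model.predict(img)[0]
print("Predicted class: %d (probability %.2f)" % (int(np.argmax(probabilities)), float(np.max(probabilities))))
````

The predicted class id can then be mapped to its text label through the `gesture` table in `gesture_db.db`, as `final.py` does.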
159 | ## Features
160 | Our model was able to predict the 44 ASL characters with a prediction accuracy above 95%.
161 |
162 | Features that can be added:
163 | * Deploy the project on cloud and create an API for using it.
164 | * Increase the vocabulary of our model
165 | * Incorporate feedback mechanism to make the model more robust
166 | * Add more sign languages
167 |
168 | ## Status
169 | Project is: _finished_. Our team won the UNT Hackathon 2019. You can find our final submission post on [devpost](https://rebrand.ly/754c5). If you would like us to implement the project end-to-end for you, please book a [session](https://link.harshgupta.com/5e580).
170 |
171 | ## Contact
172 | Created by me with my teammates [Siddharth Oza](https://github.com/siddharthoza), [Ashish Sharma](https://github.com/ashish1993utd), and [Manish Shukla](https://github.com/Manishms18).
173 |
174 | If you loved what you read here and feel like we can collaborate to produce some exciting stuff, or if you
175 | just want to shoot a question, please feel free to connect with me on email,
176 | LinkedIn, or
177 | Twitter.
178 | My other projects can be found [here](https://link.harshgupta.com/85f2e).
179 |
180 | [](https://link.harshgupta.com/e144a)
181 | [](https://link.harshgupta.com/34c63)
182 |
183 |
--------------------------------------------------------------------------------
/Team Linear Digressors-Presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/Team Linear Digressors-Presentation.pdf
--------------------------------------------------------------------------------
/img/Capture.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/Capture.PNG
--------------------------------------------------------------------------------
/img/Capture1.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/Capture1.PNG
--------------------------------------------------------------------------------
/img/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/demo.gif
--------------------------------------------------------------------------------
/img/demo2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/demo2.gif
--------------------------------------------------------------------------------
/img/demo3.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/demo3.gif
--------------------------------------------------------------------------------
/img/demo4.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theashishgavade/Sign-Language-Interpreter-using-Deep-Learning/494bb28093f7a43a54614d22a84d9208427f1f74/img/demo4.gif
--------------------------------------------------------------------------------