├── .gitignore ├── Code ├── SignLanguageHacettepe.py ├── letters.txt ├── main.py └── train.py ├── Dataset ├── dataset_aybuke-20190603T083851Z-001.zip └── letters.txt ├── LICENSE ├── README.md └── right.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /Code/SignLanguageHacettepe.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import keyboard 4 | import ctypes 5 | import time 6 | from torchvision import transforms,models 7 | import torch 8 | from PIL import Image 9 | from torch.autograd import Variable 10 | 11 | 12 | def nothing(x): 13 | pass 14 | 15 | 16 | class SignLanguageRecognizer: 17 | def __init__(self,torch_model=None,prediction_interval=0.5,save_key='s',quit_key='q',kernel_size=3): 18 | self.kernel_size=kernel_size 19 | self.torch_model = torch_model 20 | self.prediction_interval=prediction_interval 21 | self.save_key=save_key 22 | self.quit_key=quit_key 23 | 24 | def predict(self,img): 25 | data_transforms = transforms.Compose([ 26 | transforms.Resize(256), 27 | transforms.CenterCrop(224), 28 | transforms.ToTensor(), 29 | transforms.Normalize( 30 | mean=[0.485, 0.456, 0.406], 31 | std=[0.229, 0.224, 0.225] 32 | ) 33 | # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 34 | ]) 35 | self.torch_model.eval() 36 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 37 | img=cv2.cvtColor(img,cv2.COLOR_GRAY2RGB) 38 | img_pil = Image.fromarray(img) 39 | 40 | img_tensor = data_transforms(img_pil).float() 41 | img_tensor = img_tensor.unsqueeze_(0) 42 | inputs = Variable(img_tensor) 43 | inputs = inputs.to(device) 44 | self.torch_model = self.torch_model.to(device) 45 | fc_out = 
self.torch_model(inputs) 46 | _, predicted = torch.max(fc_out.data, 1) 47 | 48 | return predicted 49 | 50 | 51 | def Track(self,save_dir, human_letter): 52 | 53 | """Create Video Capture Screen""" 54 | cap = cv2.VideoCapture(0) 55 | ret = cap.set(3, 860) 56 | ret = cap.set(4, 620) 57 | 58 | kernel = np.ones((self.kernel_size, self.kernel_size), np.uint8) 59 | """Get Screen Size to adjust panels""" 60 | user32 = ctypes.windll.user32 61 | user32.SetProcessDPIAware() 62 | width, height = user32.GetSystemMetrics(0), user32.GetSystemMetrics(1) 63 | 64 | #####System GUI creation ##### 65 | """Create Windows""" 66 | cv2.namedWindow('frame', cv2.WINDOW_AUTOSIZE) 67 | cv2.moveWindow('frame', 0, 10) 68 | cv2.namedWindow('res', cv2.WINDOW_AUTOSIZE) 69 | cv2.moveWindow('res', int(width / 2), 10) 70 | cv2.namedWindow('information', cv2.WINDOW_AUTOSIZE) 71 | cv2.moveWindow('information', int(width / 2), int(height / 3 + height / 4 - 22)) 72 | cv2.namedWindow('detailed', cv2.WINDOW_AUTOSIZE) 73 | cv2.moveWindow('detailed', int(width / 2), int(height / 3)) 74 | """Create trackbars for color change""" 75 | cv2.createTrackbar('LowerR', 'information', 0, 255, nothing) 76 | cv2.createTrackbar('LowerG', 'information', 50, 255, nothing) 77 | cv2.createTrackbar('LowerB', 'information', 50, 255, nothing) 78 | cv2.createTrackbar('UpperR', 'information', 155, 255, nothing) 79 | cv2.createTrackbar('UpperG', 'information', 255, 255, nothing) 80 | cv2.createTrackbar('UpperB', 'information', 255, 255, nothing) 81 | 82 | """Create switch for ON/OFF functionality""" 83 | switch = '0 : OFF \n1 : ON' 84 | cv2.createTrackbar(switch, 'information', 0, 1, nothing) 85 | 86 | """define range of red color in HSV (red is [180, 0, 0])""" 87 | lower_red = np.array([0, 50, 50]) 88 | upper_red = np.array([155, 255, 255]) 89 | 90 | 91 | 92 | color = (102, 0, 102) # rgb 93 | white = (255, 225, 225) 94 | 95 | rect_margin = 30 96 | key = "c" 97 | predicted = 0 98 | while cv2.waitKey(24) & 0xFF != ord(self.quit_key): 99 | ret, frame = cap.read() 100 | """Convert to HSV""" 101 | lr = cv2.getTrackbarPos('LowerR', 'information') 102 | lg = cv2.getTrackbarPos('LowerG', 'information') 103 | lb = cv2.getTrackbarPos('LowerB', 'information') 104 | ur = cv2.getTrackbarPos('UpperR', 'information') 105 | ug = cv2.getTrackbarPos('UpperG', 'information') 106 | ub = cv2.getTrackbarPos('UpperB', 'information') 107 | s = cv2.getTrackbarPos(switch, 'information') 108 | 109 | if s == 0: 110 | lower_red = np.array([0, 50, 50]) 111 | upper_red = np.array([155, 255, 255]) 112 | 113 | else: 114 | lower_red = np.array([lr, lg, lb]) 115 | upper_red = np.array([ur, ug, ub]) 116 | 117 | 118 | 119 | frame = np.flip(frame, axis=1) 120 | """Draw a rectangle.""" 121 | im = np.copy(frame) 122 | cv2.rectangle(im, (int(width / 2) - 612, 30), (int(width / 2) - 388, 230), color, 0) 123 | 124 | 125 | rect = cv2.rectangle(im, (int(width / 2) - 612, 15), (int(width / 2) - 388, 29), color, 126 | cv2.FILLED) # background of prediction 127 | font = cv2.FONT_HERSHEY_SIMPLEX 128 | TopLeftCorner = (int(width / 2) - 612, 28) 129 | fontScale = 0.5 130 | fontColor = white 131 | lineType = 1 132 | 133 | cv2.putText(rect, str(human_letter[predicted]), 134 | TopLeftCorner, 135 | font, 136 | fontScale, 137 | fontColor, 138 | lineType) 139 | 140 | cropped = im[30:230, int(width / 2) - 612:int(width / 2) - 388] 141 | 142 | """Threshold the HSV image to get only blue colors""" 143 | hsv = cv2.cvtColor(cropped, cv2.COLOR_BGR2HSV) 144 | """Bitwise-AND mask and original image""" 145 | mask = 
cv2.inRange(hsv, lower_red, upper_red) 146 | 147 | mask_dilate= cv2.dilate(mask,kernel, iterations=3) 148 | res = cv2.bitwise_and(cropped, cropped, mask=mask_dilate) 149 | 150 | """Change the background to white""" 151 | res[np.all(res == [0, 0, 0], axis=2)] = [210, 210, 210] 152 | 153 | """Convert image to grayscale""" 154 | rgbRes=res 155 | res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY) 156 | res = np.flip(res, axis=1) 157 | # construct black screen 158 | information = np.zeros((int(width / 2), int(height / 3), 3), np.uint8) 159 | detailed = np.zeros((int(width / 2), int(height / 4), 3), np.uint8) 160 | 161 | im = cv2.resize(im, (int(width / 2), height)) 162 | information = cv2.resize(information, (int(width / 2), int(height / 3))) 163 | detailed = cv2.resize(detailed, (int(width / 2), int(height / 5))) 164 | 165 | res = cv2.blur(res, (3, 3)) 166 | predicted=self.predict(res) 167 | 168 | 169 | if keyboard.is_pressed(self.save_key): 170 | img = cv2.resize(res, (224, 224)) 171 | cv2.imwrite(save_dir+"/saved_images" + str(predicted) + ".png", img) 172 | """Change Visual Screen""" 173 | if keyboard.is_pressed('b'): # if key 'b' is pressed 174 | key = "b" 175 | if keyboard.is_pressed('c'): # if key 'c' is pressed 176 | key = "c" 177 | if keyboard.is_pressed('h'): # if key 'h' is pressed 178 | key = "h" 179 | 180 | # check key value 181 | if key == "b": 182 | mask_dilate = np.flip(mask_dilate, axis=1) 183 | mask_dilate = cv2.resize(mask_dilate, (int(width / 2), int(height / 3))) 184 | cv2.imshow('res', mask_dilate) 185 | if key == "c": 186 | rgbRes = cv2.blur(rgbRes, (3, 3)) 187 | rgbRes = cv2.resize(rgbRes, (int(width / 2), int(height / 3))) 188 | cv2.imshow('res', rgbRes) 189 | if key == "h": 190 | # change to hog 191 | k_size = 3 192 | hog = np.float32(res) / 255.0 193 | gx = cv2.Sobel(hog, cv2.CV_64F, 1, 0, ksize=k_size) 194 | gy = cv2.Sobel(hog, cv2.CV_64F, 0, 1, ksize=k_size) 195 | mag, angle = cv2.cartToPolar(gx, gy, angleInDegrees=True) 196 | mag = cv2.resize(mag, (int(width / 2), int(height / 3))) 197 | cv2.imshow('res', mag) 198 | 199 | 200 | # organize information window 201 | cv2.putText(detailed, 'suggested lowerRGB = [0, 50, 50] ; upperRGB = [155, 255, 225] in low-light environment', 202 | (10, 20), font, fontScale, (255, 196, 255), 1) 203 | cv2.putText(detailed, 'suggested lowerRGB = [0, 31, 2] ; upperRGB = [255, 255, 225] in multi-light environment', 204 | (10, 40), font, fontScale, (255, 196, 255), 1) 205 | cv2.putText(detailed, 'To see segmented hand press "c" from keyboard', (10, 60), font, fontScale, (255, 196, 255), 206 | 1) 207 | cv2.putText(detailed, 'To see binary form of hand press "b" from keyboard', (10, 80), font, fontScale, 208 | (255, 196, 255), 1) 209 | cv2.putText(detailed, 'To see hog form of hand press "h" from keyboard', (10, 100), font, fontScale, (255, 196, 255), 1) 210 | cv2.putText(detailed, 'Press "s" from keyboard to save a frame', (10, 120), font, fontScale, (255, 196, 255), 1) 211 | cv2.imshow('frame', im) 212 | cv2.imshow('information', information) 213 | cv2.imshow('detailed', detailed) 214 | 215 | time.sleep(self.prediction_interval) 216 | # When everything done, release the capture 217 | cap.release() 218 | cv2.destroyAllWindows() 219 | -------------------------------------------------------------------------------- /Code/letters.txt: -------------------------------------------------------------------------------- 1 | A 2 | B 3 | C 4 | D 5 | E 6 | F 7 | G 8 | H 9 | I 10 | K 11 | L 12 | M 13 | N 14 | O 15 | P 16 | Q 17 | R 18 | S 19 | T 20 | U 21 | V 
22 | W 23 | X 24 | Y -------------------------------------------------------------------------------- /Code/main.py: -------------------------------------------------------------------------------- 1 | from SignLanguageHacettepe import SignLanguageRecognizer 2 | import torch 3 | """The torch_model parameter is a callable that takes an img input 4 | and returns a prediction class""" 5 | 6 | 7 | 8 | 9 | save_dir='saved_images' 10 | 11 | #take letters 12 | letters = open("letters.txt", "r") 13 | human_letter = letters.read().split("\n") 14 | 15 | 16 | 17 | vgg16=torch.load("trained_vggModel.pth") 18 | Recognizer=SignLanguageRecognizer(torch_model=vgg16,prediction_interval=0.1,save_key='s',quit_key='q',kernel_size=3) 19 | Recognizer.Track(save_dir,human_letter) 20 | 21 | -------------------------------------------------------------------------------- /Code/train.py: -------------------------------------------------------------------------------- 1 | import torchvision.models as models 2 | import torch 3 | from torch.utils.data import DataLoader 4 | from torchvision import datasets 5 | import os 6 | import torchvision 7 | import torchvision.transforms as transforms 8 | import torch.optim as optim 9 | import torch.nn as nn 10 | import copy 11 | from torch.optim import lr_scheduler 12 | import matplotlib.pyplot as plt 13 | import time 14 | 15 | 16 | #hardware is 'cpu' or 'gpu' 17 | #conv_layer_count is 0,1,2 or 3. Selects the number of conv. 18 | #layers to be trained (starting from the bottom); it is not applied in the function body, see the sketch after draw() 19 | def create_model(hardware,conv_layer_count): 20 | #load model vgg16 21 | model_vgg16 = models.vgg16(pretrained=True) 22 | 23 | 24 | #freeze layers 25 | for param in model_vgg16.parameters(): 26 | param.requires_grad = False 27 | 28 | 29 | #add fc layers back so that their gradients will be calculated 30 | classifiers=nn.Sequential( 31 | #extra dropout layer 32 | #nn.Dropout(0.5), 33 | nn.Linear(25088,4096,True), 34 | nn.ReLU(True), 35 | nn.Dropout(0.5), 36 | nn.Linear(4096,4096,True), 37 | nn.ReLU(True), 38 | nn.Dropout(0.5), 39 | nn.Linear(4096,24,True), 40 | 41 | ) 42 | 43 | #update classifiers of the model 44 | model_vgg16.classifier=classifiers 45 | 46 | 47 | use_gpu=torch.cuda.is_available() 48 | 49 | if hardware=='gpu': 50 | if use_gpu: 51 | device = torch.device('cuda') 52 | model_vgg16=model_vgg16.cuda() 53 | else: 54 | device = torch.device('cpu') 55 | if hardware=='cpu': 56 | device = torch.device('cpu') 57 | 58 | print('Created model for '+str(hardware)) 59 | model_vgg16 = model_vgg16.to(device) 60 | return model_vgg16,device 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | def load_data(dir,normalize,batch): 75 | image_datasets = {x: datasets.ImageFolder(os.path.join(dir, x),normalize[x])for x in ['train', 'val','test']} 76 | 77 | 78 | dataloaders = {x: DataLoader(image_datasets[x], batch_size=batch,shuffle=True, num_workers=0)for x in ['train', 'val','test']} 79 | dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val','test']} 80 | for phase in ['train', 'val','test']: 81 | print(str(dataset_sizes[phase]) +' images loaded for '+str(phase)) 82 | return dataloaders,dataset_sizes 83 | 84 | 85 | def draw(lst_iter, lst_loss, lst_acc, title,save): 86 | 87 | #draw loss or accuracy graph 88 | plt.plot(lst_iter, lst_loss, '-b', label='loss') 89 | plt.plot(lst_iter, lst_acc, '-r', label='accuracy') 90 | plt.xlabel("epochs") 91 | plt.legend(loc='upper left') 92 | plt.title(title) 93 | 94 | if save==True: 95 | plt.savefig(title+".png") # must be called before show() 96 | 97 | # show 98 | plt.show()
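# --- Editor's sketch (not part of the original train.py) ---------------------
# create_model() above documents a conv_layer_count argument but never applies
# it. A minimal way to honour that intent -- unfreezing the last N convolutional
# layers of VGG-16 so they are trained together with the new classifier -- could
# look like the helper below (it assumes the imports already present in this file;
# the function name is illustrative only):
def unfreeze_last_conv_layers(model_vgg16, conv_layer_count):
    # all Conv2d modules of the VGG-16 feature extractor, in forward order
    conv_layers = [m for m in model_vgg16.features if isinstance(m, nn.Conv2d)]
    # re-enable gradients for the last conv_layer_count of them (0 keeps all frozen)
    for conv in conv_layers[len(conv_layers) - conv_layer_count:]:
        for param in conv.parameters():
            param.requires_grad = True
    return model_vgg16
# It could be called from create_model() right after the freezing loop, e.g.
# model_vgg16 = unfreeze_last_conv_layers(model_vgg16, conv_layer_count)
# ------------------------------------------------------------------------------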
99 | 100 | 101 | def train(model,optimizer,criterion,scheduler,number_of_epochs,dataloaders,dataset_sizes,batch_size,device,learning_params): 102 | best_model_wts = copy.deepcopy(model.state_dict()) 103 | best_acc = 0.0 104 | 105 | epoch_losses_val=[] 106 | epoch_losses_train=[] 107 | epoch_acc_train=[] 108 | epoch_acc_val=[] 109 | stop_threshold=6 110 | no_improve=0 111 | min_loss=1000.0 112 | print('Initial learning rate is :'+str(learning_params['lr'])) 113 | updated_learningRate=learning_params['lr'] 114 | for epoch in range(number_of_epochs): 115 | print('Epoch {}/{}'.format(epoch+1, number_of_epochs)) 116 | # if epoch%learning_params['ss']==0 and epoch!=0: 117 | # updated_learningRate=updated_learningRate-updated_learningRate*learning_params['gamma'] 118 | # print('New learning rate is :'+str(updated_learningRate)) 119 | print('-' * 10) 120 | for phase in ['train','val']: 121 | if phase=='train': 122 | scheduler.step() 123 | model.train() 124 | if phase=='val': 125 | model.eval() 126 | 127 | running_loss = 0.0 128 | running_corrects = 0 129 | for i,data in enumerate(dataloaders[phase]): 130 | 131 | 132 | inputs, labels = data 133 | inputs = inputs.to(device) 134 | labels = labels.to(device) 135 | optimizer.zero_grad() 136 | with torch.set_grad_enabled(phase == 'train'): 137 | outputs = model(inputs) 138 | _, preds = torch.max(outputs, 1) 139 | 140 | loss = criterion(outputs, labels) 141 | if phase == 'train': 142 | loss.backward() 143 | optimizer.step() 144 | running_loss += loss.item() * inputs.size(0) 145 | running_corrects += torch.sum(preds == labels.data) 146 | 147 | 148 | epoch_loss = running_loss / dataset_sizes[phase] 149 | epoch_acc = running_corrects.double() / dataset_sizes[phase] 150 | 151 | if phase=='train': 152 | epoch_losses_train.append(epoch_loss) 153 | epoch_acc_train.append(epoch_acc) 154 | else: 155 | epoch_losses_val.append(epoch_loss) 156 | epoch_acc_val.append(epoch_acc) 157 | print('{} Loss: {:.4f} Acc: {:.4f}'.format( 158 | phase, epoch_loss, epoch_acc)) 159 | 160 | if phase == 'val' and epoch_acc > best_acc: 161 | best_acc = epoch_acc 162 | best_model_wts = copy.deepcopy(model.state_dict()) 163 | 164 | 165 | """--------EARLY STOPPING--------""" 166 | """--------EARLY STOPPING--------""" 167 | if phase == 'val' and epoch_loss < min_loss: 168 | min_loss=epoch_loss 169 | no_improve=0 170 | elif phase == 'val' and epoch_loss >= min_loss: 171 | no_improve+=1 172 | if stop_threshold==no_improve: 173 | print('Stop early at epoch:'+str(epoch+1)) 174 | print('Best val Acc: {:.4f}'.format(best_acc)) 175 | epoch_arr=[] 176 | for i in range(epoch+1): 177 | epoch_arr.append(i+1) 178 | draw(epoch_arr,epoch_losses_train,epoch_acc_train,'Adam Optimizer Train',True) 179 | draw(epoch_arr,epoch_losses_val,epoch_acc_val,'Adam Optimizer Validation',True) 180 | model.load_state_dict(best_model_wts) 181 | return model 182 | """--------EARLY STOPPING--------""" 183 | """--------EARLY STOPPING--------""" 184 | 185 | 186 | print('Best val Acc: {:.4f}'.format(best_acc)) 187 | 188 | epoch_arr=[] 189 | for i in range(number_of_epochs): 190 | epoch_arr.append(i+1) 191 | 192 | draw(epoch_arr,epoch_losses_train,epoch_acc_train,'Adam Optimizer Train',True) 193 | draw(epoch_arr,epoch_losses_val,epoch_acc_val,'Adam Optimizer Validation',True) 194 | model.load_state_dict(best_model_wts) 195 | return model 196 | 197 | 198 | def process_test_data(model,device,dataloaders,dataset_sizes): 199 | running_corrects=0 200 | top_five=0 201 | model.eval() 202 | for i,data in enumerate(dataloaders['test']): 203 | inputs, labels = data 204 | inputs = inputs.to(device) 205 | labels = labels.to(device) 206 |
with torch.no_grad(): 207 | outputs = model(inputs) 208 | _, preds = torch.max(outputs, 1) 209 | running_corrects += torch.sum(preds == labels.data) 210 | probs,classes=outputs.topk(5,dim=1) 211 | size=labels.size(0) 212 | for i in range(size): 213 | if labels[i] in classes[i]: 214 | top_five+=1 215 | 216 | acc = running_corrects.double() / dataset_sizes['test'] 217 | five_acc=top_five/ dataset_sizes['test'] 218 | print('Test Model Acc: {:.4f}'.format(acc)) 219 | print('Test Model Top-5 Acc: {:.4f}'.format(five_acc)) 220 | 221 | 222 | 223 | def main(optim_name): 224 | start=time.time() 225 | 226 | 227 | data_transforms = { 228 | 'train': transforms.Compose([ 229 | transforms.RandomResizedCrop(224), 230 | transforms.CenterCrop(224), 231 | torchvision.transforms.ColorJitter(brightness=.05, contrast=.05), 232 | transforms.ToTensor(), 233 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 234 | # 235 | ]), 236 | 'val': transforms.Compose([ 237 | transforms.Resize(256), 238 | transforms.CenterCrop(224), 239 | torchvision.transforms.ColorJitter(brightness=.05, contrast=.05), 240 | transforms.ToTensor(), 241 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 242 | # torchvision.transforms.ColorJitter(brightness=.05, contrast=.05) 243 | ]), 244 | 'test': transforms.Compose([ 245 | transforms.Resize(256), 246 | transforms.CenterCrop(224), 247 | torchvision.transforms.ColorJitter(brightness=.05, contrast=.05), 248 | transforms.ToTensor(), 249 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 250 | # torchvision.transforms.ColorJitter(brightness=.05, contrast=.05) 251 | ]) 252 | } 253 | 254 | """---------PARAMETERS-----------""" 255 | #wd:weight decay, lr:learning rate, ss:step size, gamma:scheduler coefficient 256 | learning_params={'lr':0.0001 ,'wd':0.01,'ss':7,'gamma':0.1} 257 | batch_size=32 258 | direction='Dataset/dataset' 259 | number_of_epochs=25 260 | """---------PARAMETERS-----------""" 261 | 262 | 263 | #create model, set second param to zero to only train fc layers 264 | 265 | model,device=create_model('gpu',3) 266 | criterion=nn.CrossEntropyLoss() 267 | 268 | #use other optimizers 269 | if optim_name=='sgd': 270 | optimizer=optim.SGD(model.parameters(),lr=learning_params['lr'],momentum=0.9) 271 | elif optim_name=='rms': 272 | optimizer=optim.RMSprop(model.parameters(),lr=learning_params['lr'],alpha=0.9) 273 | else: 274 | optimizer=optim.Adam(model.parameters(),lr=learning_params['lr'],weight_decay=learning_params['wd']) 275 | scheduler = lr_scheduler.StepLR(optimizer, step_size=learning_params['ss'], gamma=learning_params['gamma']) 276 | 277 | 278 | 279 | 280 | 281 | dataloaders,dataset_sizes=load_data(direction,data_transforms,batch_size) 282 | 283 | 284 | 285 | 286 | 287 | trained_model=train(model,optimizer,criterion,scheduler, 288 | number_of_epochs,dataloaders,dataset_sizes,batch_size,device,learning_params) 289 | 290 | 291 | process_test_data(trained_model,device,dataloaders,dataset_sizes) 292 | torch.save(trained_model,'trained_vggModel.pth') 293 | print('Total time is: {:.2f}'.format((time.time()-start)/60)+' minutes') 294 | 295 | #'adam', 'sgd' or 'rms' 296 | main('adam') 297 | -------------------------------------------------------------------------------- /Dataset/dataset_aybuke-20190603T083851Z-001.zip: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Enescigdem/SignLanguageRecognizer/876c377206468144cba255f5a589a07675bb370a/Dataset/dataset_aybuke-20190603T083851Z-001.zip -------------------------------------------------------------------------------- /Dataset/letters.txt: -------------------------------------------------------------------------------- 1 | A 2 | B 3 | C 4 | D 5 | E 6 | F 7 | G 8 | H 9 | I 10 | K 11 | L 12 | M 13 | N 14 | O 15 | P 16 | Q 17 | R 18 | S 19 | T 20 | U 21 | V 22 | W 23 | X 24 | Y -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Enescigdem 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Data set Link : https://drive.google.com/drive/folders/1ZjWD4T7eCj4FPh-cIasz-5qh4S87JSAq?usp=sharing
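After extracting the archive, train.py (see load_data and the direction variable) expects a torchvision ImageFolder layout with train, val and test splits, along the lines of:

    Dataset/dataset/
    ├── train/   (one sub-folder per letter from letters.txt, e.g. A ... Y)
    ├── val/
    └── test/

The folder name Dataset/dataset is taken from train.py; the per-letter sub-folder names are an assumption based on the 24 classes listed in letters.txt.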
2 | 3 | In this study, we collected our own dataset of 24 different gestures. Images were taken from 2 people, giving 1560 training, 4 | 360 validation and 360 test images. We use a pretrained VGG-16 model as the classifier. After training the model, we reach about 5 | 99% accuracy on the test data. 6 | 7 |

8 | 9 |

10 | 11 | The above picture shows the right part of the system. We display the segmented hand in the res window, usage hints in the detailed window and 12 | trackbar controls in the information window to adjust the upper and lower RGB values according to the light in the environment. To change the 13 | lower and upper RGB values, the OFF/ON switch should be set to 1. 14 | 15 | Additionally, while the system is running: 16 |
17 | * If "b" is pressed on the keyboard, the binary segmented form of the frame is shown in the res window. 18 | * If "c" is pressed on the keyboard, the segmented form of the frame is shown in the res window (this is also the default). 19 | * If "h" is pressed on the keyboard, the HOG features extracted from the frame are shown in the res window. 20 | * If "s" is pressed on the keyboard, the segmented form of the frame, converted to grayscale, is saved (a minimal prediction sketch for such saved frames is given at the end of this file). 21 | 22 | Medium blog: 23 | https://medium.com/@aybukeyalcinerr/realtime-recognition-of-american-sign-language-alphabet-8ece006d424e?postPublishedType=repub 24 | 25 | -------------------------------------------------------------------------------- /right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Enescigdem/SignLanguageRecognizer/876c377206468144cba255f5a589a07675bb370a/right.png --------------------------------------------------------------------------------
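For reference, a minimal offline-prediction sketch (an editorial example, not a script from the repository): it assumes it is run from the Code directory, that training produced trained_vggModel.pth as saved by train.py, and that some_saved_frame.png is a grayscale frame previously saved with the "s" key; that file name and the CPU map_location are assumptions.

import cv2
import torch
from SignLanguageHacettepe import SignLanguageRecognizer

# class index -> letter mapping, exactly as in main.py
human_letter = open("letters.txt", "r").read().split("\n")

# load the model trained by train.py (file name taken from train.py's torch.save)
model = torch.load("trained_vggModel.pth", map_location="cpu")

recognizer = SignLanguageRecognizer(torch_model=model)

# a grayscale frame saved earlier with the "s" key; the file name is hypothetical
frame = cv2.imread("some_saved_frame.png", cv2.IMREAD_GRAYSCALE)
predicted = recognizer.predict(frame)  # returns a class-index tensor
print("Predicted letter:", human_letter[predicted])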