├── .gitignore ├── Code ├── SignLanguageHacettepe.py ├── letters.txt ├── main.py └── train.py ├── Dataset ├── dataset_aybuke-20190603T083851Z-001.zip └── letters.txt ├── LICENSE ├── README.md └── right.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /Code/SignLanguageHacettepe.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import keyboard 4 | import ctypes 5 | import time 6 | from torchvision import transforms,models 7 | import torch 8 | from PIL import Image 9 | from torch.autograd import Variable 10 | 11 | 12 | def nothing(x): 13 | pass 14 | 15 | 16 | class SignLanguageRecognizer: 17 | def __init__(self,torch_model=None,prediction_interval=0.5,save_key='s',quit_key='q',kernel_size=3): 18 | self.kernel_size=kernel_size 19 | self.torch_model = torch_model 20 | self.prediction_interval=prediction_interval 21 | self.save_key=save_key 22 | self.quit_key=quit_key 23 | 24 | def predict(self,img): 25 | data_transforms = transforms.Compose([ 26 | transforms.Resize(256), 27 | transforms.CenterCrop(224), 28 | transforms.ToTensor(), 29 | transforms.Normalize( 30 | mean=[0.485, 0.456, 0.406], 31 | std=[0.229, 0.224, 0.225] 32 | ) 33 | # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 34 | ]) 35 | self.torch_model.eval() 36 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 37 | img=cv2.cvtColor(img,cv2.COLOR_GRAY2RGB) 38 | img_pil = Image.fromarray(img) 39 | 40 | img_tensor = data_transforms(img_pil).float() 41 | img_tensor = img_tensor.unsqueeze_(0) 42 | inputs = Variable(img_tensor) 43 | inputs = inputs.to(device) 44 | self.torch_model = self.torch_model.to(device) 45 | fc_out = 
self.torch_model(inputs) 46 | _, predicted = torch.max(fc_out.data, 1) 47 | 48 | return predicted 49 | 50 | 51 | def Track(self,save_dir, human_letter): 52 | 53 | """Create Video Capture Screen""" 54 | cap = cv2.VideoCapture(0) 55 | ret = cap.set(3, 860) 56 | ret = cap.set(4, 620) 57 | 58 | kernel = np.ones((self.kernel_size, self.kernel_size), np.uint8) 59 | """Get Screen Size to adjust panels""" 60 | user32 = ctypes.windll.user32 61 | user32.SetProcessDPIAware() 62 | width, height = user32.GetSystemMetrics(0), user32.GetSystemMetrics(1) 63 | 64 | #####System GUI creation ##### 65 | """Create Windows""" 66 | cv2.namedWindow('frame', cv2.WINDOW_AUTOSIZE) 67 | cv2.moveWindow('frame', 0, 10) 68 | cv2.namedWindow('res', cv2.WINDOW_AUTOSIZE) 69 | cv2.moveWindow('res', int(width / 2), 10) 70 | cv2.namedWindow('information', cv2.WINDOW_AUTOSIZE) 71 | cv2.moveWindow('information', int(width / 2), int(height / 3 + height / 4 - 22)) 72 | cv2.namedWindow('detailed', cv2.WINDOW_AUTOSIZE) 73 | cv2.moveWindow('detailed', int(width / 2), int(height / 3)) 74 | """Create trackbars for color change""" 75 | cv2.createTrackbar('LowerR', 'information', 0, 255, nothing) 76 | cv2.createTrackbar('LowerG', 'information', 50, 255, nothing) 77 | cv2.createTrackbar('LowerB', 'information', 50, 255, nothing) 78 | cv2.createTrackbar('UpperR', 'information', 155, 255, nothing) 79 | cv2.createTrackbar('UpperG', 'information', 255, 255, nothing) 80 | cv2.createTrackbar('UpperB', 'information', 255, 255, nothing) 81 | 82 | """Create switch for ON/OFF functionality""" 83 | switch = '0 : OFF \n1 : ON' 84 | cv2.createTrackbar(switch, 'information', 0, 1, nothing) 85 | 86 | """define range of red color in HSV (red is [180, 0, 0])""" 87 | lower_red = np.array([0, 50, 50]) 88 | upper_red = np.array([155, 255, 255]) 89 | 90 | 91 | 92 | color = (102, 0, 102) # rgb 93 | white = (255, 225, 225) 94 | 95 | rect_margin = 30 96 | key = "c" 97 | predicted = 0 98 | while cv2.waitKey(24) & 0xFF != ord(self.quit_key): 99 | ret, frame = cap.read() 100 | """Convert to HSV""" 101 | lr = cv2.getTrackbarPos('LowerR', 'information') 102 | lg = cv2.getTrackbarPos('LowerG', 'information') 103 | lb = cv2.getTrackbarPos('LowerB', 'information') 104 | ur = cv2.getTrackbarPos('UpperR', 'information') 105 | ug = cv2.getTrackbarPos('UpperG', 'information') 106 | ub = cv2.getTrackbarPos('UpperB', 'information') 107 | s = cv2.getTrackbarPos(switch, 'information') 108 | 109 | if s == 0: 110 | lower_red = np.array([0, 50, 50]) 111 | upper_red = np.array([155, 255, 255]) 112 | 113 | else: 114 | lower_red = np.array([lr, lg, lb]) 115 | upper_red = np.array([ur, ug, ub]) 116 | 117 | 118 | 119 | frame = np.flip(frame, axis=1) 120 | """Draw a rectangle.""" 121 | im = np.copy(frame) 122 | cv2.rectangle(im, (int(width / 2) - 612, 30), (int(width / 2) - 388, 230), color, 0) 123 | 124 | 125 | rect = cv2.rectangle(im, (int(width / 2) - 612, 15), (int(width / 2) - 388, 29), color, 126 | cv2.FILLED) # background of prediction 127 | font = cv2.FONT_HERSHEY_SIMPLEX 128 | TopLeftCorner = (int(width / 2) - 612, 28) 129 | fontScale = 0.5 130 | fontColor = white 131 | lineType = 1 132 | 133 | cv2.putText(rect, str(human_letter[predicted]), 134 | TopLeftCorner, 135 | font, 136 | fontScale, 137 | fontColor, 138 | lineType) 139 | 140 | cropped = im[30:230, int(width / 2) - 612:int(width / 2) - 388] 141 | 142 | """Threshold the HSV image to get only blue colors""" 143 | hsv = cv2.cvtColor(cropped, cv2.COLOR_BGR2HSV) 144 | """Bitwise-AND mask and original image""" 145 | mask = 
cv2.inRange(hsv, lower_red, upper_red) 146 | 147 | mask_dilate= cv2.dilate(mask,kernel, iterations=3) 148 | res = cv2.bitwise_and(cropped, cropped, mask=mask_dilate) 149 | 150 | """Change the background to white""" 151 | res[np.all(res == [0, 0, 0], axis=2)] = [210, 210, 210] 152 | 153 | """Convert image to grayscale""" 154 | rgbRes=res 155 | res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY) 156 | res = np.flip(res, axis=1) 157 | # construct black screen 158 | information = np.zeros((int(width / 2), int(height / 3), 3), np.uint8) 159 | detailed = np.zeros((int(width / 2), int(height / 4), 3), np.uint8) 160 | 161 | im = cv2.resize(im, (int(width / 2), height)) 162 | information = cv2.resize(information, (int(width / 2), int(height / 3))) 163 | detailed = cv2.resize(detailed, (int(width / 2), int(height / 5))) 164 | 165 | res = cv2.blur(res, (3, 3)) 166 | predicted=self.predict(res) 167 | 168 | 169 | if keyboard.is_pressed(self.save_key): 170 | img = cv2.resize(res, (224, 224)) 171 | cv2.imwrite(save_dir+"/saved_images" + str(predicted) + ".png", img) 172 | """Change Visual Screen""" 173 | if keyboard.is_pressed('b'): # if key 'b' is pressed 174 | key = "b" 175 | if keyboard.is_pressed('c'): # if key 'c' is pressed 176 | key = "c" 177 | if keyboard.is_pressed('h'): # if key 'h' is pressed 178 | key = "h" 179 | 180 | # check key value 181 | if key == "b": 182 | mask_dilate = np.flip(mask_dilate, axis=1) 183 | mask_dilate = cv2.resize(mask_dilate, (int(width / 2), int(height / 3))) 184 | cv2.imshow('res', mask_dilate) 185 | if key == "c": 186 | rgbRes = cv2.blur(rgbRes, (3, 3)) 187 | rgbRes = cv2.resize(rgbRes, (int(width / 2), int(height / 3))) 188 | cv2.imshow('res', rgbRes) 189 | if key == "h": 190 | # change to hog 191 | k_size = 3 192 | hog = np.float32(res) / 255.0 193 | gx = cv2.Sobel(hog, cv2.CV_64F, 1, 0, ksize=k_size) 194 | gy = cv2.Sobel(hog, cv2.CV_64F, 0, 1, ksize=k_size) 195 | mag, angle = cv2.cartToPolar(gx, gy, angleInDegrees=True) 196 | mag = cv2.resize(mag, (int(width / 2), int(height / 3))) 197 | cv2.imshow('res', mag) 198 | 199 | 200 | # organize information window 201 | cv2.putText(detailed, 'suggested lowerRGB = [0, 50, 50] ; upperRGB = [155, 255, 225] in low-light environment', 202 | (10, 20), font, fontScale, (255, 196, 255), 1) 203 | cv2.putText(detailed, 'suggested lowerRGB = [0, 31, 2] ; upperRGB = [255, 255, 225] in multi-light environment', 204 | (10, 40), font, fontScale, (255, 196, 255), 1) 205 | cv2.putText(detailed, 'To see segmented hand press "c" from keyboard', (10, 60), font, fontScale, (255, 196, 255), 206 | 1) 207 | cv2.putText(detailed, 'To see binary form of hand press "b" from keyboard', (10, 80), font, fontScale, 208 | (255, 196, 255), 1) 209 | cv2.putText(detailed, 'To see hog form of hand press "h" from keyboard', (10, 100), font, fontScale, (255, 196, 255), 1) 210 | cv2.putText(detailed, 'Press "s" from keyboard to save a frame', (10, 120), font, fontScale, (255, 196, 255), 1) 211 | cv2.imshow('frame', im) 212 | cv2.imshow('information', information) 213 | cv2.imshow('detailed', detailed) 214 | 215 | time.sleep(self.prediction_interval) 216 | # When everything done, release the capture 217 | cap.release() 218 | cv2.destroyAllWindows() 219 | -------------------------------------------------------------------------------- /Code/letters.txt: -------------------------------------------------------------------------------- 1 | A 2 | B 3 | C 4 | D 5 | E 6 | F 7 | G 8 | H 9 | I 10 | K 11 | L 12 | M 13 | N 14 | O 15 | P 16 | Q 17 | R 18 | S 19 | T 20 | U 21 | V 
22 | W 23 | X 24 | Y -------------------------------------------------------------------------------- /Code/main.py: -------------------------------------------------------------------------------- 1 | from SignLanguageHacettepe import SignLanguageRecognizer 2 | import torch 3 | """The torch_model parameter is a callable that takes an img input 4 | and returns a prediction class""" 5 | 6 | 7 | 8 | 9 | save_dir='saved_images' 10 | 11 | #take letters 12 | letters = open("letters.txt", "r") 13 | human_letter = letters.read().split("\n") 14 | 15 | 16 | 17 | vgg16=torch.load("trained_vggModel.pth") 18 | Recognizer=SignLanguageRecognizer(torch_model=vgg16,prediction_interval=0.1,save_key='s',quit_key='q',kernel_size=3) 19 | Recognizer.Track(save_dir,human_letter) 20 | 21 | -------------------------------------------------------------------------------- /Code/train.py: -------------------------------------------------------------------------------- 1 | import torchvision.models as models 2 | import torch 3 | from torch.utils.data import DataLoader 4 | from torchvision import datasets 5 | import os 6 | import torchvision 7 | import torchvision.transforms as transforms 8 | import torch.optim as optim 9 | import torch.nn as nn 10 | import copy 11 | from torch.optim import lr_scheduler 12 | import matplotlib.pyplot as plt 13 | import time 14 | 15 | 16 | #hardware is 'cpu' or 'gpu' 17 | #conv_layer_count is 0,1,2 or 3. Selects the number of conv. 18 | #layers to be trained (starting from the bottom); it is not applied in the function body, see the sketch after draw() 19 | def create_model(hardware,conv_layer_count): 20 | #load model vgg16 21 | model_vgg16 = models.vgg16(pretrained=True) 22 | 23 | 24 | #freeze layers 25 | for param in model_vgg16.parameters(): 26 | param.requires_grad = False 27 | 28 | 29 | #add fc layers back so that their gradients will be calculated 30 | classifiers=nn.Sequential( 31 | #extra dropout layer 32 | #nn.Dropout(0.5), 33 | nn.Linear(25088,4096,True), 34 | nn.ReLU(True), 35 | nn.Dropout(0.5), 36 | nn.Linear(4096,4096,True), 37 | nn.ReLU(True), 38 | nn.Dropout(0.5), 39 | nn.Linear(4096,24,True), 40 | 41 | ) 42 | 43 | #update classifiers of the model 44 | model_vgg16.classifier=classifiers 45 | 46 | 47 | use_gpu=torch.cuda.is_available() 48 | 49 | if hardware=='gpu': 50 | if use_gpu: 51 | device = torch.device('cuda') 52 | model_vgg16=model_vgg16.cuda() 53 | else: 54 | device = torch.device('cpu') 55 | if hardware=='cpu': 56 | device = torch.device('cpu') 57 | 58 | print('Created model for '+str(hardware)) 59 | model_vgg16 = model_vgg16.to(device) 60 | return model_vgg16,device 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | def load_data(dir,normalize,batch): 75 | image_datasets = {x: datasets.ImageFolder(os.path.join(dir, x),normalize[x])for x in ['train', 'val','test']} 76 | 77 | 78 | dataloaders = {x: DataLoader(image_datasets[x], batch_size=batch,shuffle=True, num_workers=0)for x in ['train', 'val','test']} 79 | dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val','test']} 80 | for phase in ['train', 'val','test']: 81 | print(str(dataset_sizes[phase]) +' images loaded for '+str(phase)) 82 | return dataloaders,dataset_sizes 83 | 84 | 85 | def draw(lst_iter, lst_loss, lst_acc, title,save): 86 | 87 | #draw loss or accuracy graph 88 | plt.plot(lst_iter, lst_loss, '-b', label='loss') 89 | plt.plot(lst_iter, lst_acc, '-r', label='accuracy') 90 | plt.xlabel("epochs") 91 | plt.legend(loc='upper left') 92 | plt.title(title) 93 | 94 | if save==True: 95 | plt.savefig(title+".png") # must be called before show() 96 | 97 | # show 98 | plt.show()
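# --- Editor's sketch (not part of the original train.py) ---------------------
# create_model() above documents a conv_layer_count argument but never applies
# it. A minimal way to honour that intent -- unfreezing the last N convolutional
# layers of VGG-16 so they are trained together with the new classifier -- could
# look like the helper below (it assumes the imports already present in this file;
# the function name is illustrative only):
def unfreeze_last_conv_layers(model_vgg16, conv_layer_count):
    # all Conv2d modules of the VGG-16 feature extractor, in forward order
    conv_layers = [m for m in model_vgg16.features if isinstance(m, nn.Conv2d)]
    # re-enable gradients for the last conv_layer_count of them (0 keeps all frozen)
    for conv in conv_layers[len(conv_layers) - conv_layer_count:]:
        for param in conv.parameters():
            param.requires_grad = True
    return model_vgg16
# It could be called from create_model() right after the freezing loop, e.g.
# model_vgg16 = unfreeze_last_conv_layers(model_vgg16, conv_layer_count)
# ------------------------------------------------------------------------------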
99 | 100 | 101 | def train(model,optimizer,criterion,scheduler,number_of_epochs,dataloaders,dataset_sizes,batch_size,device,learning_params): 102 | best_model_wts = copy.deepcopy(model.state_dict()) 103 | best_acc = 0.0 104 | 105 | epoch_losses_val=[] 106 | epoch_losses_train=[] 107 | epoch_acc_train=[] 108 | epoch_acc_val=[] 109 | stop_threshold=6 110 | no_improve=0 111 | min_loss=1000.0 112 | print('Initial learning rate is :'+str(learning_params['lr'])) 113 | updated_learningRate=learning_params['lr'] 114 | for epoch in range(number_of_epochs): 115 | print('Epoch {}/{}'.format(epoch+1, number_of_epochs)) 116 | # if epoch%learning_params['ss']==0 and epoch!=0: 117 | # updated_learningRate=updated_learningRate-updated_learningRate*learning_params['gamma'] 118 | # print('New learning rate is :'+str(updated_learningRate)) 119 | print('-' * 10) 120 | for phase in ['train','val']: 121 | if phase=='train': 122 | scheduler.step() 123 | model.train() 124 | if phase=='val': 125 | model.eval() 126 | 127 | running_loss = 0.0 128 | running_corrects = 0 129 | for i,data in enumerate(dataloaders[phase]): 130 | 131 | 132 | inputs, labels = data 133 | inputs = inputs.to(device) 134 | labels = labels.to(device) 135 | optimizer.zero_grad() 136 | with torch.set_grad_enabled(phase == 'train'): 137 | outputs = model(inputs) 138 | _, preds = torch.max(outputs, 1) 139 | 140 | loss = criterion(outputs, labels) 141 | if phase == 'train': 142 | loss.backward() 143 | optimizer.step() 144 | running_loss += loss.item() * inputs.size(0) 145 | running_corrects += torch.sum(preds == labels.data) 146 | 147 | 148 | epoch_loss = running_loss / dataset_sizes[phase] 149 | epoch_acc = running_corrects.double() / dataset_sizes[phase] 150 | 151 | if phase=='train': 152 | epoch_losses_train.append(epoch_loss) 153 | epoch_acc_train.append(epoch_acc) 154 | else: 155 | epoch_losses_val.append(epoch_loss) 156 | epoch_acc_val.append(epoch_acc) 157 | print('{} Loss: {:.4f} Acc: {:.4f}'.format( 158 | phase, epoch_loss, epoch_acc)) 159 | 160 | if phase == 'val' and epoch_acc > best_acc: 161 | best_acc = epoch_acc 162 | best_model_wts = copy.deepcopy(model.state_dict()) 163 | 164 | 165 | """--------EARLY STOPPING--------""" 166 | """--------EARLY STOPPING--------""" 167 | if phase == 'val' and epoch_loss < min_loss: 168 | min_loss=epoch_loss 169 | no_improve=0 170 | elif phase == 'val' and epoch_loss >= min_loss: 171 | no_improve+=1 172 | if stop_threshold==no_improve: 173 | print('Stop early at epoch:'+str(epoch+1)) 174 | print('Best val Acc: {:.4f}'.format(best_acc)) 175 | epoch_arr=[] 176 | for i in range(epoch+1): 177 | epoch_arr.append(i+1) 178 | draw(epoch_arr,epoch_losses_train,epoch_acc_train,'Adam Optimizer Train',True) 179 | draw(epoch_arr,epoch_losses_val,epoch_acc_val,'Adam Optimizer Validation',True) 180 | model.load_state_dict(best_model_wts) 181 | return model 182 | """--------EARLY STOPPING--------""" 183 | """--------EARLY STOPPING--------""" 184 | 185 | 186 | print('Best val Acc: {:.4f}'.format(best_acc)) 187 | 188 | epoch_arr=[] 189 | for i in range(number_of_epochs): 190 | epoch_arr.append(i+1) 191 | 192 | draw(epoch_arr,epoch_losses_train,epoch_acc_train,'Adam Optimizer Train',True) 193 | draw(epoch_arr,epoch_losses_val,epoch_acc_val,'Adam Optimizer Validation',True) 194 | model.load_state_dict(best_model_wts) 195 | return model 196 | 197 | 198 | def process_test_data(model,device,dataloaders,dataset_sizes): 199 | running_corrects=0 200 | top_five=0 201 | model.eval() 202 | for i,data in enumerate(dataloaders['test']): 203 | inputs, labels = data 204 | inputs = inputs.to(device) 205 | labels = labels.to(device) 206 |
with torch.no_grad(): 207 | outputs = model(inputs) 208 | _, preds = torch.max(outputs, 1) 209 | running_corrects += torch.sum(preds == labels.data) 210 | probs,classes=outputs.topk(5,dim=1) 211 | size=labels.size(0) 212 | for i in range(size): 213 | if labels[i] in classes[i]: 214 | top_five+=1 215 | 216 | acc = running_corrects.double() / dataset_sizes['test'] 217 | five_acc=top_five/ dataset_sizes['test'] 218 | print('Test Model Acc: {:.4f}'.format(acc)) 219 | print('Test Model Top-5 Acc: {:.4f}'.format(five_acc)) 220 | 221 | 222 | 223 | def main(optim_name): 224 | start=time.time() 225 | 226 | 227 | data_transforms = { 228 | 'train': transforms.Compose([ 229 | transforms.RandomResizedCrop(224), 230 | transforms.CenterCrop(224), 231 | torchvision.transforms.ColorJitter(brightness=.05, contrast=.05), 232 | transforms.ToTensor(), 233 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 234 | # 235 | ]), 236 | 'val': transforms.Compose([ 237 | transforms.Resize(256), 238 | transforms.CenterCrop(224), 239 | torchvision.transforms.ColorJitter(brightness=.05, contrast=.05), 240 | transforms.ToTensor(), 241 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 242 | # torchvision.transforms.ColorJitter(brightness=.05, contrast=.05) 243 | ]), 244 | 'test': transforms.Compose([ 245 | transforms.Resize(256), 246 | transforms.CenterCrop(224), 247 | torchvision.transforms.ColorJitter(brightness=.05, contrast=.05), 248 | transforms.ToTensor(), 249 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 250 | # torchvision.transforms.ColorJitter(brightness=.05, contrast=.05) 251 | ]) 252 | } 253 | 254 | """---------PARAMETERS-----------""" 255 | #wd:weight decay, lr:learning rate, ss:step size, gamma:scheduler coefficient 256 | learning_params={'lr':0.0001 ,'wd':0.01,'ss':7,'gamma':0.1} 257 | batch_size=32 258 | direction='Dataset/dataset' 259 | number_of_epochs=25 260 | """---------PARAMETERS-----------""" 261 | 262 | 263 | #create model, set second param to zero to only train fc layers 264 | 265 | model,device=create_model('gpu',3) 266 | criterion=nn.CrossEntropyLoss() 267 | 268 | #use other optimizers 269 | if optim_name=='sgd': 270 | optimizer=optim.SGD(model.parameters(),lr=learning_params['lr'],momentum=0.9) 271 | elif optim_name=='rms': 272 | optimizer=optim.RMSprop(model.parameters(),lr=learning_params['lr'],alpha=0.9) 273 | else: 274 | optimizer=optim.Adam(model.parameters(),lr=learning_params['lr'],weight_decay=learning_params['wd']) 275 | scheduler = lr_scheduler.StepLR(optimizer, step_size=learning_params['ss'], gamma=learning_params['gamma']) 276 | 277 | 278 | 279 | 280 | 281 | dataloaders,dataset_sizes=load_data(direction,data_transforms,batch_size) 282 | 283 | 284 | 285 | 286 | 287 | trained_model=train(model,optimizer,criterion,scheduler, 288 | number_of_epochs,dataloaders,dataset_sizes,batch_size,device,learning_params) 289 | 290 | 291 | process_test_data(trained_model,device,dataloaders,dataset_sizes) 292 | torch.save(trained_model,'trained_vggModel.pth') 293 | print('Total time is: {:.2f}'.format((time.time()-start)/60)+' minutes') 294 | 295 | #'adam', 'sgd' or 'rms' 296 | main('adam') 297 | -------------------------------------------------------------------------------- /Dataset/dataset_aybuke-20190603T083851Z-001.zip: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Enescigdem/SignLanguageRecognizer/876c377206468144cba255f5a589a07675bb370a/Dataset/dataset_aybuke-20190603T083851Z-001.zip -------------------------------------------------------------------------------- /Dataset/letters.txt: -------------------------------------------------------------------------------- 1 | A 2 | B 3 | C 4 | D 5 | E 6 | F 7 | G 8 | H 9 | I 10 | K 11 | L 12 | M 13 | N 14 | O 15 | P 16 | Q 17 | R 18 | S 19 | T 20 | U 21 | V 22 | W 23 | X 24 | Y -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Enescigdem 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Data set Link : https://drive.google.com/drive/folders/1ZjWD4T7eCj4FPh-cIasz-5qh4S87JSAq?usp=sharing
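After extracting the archive, train.py (see load_data and the direction variable) expects a torchvision ImageFolder layout with train, val and test splits, along the lines of:

    Dataset/dataset/
    ├── train/   (one sub-folder per letter from letters.txt, e.g. A ... Y)
    ├── val/
    └── test/

The folder name Dataset/dataset is taken from train.py; the per-letter sub-folder names are an assumption based on the 24 classes listed in letters.txt.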
2 | 3 | In this study, we collected our own dataset of 24 different gestures. Images were taken from 2 people, giving 1560 training, 4 | 360 validation and 360 test images. We use a pretrained VGG-16 model as the classifier. After training the model, we reach about 5 | 99% accuracy on the test data. 6 | 7 |

8 | 9 |

10 | 11 | The above picture shows the right part of the system. We display the segmented hand in the res window, usage hints in the detailed window and 12 | trackbar controls in the information window to adjust the upper and lower RGB values according to the light in the environment. To change the 13 | lower and upper RGB values, the OFF/ON switch should be set to 1. 14 | 15 | Additionally, while the system is running: 16 |
17 | * If "b" is pressed on the keyboard, the binary segmented form of the frame is shown in the res window. 18 | * If "c" is pressed on the keyboard, the segmented form of the frame is shown in the res window (this is also the default). 19 | * If "h" is pressed on the keyboard, the HOG features extracted from the frame are shown in the res window. 20 | * If "s" is pressed on the keyboard, the segmented form of the frame, converted to grayscale, is saved (a minimal prediction sketch for such saved frames is given at the end of this file). 21 | 22 | Medium blog: 23 | https://medium.com/@aybukeyalcinerr/realtime-recognition-of-american-sign-language-alphabet-8ece006d424e?postPublishedType=repub 24 | 25 | -------------------------------------------------------------------------------- /right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Enescigdem/SignLanguageRecognizer/876c377206468144cba255f5a589a07675bb370a/right.png --------------------------------------------------------------------------------
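For reference, a minimal offline-prediction sketch (an editorial example, not a script from the repository): it assumes it is run from the Code directory, that training produced trained_vggModel.pth as saved by train.py, and that some_saved_frame.png is a grayscale frame previously saved with the "s" key; that file name and the CPU map_location are assumptions.

import cv2
import torch
from SignLanguageHacettepe import SignLanguageRecognizer

# class index -> letter mapping, exactly as in main.py
human_letter = open("letters.txt", "r").read().split("\n")

# load the model trained by train.py (file name taken from train.py's torch.save)
model = torch.load("trained_vggModel.pth", map_location="cpu")

recognizer = SignLanguageRecognizer(torch_model=model)

# a grayscale frame saved earlier with the "s" key; the file name is hypothetical
frame = cv2.imread("some_saved_frame.png", cv2.IMREAD_GRAYSCALE)
predicted = recognizer.predict(frame)  # returns a class-index tensor
print("Predicted letter:", human_letter[predicted])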