├── .gitignore ├── Indian Sign Language Hand Poses.png ├── Models ├── GestureHMMs │ ├── After.pkl │ ├── All The Best.pkl │ ├── Apple.pkl │ ├── Good Afternoon.pkl │ ├── Good Morning.pkl │ ├── Good Night.pkl │ ├── I Am Sorry.pkl │ ├── Leader.pkl │ ├── Please Give Me Your Pen.pkl │ ├── Strike.pkl │ ├── That is Good.pkl │ └── Towards.pkl ├── silatra_digits_and_letters.sav └── silatra_gesture_signs.sav ├── README.md ├── check_segmentation.py ├── dependencies.sh ├── sample1.py ├── sample2.py ├── samples ├── gesture_pose_cup_closed.png ├── gesture_pose_fist.png ├── gesture_pose_thumbs_up.png ├── good-afternoon.avi ├── good-morning.avi ├── please-give-me-your-pen.avi └── pose_d.png ├── server.py ├── server_utils.py ├── silatra.py ├── train ├── process_images.py └── train.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/* 2 | -------------------------------------------------------------------------------- /Indian Sign Language Hand Poses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Indian Sign Language Hand Poses.png -------------------------------------------------------------------------------- /Models/GestureHMMs/After.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/After.pkl -------------------------------------------------------------------------------- /Models/GestureHMMs/All The Best.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/All The Best.pkl -------------------------------------------------------------------------------- /Models/GestureHMMs/Apple.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/Apple.pkl -------------------------------------------------------------------------------- /Models/GestureHMMs/Good Afternoon.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/Good Afternoon.pkl -------------------------------------------------------------------------------- /Models/GestureHMMs/Good Morning.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/Good Morning.pkl -------------------------------------------------------------------------------- /Models/GestureHMMs/Good Night.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/Good Night.pkl -------------------------------------------------------------------------------- /Models/GestureHMMs/I Am Sorry.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/I Am Sorry.pkl
--------------------------------------------------------------------------------
/Models/GestureHMMs/Leader.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/Leader.pkl
--------------------------------------------------------------------------------
/Models/GestureHMMs/Please Give Me Your Pen.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/Please Give Me Your Pen.pkl
--------------------------------------------------------------------------------
/Models/GestureHMMs/Strike.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/Strike.pkl
--------------------------------------------------------------------------------
/Models/GestureHMMs/That is Good.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/That is Good.pkl
--------------------------------------------------------------------------------
/Models/GestureHMMs/Towards.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/GestureHMMs/Towards.pkl
--------------------------------------------------------------------------------
/Models/silatra_digits_and_letters.sav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/silatra_digits_and_letters.sav
--------------------------------------------------------------------------------
/Models/silatra_gesture_signs.sav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/Models/silatra_gesture_signs.sav
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SiLaTra - The Indian Sign Language Translator
2 | 
3 | ## Developers
4 | * [Tejas Dastane](https://github.com/dev-td7)
5 | * [Varun Rao](https://github.com/vrr-21)
6 | * [Kartik Shenoy](https://github.com/kartik2112)
7 | * [Devendra Vyavaharkar](https://github.com/DevendraVyavaharkar)
8 | 
9 | ## Introduction
10 | 
11 | This is an Indian Sign Language Translator API. It serves the following purposes:
12 | 
13 | * Hand Pose Recognition [digits 0-9 and letters A-Z, as well as the intermediate poses used in gestures]
14 | * Gesture Recognition
15 | 
16 | The SiLaTra API works only on one-handed gestures made with the right hand. The sign performer **must** wear a full-sleeve shirt in a colour other than skin or near-skin colours. See the Constraints section below for a detailed account.
17 | 
18 | **This API can be extended to other sign languages as well!**
19 | Just add your own modules to recognise hand poses in the target sign language and corresponding HMM models for each gesture, then replace the model names used in `utils.py` and `silatra.py`. Scroll down for instructions on how to train your own custom model.
20 | 
21 | This API makes use of the following modules:
22 | 
23 | * Face recognition
24 | * Skin segmentation
25 | * Hand region segmentation (single hand only)
26 | * Object stabilisation (using facial reference)
27 | * Hand motion detection
28 | 
29 | The following gestures are supported:
30 | 
31 | 1. Good Afternoon
32 | 2. Good Morning
33 | 3. Good Night
34 | 4. After
35 | 5. Leader
36 | 6. Please give me your pen
37 | 7. Strike
38 | 8. Apple
39 | 9. Towards
40 | 10. I am sorry
41 | 11. All the best
42 | 12. That is good
43 | 
44 | 
45 | ### External Links
46 | * SiLaTra is actually built as an Android application which can recognise gestures and hand poses in real time. The server-side implementation of that application can be found here - [SiLaTra server-side](https://github.com/kartik2112/Silatra).
47 | * The Android application, which sends the real-time feed to the server, can be found here - [The Android application](https://github.com/DevendraVyavaharkar/Silatra-UDP).
48 | 
49 | ## Usage
50 | 
51 | ### Installing dependencies
52 | To install all dependencies, run the `dependencies.sh` script. It will install everything on its own. This API is written for Python 3 only.
53 | 
54 | ### Ambient lighting check
55 | Since recognition depends entirely on skin segmentation, good ambient lighting is required. You can check your setup yourself by running `check_segmentation.py`.
56 | 
57 | ### Using the API functions
58 | For Hand Pose Recognition and Gesture Recognition, import `silatra.py` in your Python script. It provides the following (a short usage sketch follows this list):
59 | 
60 | * `recognise_hand_pose` function: for recognising hand poses.
61 |   * For Indian Sign Language digits and letters, call the function directly.
62 |   * For classification of the intermediate gesture poses (those with the prefix `gesture_pose_` in `./samples`), pass the parameter `model_path='Models/silatra_gesture_signs.sav'`.
63 | * `Gesture` class: create an instance of this class and use the following methods:
64 |   * `add_frame`: pass each frame of the unknown gesture you wish to classify into this method.
65 |   * `classify_gesture`: after all frames have been passed, call this method to classify the gesture and get the result.
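
A minimal usage sketch, condensed from `sample1.py` and `sample2.py` (run it from the repository root so that the relative model paths resolve):

```python
import cv2
import silatra
from silatra import Gesture

# Hand pose recognition - uses the digits-and-letters model by default
image = cv2.imread('samples/pose_d.png')
print(silatra.recognise_hand_pose(image))

# Gesture recognition - feed every frame of a clip, then classify
video = cv2.VideoCapture('samples/good-morning.avi')
gesture = Gesture()
while True:
    ret, frame = video.read()
    if not ret: break
    gesture.add_frame(frame)
video.release()
print(gesture.classify_gesture())
```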
66 | 
67 | ### Samples
68 | 
69 | Two samples demonstrate this API: `sample1.py` is an example of Hand Pose Recognition, and `sample2.py` is an example of Gesture Recognition.
70 | 
71 | ## Constraints
72 | 
73 | * Requires a background with no near-skin colours.
74 | * One hand only. The other hand **should not be visible**.
75 | * A sufficient amount of light is needed - neither too much nor too little. Preferably avoid direct sunlight, as its yellow cast makes everything look skin-coloured.
76 | * The person performing gestures should wear a full-sleeve shirt. Only the hand region should be visible, not the arm.
77 | 
78 | ## Training your own model
79 | You can create your own custom model to recognise hand poses. The expected data layout is shown after these steps.
80 | 
81 | * Place your images in a directory inside the `train` folder, one sub-directory per label.
82 | * Modify the directory path and labels in `train/process_images.py`.
83 | * Run `train/process_images.py`; it will convert your images into a .csv file.
84 | * Modify the dataset name in the `train/train.py` file and run the script. It will train a classifier on the .csv file and save it as a .sav file.
85 | * You can find your model in the `./Models/` directory.
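
With the default settings in `train/process_images.py` (`directory = 'data/'`, digit labels, 300 images per label named `1.png` to `300.png`), the expected layout is:

```
train/
└── data/
    ├── 0/
    │   ├── 1.png
    │   └── ... 300.png
    ├── 1/
    └── ...
```

Running `python3 process_images.py` from inside `train/` writes `data.csv` and then offers to start training immediately; `train/train.py` saves the resulting classifier to `../Models/model.sav`.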
86 | 
87 | ___
88 | 
89 | That's it.
90 | 
91 | _PS: This is our Bachelor's research project._
--------------------------------------------------------------------------------
/check_segmentation.py:
--------------------------------------------------------------------------------
1 | from utils import segment
2 | import cv2, numpy as np
3 | 
4 | print('Press q/Q to quit')
5 | cap = cv2.VideoCapture(0)
6 | cap.set(cv2.CAP_PROP_FPS, 30)
7 | 
8 | lower = np.array([0,140,60],np.uint8)   # YCrCb bounds, used only by the commented-out alternative below
9 | upper = np.array([255,180,127],np.uint8)
10 | 
11 | while True:
12 |     _, frame = cap.read()
13 |     mask = segment(frame)
14 |     '''
15 |     Plain YCrCb in-range segmentation, as an alternative to utils.segment:
16 |     mask = cv2.cvtColor(frame, cv2.COLOR_BGR2YCR_CB)
17 |     mask = cv2.inRange(mask, lower, upper)
18 |     '''
19 |     cv2.imshow('Original', frame)
20 |     cv2.imshow('Segmentation mask', mask)
21 |     key = cv2.waitKey(1)
22 |     if key == ord('q') or key == ord('Q'): break
23 | 
24 | cv2.destroyAllWindows()
25 | cap.release()
26 | 
--------------------------------------------------------------------------------
/dependencies.sh:
--------------------------------------------------------------------------------
1 | echo "Installing Silatra dependencies..."
2 | sudo apt-get install python3-pip
3 | sudo pip3 install numpy
4 | sudo pip3 install scikit-learn
5 | sudo pip3 install opencv-python
6 | sudo pip3 install cmake
7 | sudo pip3 install imutils
8 | sudo pip3 install hmmlearn
9 | sudo pip3 install opencv-contrib-python
10 | sudo pip3 install dlib
11 | 
12 | # These dependencies are required only for server.py
13 | sudo apt-get install espeak # This is required before you install pyttsx3
14 | sudo pip3 install pyttsx3 # Python Text to Speech
15 | sudo pip3 install netifaces # To get the machine's IP address
16 | 
--------------------------------------------------------------------------------
/sample1.py:
--------------------------------------------------------------------------------
1 | import silatra, cv2, sys, numpy as np
2 | 
3 | test_image = cv2.imread('samples/gesture_pose_fist.png')
4 | 
5 | try:
6 |     img = test_image.copy()   # raises AttributeError if imread returned None (bad path)
7 |     del(img)
8 |     result = silatra.recognise_hand_pose(test_image, model_path='Models/silatra_gesture_signs.sav')
9 |     print('The recognised Hand pose is -> '+result)
10 | except AttributeError: print('Image file does not exist. 
Please check the image path', file=sys.stderr) 11 | -------------------------------------------------------------------------------- /sample2.py: -------------------------------------------------------------------------------- 1 | import cv2,sys 2 | from silatra import Gesture 3 | import time 4 | 5 | gesture_video = cv2.VideoCapture('samples/please-give-me-your-pen.avi') 6 | gesture = Gesture(using_stabilization=True) 7 | 8 | total_time, num_frames = 0, 0 9 | while True: 10 | start = time.time() 11 | ret, frame = gesture_video.read() 12 | if not ret: break 13 | gesture.add_frame(frame) 14 | # cv2.imshow('Gesture',frame) 15 | end = time.time() 16 | total_time += (end - start) 17 | num_frames += 1 18 | 19 | print('Time taken for frame %d = %.3f ms'%(num_frames,(end-start)*1000), end='\r') 20 | k = cv2.waitKey(1) 21 | if k==ord('q'): break 22 | 23 | print('The recognised gesture is -> '+gesture.classify_gesture()) 24 | print('Time taken for recognition = %.3f s'%(total_time)) 25 | cv2.destroyAllWindows() 26 | gesture_video.release() 27 | -------------------------------------------------------------------------------- /samples/gesture_pose_cup_closed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/samples/gesture_pose_cup_closed.png -------------------------------------------------------------------------------- /samples/gesture_pose_fist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/samples/gesture_pose_fist.png -------------------------------------------------------------------------------- /samples/gesture_pose_thumbs_up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/samples/gesture_pose_thumbs_up.png -------------------------------------------------------------------------------- /samples/good-afternoon.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/samples/good-afternoon.avi -------------------------------------------------------------------------------- /samples/good-morning.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/samples/good-morning.avi -------------------------------------------------------------------------------- /samples/please-give-me-your-pen.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/samples/please-give-me-your-pen.avi -------------------------------------------------------------------------------- /samples/pose_d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dev-td7/Indian-Sign-Language-Translator/75036cb280d03c43270233b40d3523ef0583c4f4/samples/pose_d.png -------------------------------------------------------------------------------- /server.py: 
--------------------------------------------------------------------------------
1 | """
2 | Use this file if you are building a real-time Gesture Recognition application. This Python module accepts image frames from your client, processes them and sends the output back.
3 | 
4 | The frames are to be sent to the server in the following manner:
5 | 
6 | * First send the size of the image in bytes, as a 4-byte big-endian integer.
7 | * Next, send the complete encoded image as a byte array.
8 | 
9 | This module also uses a text-to-speech engine for speech output at the server end.
10 | You can turn it off by setting `no_speech_output` to True, or by passing the `--nospeech 1` parameter while running the program.
11 | """
12 | 
13 | import socket, struct, atexit, timeit
14 | import sys, os, distutils, argparse, pyttsx3
15 | import cv2, imutils
16 | import netifaces as ni, numpy as np
17 | from server_utils import addToQueue, displayTextOnWindow, getConsistentSign
18 | from sys import getsizeof
19 | 
20 | import silatra
21 | 
22 | parser = argparse.ArgumentParser(description='Main Entry Point')
23 | parser.add_argument('--port', help='Opens Silatra server at specified port number.')
24 | parser.add_argument('--mode', help='Default is Gesture mode. --mode 1 for Hand pose recognition')
25 | parser.add_argument('--nostabilize', help='Specify --nostabilize 1 to not use Object stabilization')
26 | parser.add_argument('--nospeech', help='Specify --nospeech 1 to not use speech output')
27 | args = parser.parse_args()
28 | 
29 | 
30 | if not args.port: port = int(input('Enter port number to start server: '))
31 | else: port = int(args.port)
32 | 
33 | if not args.mode: mode = 'Gesture'
34 | else: mode = 'Hand pose recognition'
35 | 
36 | if not args.nostabilize: use_stabilization = True
37 | else: use_stabilization = False
38 | 
39 | if not args.nospeech: no_speech_output = False
40 | else: no_speech_output = True
41 | 
42 | if not no_speech_output: engine = pyttsx3.init()
43 | 
44 | ip_address = ni.ifaddresses('wlp2s0')[ni.AF_INET][0]['addr']   # NOTE: 'wlp2s0' is one machine's Wi-Fi interface name; change it to match your machine
45 | 
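# ------------------------------------------------------------------------------
# Illustrative only: a minimal client-side sketch of the framing protocol that
# the loop below expects (a 4-byte length prefix, then the JPEG bytes, and a
# zero-length frame to finish). The IP address, port and webcam index here are
# placeholders, not part of this repository; the real client is the Silatra
# Android app linked in the README.
# ------------------------------------------------------------------------------
'''
import socket, struct, cv2

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(('192.168.0.101', 49164))          # use the IP/port printed by server.py
cap = cv2.VideoCapture(0)

for _ in range(200):                            # stream a bounded number of frames
    grabbed, frame = cap.read()
    if not grabbed: break
    ok, jpeg = cv2.imencode('.jpg', frame)
    data = jpeg.tobytes()
    sock.sendall(struct.pack('!i', len(data)))  # 4-byte big-endian size first
    sock.sendall(data)                          # then the encoded frame
    # the server replies with ASCII text (recognised sign / status messages)

sock.sendall(struct.pack('!i', 0))              # size 0 marks the end of the feed
cap.release()
sock.close()
'''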
46 | print('Starting Silatra server. Use IP address %s and port %d at your client side.'%(ip_address, port))
47 | 
48 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
49 | s.bind(('', port))
50 | s.listen(1)
51 | print('Waiting for connections...')
52 | client, address = s.accept()
53 | print('Connected to client with IP address %s'%address[0])
54 | 
55 | if mode == 'Gesture': unknown_gesture = silatra.Gesture(use_stabilization)
56 | 
57 | no_of_frames = 0
58 | minNoOfFramesBeforeGestureRecogStart = 70
59 | 
60 | while True:
61 |     buf = client.recv(4)
62 |     print('Received message of size %d bytes'%(getsizeof(buf)))
63 |     size = struct.unpack('!i', buf)[0]
64 |     print("receiving image of size: %s bytes" % size)
65 | 
66 |     if size > 0:   # size 0 is the end-of-feed marker and carries no image data
67 |         data = client.recv(size, socket.MSG_WAITALL)
68 |         numpy_array = np.frombuffer(data, np.uint8)   # np.fromstring is deprecated for raw bytes
69 |         img_np = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR)
70 |         try: img_np = imutils.rotate_bound(img_np, 90)
71 |         except AttributeError: break   # decode failed, most likely the client disconnected
72 |         img_np = cv2.resize(img_np, (0,0), fx=0.7, fy=0.7)
73 | 
74 |         cv2.imshow('Frames received', img_np); cv2.waitKey(1)   # waitKey lets the window refresh
75 |         no_of_frames += 1
76 | 
77 |         if mode == 'Hand pose recognition':
78 |             recognised_pose = silatra.recognise_hand_pose(img_np, using_stabilization=use_stabilization, no_of_frames=no_of_frames)
79 |             consistent_pose = str(getConsistentSign(recognised_pose))
80 |             if consistent_pose == '-1': consistent_pose = 'No hand pose in image'
81 |             displayTextOnWindow('Recognised Hand pose', consistent_pose)
82 |             client.send(consistent_pose.encode('ascii'))
83 | 
84 |             if not no_speech_output:
85 |                 engine.say(consistent_pose)
86 |                 engine.runAndWait()
87 | 
88 |         if mode == 'Gesture':
89 |             op1 = ''   # status message for the client; empty on most frames
90 |             if no_of_frames == minNoOfFramesBeforeGestureRecogStart - 10:
91 |                 op1 = "Model ready to recognize\r\n"
92 |             elif no_of_frames == minNoOfFramesBeforeGestureRecogStart:
93 |                 op1 = "Start gesture\r\n"
94 |             if op1: client.send(op1.encode('ascii'))
95 | 
96 |             if no_of_frames > minNoOfFramesBeforeGestureRecogStart: unknown_gesture.add_frame(img_np)
97 | 
98 | 
99 |     if size == 0:
100 |         if mode == 'Hand pose recognition':
101 |             client.send("QUIT\r\n".encode('ascii'))
102 |             break
103 |         elif mode == 'Gesture': # End of gesture
104 |             if len(unknown_gesture.get_observations()) > 0:
105 |                 recognised_gesture = unknown_gesture.classify_gesture()
106 |                 displayTextOnWindow('Recognised Gesture',recognised_gesture)
107 |                 client.send(recognised_gesture.encode('ascii'))
108 | 
109 |                 if not no_speech_output:
110 |                     engine.say(recognised_gesture)
111 |                     engine.runAndWait()
112 |             client.send("QUIT\r\n".encode('ascii'))
113 |             break
114 | 
115 | print('Silatra server stopped!')
116 | def cleaners():
117 |     s.close()
118 |     cv2.destroyAllWindows()
119 | 
120 | atexit.register(cleaners)
121 | 
--------------------------------------------------------------------------------
/server_utils.py:
--------------------------------------------------------------------------------
1 | predictions = []
2 | maxQueueSize = 15 # This is the max size of queue `predictions`
3 | noOfSigns = 128 # This is the domain of the values present in the queue `predictions`
4 | minModality = int(maxQueueSize/2) # This is the minimum number of times a sign must be present in `predictions` to be declared as consistent
5 | 
6 | def addToQueue(pred):
7 |     '''
8 |     Adds the latest sign recognized to a queue of signs. This queue has maxlength: `maxQueueSize`
9 | 
10 |     Parameters
11 |     ----------
12 |     pred : This is the latest sign recognized by the classifier. 
13 | This is of type number and the sign is in ASCII format 14 | 15 | ''' 16 | global predictions, maxQueueSize, minModality, noOfSigns 17 | if len(predictions) == maxQueueSize: 18 | predictions = predictions[1:] 19 | predictions += [pred] 20 | 21 | def getConsistentSign(hand_pose): 22 | ''' 23 | From the queue of signs, this function returns the sign that has occured most frequently 24 | with frequency > `minModality`. This is considered as the consistent sign. 25 | 26 | Returns 27 | ------- 28 | number 29 | This is the modal value among the queue of signs. 30 | 31 | ''' 32 | global predictions, maxQueueSize, minModality, noOfSigns 33 | addToQueue(hand_pose) 34 | modePrediction = -1 35 | countModality = minModality 36 | 37 | if len(predictions) == maxQueueSize: 38 | # countPredictions = [0]*noOfSigns 39 | countPredictions = {} 40 | 41 | for pred in predictions: 42 | if pred != -1: 43 | try: 44 | countPredictions[pred]+=1 45 | except: 46 | countPredictions[pred] = 1 47 | 48 | for i in countPredictions.keys(): 49 | if countPredictions[i]>countModality: 50 | modePrediction = i 51 | countModality = countPredictions[i] 52 | 53 | return modePrediction 54 | 55 | def displayTextOnWindow(windowName,textToDisplay,xOff=75,yOff=100,scaleOfText=2): 56 | ''' 57 | This just displays the text provided on the cv2 window with WINDOW_NAME: `windowName` 58 | 59 | Parameters 60 | ---------- 61 | windowName : This is WINDOW_NAME of the cv2 window on which the text will be displayed 62 | textToDisplay : This is the text to be displayed on the cv2 window 63 | 64 | ''' 65 | import numpy as np, cv2 66 | signImage = np.zeros((200,400,1),np.uint8) 67 | print(textToDisplay) 68 | cv2.putText(signImage,textToDisplay,(xOff,yOff),cv2.FONT_HERSHEY_SIMPLEX,scaleOfText,(255,255,255),3,8); 69 | cv2.imshow(windowName,signImage); 70 | 71 | 72 | -------------------------------------------------------------------------------- /silatra.py: -------------------------------------------------------------------------------- 1 | """ 2 | # The SiLaTra API for Sign Language Translation. 3 | 4 | Authors : Tejas Dastane, Varun Rao, Kartik Shenoy, Devendra Vyavaharkar. 5 | Version : 1.0.0 6 | GitHub link : https://github.com/###/### 7 | 8 | It is advised to retain this header while usage of this API. This would provide due credit to the authors. 9 | 10 | """ 11 | 12 | import cv2, numpy as np 13 | import utils 14 | 15 | __version__ = '1.0.0' 16 | __authors__ = __developers__ = '\nTejas Dastane - https://github.com/dev-td7\nVarun Rao - https://github.com/vrr-21\nKartik Shenoy - https://github.com/kartik2112\nDevendra Vyavaharkar - https://github.com/DevendraVyavaharkar\n' 17 | 18 | # lower_bound = np.array([0,143,60], np.uint8) 19 | # upper_bound = np.array([255,180,127], np.uint8) 20 | 21 | def recognise_hand_pose(image, directly_from_hand=False, model_path='Models/silatra_digits_and_letters.sav', using_stabilization=False, no_of_frames=1): 22 | ''' 23 | ### SiLaTra Hand Pose Recognition 24 | 25 | Provides classification for input hand pose image. 26 | 27 | Inputs: (a) Mandatory Parameter - Image for which Hand Pose Classification is to be performed. 28 | 29 | (b) Optional Parameters (Use them only if you understand them): 30 | 31 | (1) directly_from_hand - boolean - Set this to true if you are passing already cropped hand region in `image` parameter. 32 | (2) model_path - String - If an alternate model is to be used, pass the path of its .sav file. 
33 |     (3) using_stabilization - boolean - If you intend to use Object stabilization, set this to True. Only use this option if you are classifying hand poses from a continuous feed; otherwise it has no effect.
34 |     (4) no_of_frames - Integer - ONLY TO BE USED IF using_stabilization IS True. Pass the index of the frame from the continuous feed you are processing.
35 |     '''
36 | 
37 |     import pickle
38 |     from sklearn.neighbors import KNeighborsClassifier
39 | 
40 |     if not directly_from_hand:
41 |         mask = utils.segment(image)
42 |         face, foundFace = utils.detect_face(image)
43 |         mask = utils.eliminate_face(face, foundFace, mask)
44 | 
45 |         if using_stabilization: mask = utils.stabilize(foundFace, no_of_frames, image, face, mask)
46 | 
47 |         hand = utils.get_my_hand(mask)
48 |         if hand is False: return 'No hand pose in image'
49 |         features = utils.extract_features(hand)
50 |     else: features = utils.extract_features(image)
51 | 
52 |     classifier = pickle.load(open(model_path, 'rb'))
53 |     hand_pose = classifier.predict([features])[0]
54 | 
55 |     return hand_pose
56 | 
57 | 
58 | class Gesture:
59 |     '''
60 |     ### Silatra Gesture Recognition.
61 | 
62 |     Initialise an instance of this class and use the add_frame method to pass each gesture frame into the instance. Finally, call classify_gesture to get the classification result.
63 | 
64 |     Object stabilization is still in its early stages and may not be reliable. If you want to use it, set the using_stabilization parameter of the constructor to True.
65 |     '''
66 | 
67 |     def __init__(self, using_stabilization=False):
68 | 
69 |         from sklearn.externals import joblib
70 |         from utils import HandMotionRecognizer
71 |         import os
72 | 
73 |         self.__observations = []
74 |         self.__using_stabilization = using_stabilization   # was hard-coded to False, which silently disabled stabilization
75 |         self.__no_of_frames = 0
76 |         self.__motion = HandMotionRecognizer()
77 | 
78 |     def add_frame(self, image):
79 | 
80 |         mask = utils.segment(image)
81 |         face, foundFace = utils.detect_face(image)
82 |         mask = utils.eliminate_face(face, foundFace, mask)
83 | 
84 |         if self.__using_stabilization: mask = utils.stabilize(foundFace, self.__no_of_frames + 1, image, face, mask)
85 |         hand, hand_contour = utils.get_my_hand(mask, True)
86 | 
87 |         hand_pose, direction = 'None', 'None'
88 | 
89 |         if hand_contour is not None: motion_detected = self.__motion.get_hand_motion(hand_contour)
90 |         else: return
91 | 
92 |         if not motion_detected: hand_pose = recognise_hand_pose(hand, directly_from_hand=True, model_path='Models/silatra_gesture_signs.sav')
93 |         else: direction = motion_detected
94 | 
95 |         self.__observations.append((hand_pose,direction))
96 |         self.__no_of_frames += 1
97 | 
98 |     def classify_gesture(self, return_score=False):
99 | 
100 |         import hmmlearn, numpy as np, os
101 |         from sklearn.externals import joblib
102 | 
103 |         kfMapper = {'Up':0,'Right':1,'Left':2,'Down':3,'ThumbsUp':4, 'Sun_Up':5, 'Cup_Open':6, 'Cup_Closed':7, 'Apple_Finger':8, 'OpenPalmHori':9, 'Leader_L':10, 'Fist':11, 'That_Is_Good_Circle':12}
104 |         dir = "./Models/GestureHMMs"
105 |         Models = []
106 |         ModelNames = []
107 | 
108 |         for model in os.listdir(dir):
109 |             Models += [joblib.load(dir+"/"+model)]
110 |             ModelNames += [model.split(".")[0]]
111 | 
112 |         testInputSeq = []
113 |         for elem in self.__observations:
114 |             if elem[0] == 'None':
115 |                 testInputSeq += [kfMapper[elem[1]]]
116 |             else:
117 |                 testInputSeq += [kfMapper[elem[0]]]
118 | 
119 |         maxScore = float('-inf')
120 |         testInputSeq = np.reshape(np.array(testInputSeq),(-1,1))
121 |         recognizedGesture = "--"
122 |         for i in range(len(Models)):
123 |             scoreTemp = Models[i].score(testInputSeq)
124 |             if scoreTemp > maxScore:
125 |                 maxScore = scoreTemp
126 |                 recognizedGesture = ModelNames[i]
127 |         if return_score: return (recognizedGesture,maxScore)
128 |         else: return recognizedGesture
129 | 
130 |     def get_observations(self): return self.__observations
131 | 
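# ------------------------------------------------------------------------------
# The HMM training script is not part of this repository. The sketch below only
# illustrates how a new .pkl in Models/GestureHMMs could be produced with
# hmmlearn so that classify_gesture() can score it. The gesture name, the number
# of hidden states and the example symbol sequences are made up; the observation
# symbols must come from the kfMapper encoding used above. Note that newer
# hmmlearn releases call this discrete-symbol model CategoricalHMM, and newer
# scikit-learn releases ship joblib as the standalone `joblib` package.
# ------------------------------------------------------------------------------
'''
import numpy as np
from hmmlearn import hmm
from sklearn.externals import joblib

# Each training sequence = one recording of the gesture, encoded with kfMapper.
sequences = [[11, 11, 0, 0, 0, 4], [11, 0, 0, 4, 4]]        # hypothetical data
X = np.concatenate([np.reshape(s, (-1, 1)) for s in sequences])
lengths = [len(s) for s in sequences]

model = hmm.MultinomialHMM(n_components=4, n_iter=100)      # discrete-symbol HMM
model.fit(X, lengths)
joblib.dump(model, './Models/GestureHMMs/My Gesture.pkl')   # picked up by classify_gesture()
'''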
--------------------------------------------------------------------------------
/train/process_images.py:
--------------------------------------------------------------------------------
1 | '''
2 | # Step 1: Extracting features from images.
3 | 
4 | * Modify 'directory' to point to the directory containing the images.
5 | * Modify 'labels' to insert all labels used by your model.
6 | '''
7 | 
8 | import sys, os
9 | sys.path.append(os.path.abspath(os.path.join('..', 'Indian-Sign-Language-Translator')))
10 | 
11 | import cv2, utils   # utils.py lives in the repository root, made importable by the path appended above
12 | 
13 | directory = 'data/'
14 | labels = ['1','2','3','4','5','6','7','8','9','0']
15 | TOTAL_IMAGES_PER_LABEL = 300
16 | data_file_name = 'data.csv'
17 | 
18 | with open(data_file_name, 'w') as data:
19 |     # Columns
20 |     for i in range(100):
21 |         data.write('f%d,' % i)
22 |     data.write('label\n')
23 | 
24 |     for label in labels:
25 |         for i in range(1, TOTAL_IMAGES_PER_LABEL+1):
26 |             try:
27 |                 print('Processing image %d of label %s' % (i, label), end='\r')
28 |                 image = cv2.imread(directory+label+'/%d.png' % i)
29 |                 # Skin color segmentation
30 |                 segmented_image = utils.segment(image)
31 | 
32 |                 # Face detection
33 |                 face_bounds, found_face = utils.detect_face(image)
34 | 
35 |                 # Face elimination
36 |                 no_face_image = utils.eliminate_face(face_bounds, found_face, segmented_image)
37 |                 del segmented_image
38 |                 del image
39 | 
40 |                 # Hand extraction
41 |                 hand = utils.get_my_hand(no_face_image)
42 |                 del no_face_image
43 | 
44 |                 # Feature extraction
45 |                 feature_vector = utils.extract_features(hand)
46 | 
47 |                 # Convert 'features' from list to str.
48 |                 feature_str = ''
49 |                 for feature_val in feature_vector:
50 |                     feature_str += str(feature_val) + ','
51 | 
52 |                 # Write to file
53 |                 data.write(feature_str+'%s\n' % label)
54 | 
55 |             except:   # skip images that fail at any stage (missing file, no hand detected, etc.)
56 |                 continue
57 |     print(' '*60, end='\r')
58 | 
59 | print(' '*60+'\rDone!')
60 | 
61 | start_training_model = input('Start training? [y/N]: ')
62 | if start_training_model == 'y' or start_training_model == 'Y':
63 |     from train import start_training
64 |     start_training(data_file = data_file_name)
--------------------------------------------------------------------------------
/train/train.py:
--------------------------------------------------------------------------------
1 | '''
2 | # Step 2: Training the KNN classifier
3 | 
4 | * Modify the data_file variable and put the name of your csv file. 
5 | ''' 6 | 7 | def start_training(data_file): 8 | import pandas as pd 9 | from sklearn.neighbors import KNeighborsClassifier 10 | from sklearn.model_selection import train_test_split 11 | from sklearn.metrics import confusion_matrix 12 | import numpy as np 13 | import pickle 14 | 15 | # Fetch data 16 | print('Fetching %s' % data_file, end='\r') 17 | data = pd.read_csv(data_file, dtype = {100: np.unicode_}) 18 | print(' '*40+'\rTotal data parsed: %d' % len(data)) 19 | 20 | # Split data into training and testing samples 21 | X = data[ ['f' + str(i) for i in range(100)] ].values 22 | Y = data['label'].values 23 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=164) 24 | 25 | # Fit training data into model 26 | print('Fitting model...', end='\r') 27 | classifier = KNeighborsClassifier(n_neighbors = 3, algorithm = 'brute') 28 | classifier.fit(X_train, Y_train) 29 | 30 | # Test on testing data 31 | print('Testing...'+' '*10,end='\r') 32 | acc = classifier.score(X_test, Y_test) 33 | print('Accuracy: %.3f%%' % (acc * 100)) 34 | 35 | # Print confusion matrix 36 | print('\nGetting confusion matrix..') 37 | preds = classifier.predict(X_test) 38 | confused = confusion_matrix(Y_test, preds) 39 | for row in confused: 40 | for elem in row: 41 | print(elem, end=',') 42 | 43 | print() 44 | 45 | # Fit all data into model and save it for further use. 46 | classifier = KNeighborsClassifier(n_neighbors = 3, algorithm = 'brute') 47 | classifier.fit(X, Y) 48 | pickle.dump(classifier, open('../Models/model.sav', 'wb')) 49 | print('Model saved') 50 | 51 | if __name__ == "__main__": 52 | 53 | # Whatever your csv is, just write it's name here, 54 | data_file = 'digits.csv' 55 | start_training(data_file = data_file) -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains several Modules of silatra. 3 | """ 4 | 5 | def segment(src_img): 6 | """ 7 | ### Segment skin areas from hand using a YCrCb mask. 8 | 9 | This function returns a mask with white areas signifying skin and black areas otherwise. 10 | 11 | Returns: mask 12 | """ 13 | 14 | import cv2 15 | from numpy import array, uint8 16 | 17 | blurred_img = cv2.GaussianBlur(src_img,(5,5),0) 18 | blurred_img = cv2.medianBlur(blurred_img,5) 19 | 20 | ycrcb_image = cv2.cvtColor(blurred_img, cv2.COLOR_BGR2YCR_CB) 21 | lower = array([0,140,60], uint8) 22 | upper = array([255,180,127], uint8) 23 | mask = cv2.inRange(ycrcb_image, lower, upper) 24 | 25 | open_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (5,5)) 26 | close_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (7,7)) 27 | mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, open_kernel) 28 | mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, close_kernel) 29 | 30 | return mask 31 | 32 | 33 | def detect_face(image): 34 | """ 35 | ### Detects face in an image. 36 | 37 | This function takes input a colour image. It creates a rectangle around the face region. 38 | 39 | Returns: (1) a tuple (x,y,w,h) where 40 | x: X co-ordinate of top left corner of rectangle 41 | y: Y co-ordinate of top left corner of rectangle 42 | w: Width of rectangle 43 | h: Height of rectangle 44 | (2) True if face was present in the image and False otherwise. 
45 | """ 46 | 47 | import dlib, cv2 48 | from imutils import face_utils 49 | 50 | detector = dlib.get_frontal_face_detector() 51 | 52 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 53 | rects = detector(gray, 1) 54 | 55 | maxArea1 = 0 56 | faceRect = -1 57 | foundFace = False 58 | 59 | for (i, rect) in enumerate(rects): 60 | (x, y, w, h) = face_utils.rect_to_bb(rect) 61 | if w*h > maxArea1: 62 | maxArea1 = w*h 63 | faceRect = (x,y,w,h) 64 | foundFace = True 65 | 66 | return (faceRect, foundFace) 67 | 68 | 69 | def eliminate_face(face, foundFace, mask): 70 | """ 71 | ### Eliminates face and returns a binary mask without face region but containing other skin regions. 72 | 73 | Inputs: 74 | (1) A tuple (x,y,w,h) signifying a rectangle around a face. 75 | (2) If face was found, set this to True, else set False. 76 | (3) Binary mask obtained after performing skin segmentation. 77 | 78 | Returns: Binary mask containing skin areas. 79 | """ 80 | 81 | import numpy as np, cv2 82 | 83 | MIN_AREA_THRESHOLD = 300 84 | 85 | HEIGHT, WIDTH = mask.shape 86 | 87 | if foundFace: 88 | (x,y,w,h) = face 89 | faceNeckExtraRect = ((int(x+(w/2)-8), int(y+h/2)), (int(x+(w/2)+8), int(y+h+h/4))) 90 | cv2.rectangle(mask, faceNeckExtraRect[0], faceNeckExtraRect[1], (255,255,255), -1) 91 | 92 | tempImg1 = np.zeros((HEIGHT,WIDTH,1), np.uint8) 93 | cv2.rectangle(tempImg1, (x, y), (x + w, y + h), (0,0,0), -1) 94 | cv2.rectangle(tempImg1, faceNeckExtraRect[0], faceNeckExtraRect[1], (255,255,255), -1) 95 | 96 | _,contours,_ = cv2.findContours(mask,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) 97 | length = len(contours) 98 | max_area_of_intersection = -1 99 | intersectingContour = -1 100 | if length > 0: 101 | for i in range(length): 102 | temp = contours[i] 103 | area = cv2.contourArea(temp) 104 | if area < MIN_AREA_THRESHOLD: 105 | cv2.drawContours(mask, contours, i, (0,0,0), -1) 106 | continue 107 | if foundFace: 108 | tempImg2 = np.zeros((HEIGHT,WIDTH,1), np.uint8) 109 | cv2.rectangle(tempImg1, (x, y), (x + w, y + h), (255,255,255), -1) 110 | cv2.drawContours(tempImg2, contours, i, (255,255,255), -1) 111 | tempImg3 = cv2.bitwise_and(tempImg1,tempImg2) 112 | area_of_intersection = np.sum(tempImg3 == 255) 113 | if area_of_intersection > max_area_of_intersection: 114 | max_area_of_intersection = area_of_intersection 115 | intersectingContour = i 116 | if intersectingContour != -1: 117 | cv2.drawContours(mask, contours, intersectingContour, (0,0,0), -1) 118 | return mask 119 | 120 | 121 | # --- These global variables are required for Object stabilisation --- 122 | import cv2 123 | 124 | faceStabilizerMode = "ON" # This is used to enable/disable the stabilizer using KCF Tracker 125 | trackingStarted = False # This is used to indicate whether tracking has started or not 126 | noOfFramesNotTracked = 0 # This indicates the no of frames that has not been tracked 127 | maxNoOfFramesNotTracked = 15 # This is the max no of frames that if not tracked, will restart the tracker algo 128 | minNoOfFramesBeforeStabilizationStart = 0 129 | trackerInitFace = (0,0,0,0) 130 | try: tracker = cv2.TrackerKCF_create() 131 | except AttributeError: tracker = cv2.Tracker_create('KCF') 132 | 133 | # --- End of declaration --- 134 | 135 | def stabilize(foundFace,noOfFramesCollected,img_np,faceRect,mask1): 136 | ''' 137 | ### Object stabilisation 138 | 139 | Helps stabilize the movement in a continuous feed. 
140 | 141 | Inputs: 142 | (1) (Boolean) If face was found in the image to be stabilized 143 | (2) (Integer) Number of frames collected so far 144 | (3) Source Image 145 | (4) (Tuple) (x,y,w,h) signifying a rectangle around the face 146 | (5) Binary mask obtained after face elimination. 147 | 148 | * Here is the stabilization logic 149 | * 150 | * We are stabilizing the person by using face as the ROI for tracker. Thus, in situations where 151 | * the person moves while the camera records the frames, or if the camera operator's hand shakes, 152 | * these false movements wont be detected. 153 | * We are using `noOfFramesCollected` so as to improve the stabilization results by delaying the 154 | * tracker initialization 155 | 156 | ''' 157 | 158 | import numpy as np 159 | import cv2 160 | import imutils 161 | 162 | global faceStabilizerMode, trackingStarted, noOfFramesNotTracked, maxNoOfFramesNotTracked, minNoOfFramesBeforeStabilizationStart, trackerInitFace, tracker 163 | 164 | if not(trackingStarted) and foundFace and noOfFramesCollected >= minNoOfFramesBeforeStabilizationStart: 165 | trackingStarted = True 166 | ok = tracker.init(img_np, faceRect) 167 | trackerInitFace = faceRect 168 | elif trackingStarted: 169 | ok, bbox = tracker.update(img_np) 170 | if ok: 171 | cv2.rectangle(img_np, (int(bbox[0]),int(bbox[1])), (int(bbox[0]+bbox[2]),int(bbox[1]+bbox[3])), (255,0,0), 2) 172 | 173 | rows,cols,_ = img_np.shape 174 | tx = int(trackerInitFace[0] - bbox[0]) 175 | ty = int(trackerInitFace[1] - bbox[1]) 176 | shiftMatrix = np.float32([[1,0,tx],[0,1,ty]]) 177 | 178 | img_np = cv2.warpAffine(img_np,shiftMatrix,(cols,rows)) 179 | mask1 = cv2.warpAffine(mask1,shiftMatrix,(cols,rows)) 180 | 181 | noOfFramesNotTracked = 0 182 | else: 183 | noOfFramesNotTracked += 1 184 | if noOfFramesNotTracked > maxNoOfFramesNotTracked: 185 | trackingStarted = False 186 | noOfFramesNotTracked = 0 187 | return mask1 188 | 189 | 190 | def get_my_hand(img_gray, return_contour=False): 191 | """ 192 | ### Hand extractor 193 | 194 | __DO NOT INCLUDE YOUR FACE IN THE `img_gray`__ 195 | 196 | This function does the hardwork of finding your hand area in the image. 197 | 198 | Inputs: (1) An Image where skin areas are represented by white and black otherwise. 199 | (2) return_contour: If True, returns contour of the hand. 200 | 201 | 202 | Returns: (1) (Image) Your hand region 203 | (2) if return_contour parameter is True, hand contour. 204 | """ 205 | 206 | import cv2 207 | 208 | _,contours,_ = cv2.findContours(img_gray,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) 209 | length = len(contours) 210 | maxArea = -1 211 | contour_found = True 212 | if length > 0: 213 | for i in range(length): 214 | temp = contours[i] 215 | area = cv2.contourArea(temp) 216 | if area > maxArea: 217 | maxArea = area 218 | ci = i 219 | x,y,w,h = cv2.boundingRect(contours[ci]) 220 | hand = img_gray[y:y+h,x:x+w] 221 | else: contour_found = False 222 | 223 | # To display hand image, uncomment the below lines. 224 | ''' 225 | hand = np.zeros((img_gray.shape[1], img_gray.shape[0], 1), np.uint8) 226 | cv2.drawContours(hand, contours, ci, 255, cv2.FILLED) 227 | _,hand = cv2.threshold(hand[y:y+h,x:x+w], 127,255,0) 228 | ''' 229 | 230 | if return_contour and contour_found: return (hand, contours[ci]) 231 | elif return_contour: return (None, None) 232 | elif contour_found: return hand 233 | else: return False 234 | 235 | 236 | def extract_features(src_hand, grid=(10,10)): 237 | """ 238 | ### Uses M x N Grid based fragmentation to extract features from an image. 
239 | 
240 |     Inputs: (1) Image of hand region (2) Tuple (M, N) signifying grid size.
241 |     Returns: List of features extracted from the image.
242 |     """
243 | 
244 |     import cv2
245 |     from math import ceil
246 | 
247 |     HEIGHT, WIDTH = src_hand.shape
248 | 
249 |     data = [ [0 for _ in range(grid[0])] for _ in range(grid[1]) ]
250 |     h, w = float(HEIGHT/grid[1]), float(WIDTH/grid[0])
251 | 
252 |     for column in range(1,grid[1]+1):
253 |         for row in range(1,grid[0]+1):
254 |             fragment = src_hand[ceil((column-1)*h):min(ceil(column*h), HEIGHT),ceil((row-1)*w):min(ceil(row*w),WIDTH)]
255 |             _,contour,_ = cv2.findContours(fragment,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
256 |             try: area = cv2.contourArea(contour[0])
257 |             except: area = 0.0   # no contour found in this fragment
258 |             area = float(area/(h*w))
259 |             data[column-1][row-1] = area
260 | 
261 |     features = []
262 |     for column in range(grid[1]):
263 |         for row in range(grid[0]):
264 |             features.append(data[column][row])
265 |     return features
266 | 
267 | 
268 | class HandMotionRecognizer:
269 |     '''
270 |     ### Hand Motion Recognizer class.
271 | 
272 |     This class is used to get motion information from each frame in a continuous feed. Use the get_hand_motion() method to get motion information at each frame.
273 |     '''
274 |     def __init__(self):
275 |         self.__prev_x = 0
276 |         self.__prev_y = 0
277 |         self.__threshold = 20
278 | 
279 |     def get_hand_motion(self, hand_contour):
280 |         '''
281 |         ### Get hand motion
282 | 
283 |         Inputs: Hand contour
284 |         Returns: (-) If motion was found - Up/Left/Right/Down
285 |         (-) In case of no motion - False.
286 |         '''
287 | 
288 |         import cv2
289 | 
290 |         M = cv2.moments(hand_contour)
291 |         if M["m00"] == 0: return False   # degenerate contour: no area, so no centroid and no motion
292 |         cx, cy = int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"])
293 | 
294 |         if self.__prev_x == 0 and self.__prev_y == 0: self.__prev_x, self.__prev_y = cx, cy
295 | 
296 |         delta_x, delta_y, slope = self.__prev_x-cx, self.__prev_y-cy, 0
297 |         direction = 'None'
298 | 
299 |         if delta_x**2+delta_y**2 > self.__threshold**2:
300 |             if delta_x == 0 and delta_y > 0: slope = 999 # effectively +inf
301 |             elif delta_x == 0 and delta_y < 0: slope = -999 # effectively -inf
302 |             else: slope = float(delta_y/delta_x)
303 | 
304 |             if slope < 1.0 and slope >= -1.0 and delta_x > 0: direction = 'Right'
305 |             elif slope < 1.0 and slope >= -1.0: direction = 'Left'
306 |             elif (slope >= 1.0 or slope <=-1.0) and delta_y > 0.0: direction = 'Up'
307 |             elif slope >= 1.0 or slope <=-1.0: direction = 'Down'
308 | 
309 |             self.__threshold = 7
310 |             self.__prev_x, self.__prev_y = cx, cy
311 | 
312 |             return direction
313 |         else:
314 |             self.__threshold = 20
315 |             return False
316 | 
--------------------------------------------------------------------------------