├── Classification.py
├── CollectingData.py
├── FaceMesh.py
├── README.md
├── TestingCLasses.py
├── Track_Hand.py
├── keras_model.h5
└── labels.txt

/Classification.py:
--------------------------------------------------------------------------------
import tensorflow.keras
import numpy as np
import cv2


class Classifier:

    def __init__(self, modelPath, labelsPath=None):
        self.model_path = modelPath
        # Disable scientific notation for clarity
        np.set_printoptions(suppress=True)
        # Load the model
        self.model = tensorflow.keras.models.load_model(self.model_path)

        # Create the array of the right shape to feed into the keras model
        # The 'length' or number of images you can put into the array is
        # determined by the first position in the shape tuple, in this case 1.
        self.data = np.ndarray(shape=(1, 224, 224, 3), dtype=np.float32)
        self.labels_path = labelsPath
        if self.labels_path:
            # read one label per line from the labels file
            self.list_labels = []
            with open(self.labels_path, "r") as label_file:
                for line in label_file:
                    stripped_line = line.strip()
                    self.list_labels.append(stripped_line)
        else:
            print("No Labels Found")

    def getPrediction(self, img, draw=True, pos=(50, 50), scale=2, color=(0, 255, 0)):
        # resize the image to 224x224 with the same strategy as in TM2:
        imgS = cv2.resize(img, (224, 224))
        # turn the image into a numpy array
        image_array = np.asarray(imgS)
        # Normalize the image
        normalized_image_array = (image_array.astype(np.float32) / 127.0) - 1

        # Load the image into the array
        self.data[0] = normalized_image_array

        # run the inference
        prediction = self.model.predict(self.data)
        indexVal = np.argmax(prediction)

        if draw and self.labels_path:
            cv2.putText(img, str(self.list_labels[indexVal]),
                        pos, cv2.FONT_HERSHEY_COMPLEX, scale, color, 2)

        return list(prediction[0]), indexVal
--------------------------------------------------------------------------------
/CollectingData.py:
--------------------------------------------------------------------------------
import cv2
import Track_Hand as ht  # hand tracking class
import FaceMesh as fm  # face mesh class
import numpy as np
import math
import time
'''
A problem occurs when we collect the data: the images we send to the
classifier must all be cropped to the same size, because it is much easier
for the classifier to work with uniformly sized images.

The solution is to paste the cropped hand onto a square (white) background.
'''

'''
Step 1: Create a directory named Data with sub-directories for the classes,
        e.g. A, B, C and I LOVE YOU.
        The required libraries are OpenCV and MediaPipe.
Step 2: Crop the image once we have detected the hand.
Step 3: Paste the crop onto a white image so every sample has the same size.
Step 4: Collect multiple images and assign them to a specific class.
Step 5: Train the data with Google's Teachable Machine
        (https://teachablemachine.withgoogle.com/train).
'''

cam = cv2.VideoCapture(0)
'''
Sometimes the IDE will not give you autocomplete suggestions; this is a
problem with newer versions of OpenCV, so you may need to install
opencv-python version 4.5.4.60.
'''
# Making an object of class handTracker
detectHand = ht.handTracker(maximumHands=1, detConfidence=0.8)

# Making an object of class FaceMesh
detectMesh = fm.FaceMesh(maxFace=1, detectionCon=0.8)

offset = 20
imageSize = 300

folderName = "Data/N"
counter = 0

while True:
    Success, frame = cam.read()

    # find and draw the hands
    hand = detectHand.findAndDrawHands(frame)

    # find landmarks and bounding box
    lm, bbox = detectHand.findLandmarks(frame)

    # find face and draw mesh
    # mesh = detectMesh.drawFaceMesh(frame)

    # now we crop the hand image
    if lm:

        x, y, w, h = bbox

        # creating our own white image so every sample has the same size
        imgWhite = np.ones((300, 300, 3), np.uint8) * 255

        # starting height : ending height, starting width : ending width
        # imgCrop = frame[y:y + h, x:x + w]
        # note: the crop can be empty when the hand is close to the frame border
        imgCrop = frame[y - offset:y + h + offset, x - offset:x + w + offset]

        imgCropShape = imgCrop.shape
        # add cropped image in to white image
        # imgWhite[0:imgCropShape[0], 0:imgCropShape[1]] = imgCrop

        # in order to fit the cropped image on the white image we have to do
        # some calculations
        aspectRatio = h / w  # if the value is above one, the height is greater than the width

        if aspectRatio > 1:  # fix the height
            k = imageSize / h
            wCal = math.ceil(k * w)
            imgResize = cv2.resize(imgCrop, (wCal, imageSize))
            imgResizeShape = imgResize.shape
            wGap = math.ceil((imageSize - wCal) / 2)
            imgWhite[:, wGap:wCal + wGap] = imgResize
        else:  # fix the width
            k = imageSize / w
            hCal = math.ceil(k * h)
            imgResize = cv2.resize(imgCrop, (imageSize, hCal))
            imgResizeShape = imgResize.shape
            hGap = math.ceil((imageSize - hCal) / 2)
            imgWhite[hGap:hCal + hGap, :] = imgResize

        cv2.imshow("Cropped Image", imgCrop)
        cv2.imshow("WhiteImage", imgWhite)

        cv2.rectangle(frame, (x - 20, y - 20), (x + w + 20, y + h + 20),
                      (0, 255, 255), 2)

    cv2.imshow("Webcam", frame)
    k = cv2.waitKey(1)
    if k == ord("s"):
        counter += 1
        cv2.imwrite(f"{folderName}/Image_{time.time()}.jpg", imgWhite)
        print(counter)
    if k == ord("q"):
        break

cam.release()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/FaceMesh.py:
--------------------------------------------------------------------------------
import cv2
import mediapipe as mp
import time


class FaceMesh():
    def __init__(self, mode=False, maxFace=2, refineLm=False,
                 detectionCon=0.5, trackCon=0.5):

        self.mode = mode
        self.maxFace = maxFace
        self.refineLm = refineLm
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpDraw = mp.solutions.drawing_utils  # for drawing the mesh
        self.mpFaceMesh = mp.solutions.face_mesh
        self.faceMesh = self.mpFaceMesh.FaceMesh(self.mode, self.maxFace,
                                                 self.refineLm, self.detectionCon,
                                                 self.trackCon)
        self.drawSpec = self.mpDraw.DrawingSpec(thickness=1, circle_radius=1, color=(0, 255, 0))

    def drawFaceMesh(self, img):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.faceMesh.process(imgRGB)

        if self.results.multi_face_landmarks:
            # the landmarks of the processed face are kept on the instance
            # so that meshLandmarks() can reuse them
            for self.faceLms in self.results.multi_face_landmarks:
                self.mpDraw.draw_landmarks(img, self.faceLms, self.mpFaceMesh.FACEMESH_FACE_OVAL,
                                           self.drawSpec, self.drawSpec)
        return img

    def meshLandmarks(self, img):
        lmlist = []
        if self.results.multi_face_landmarks:  # only if a face was found
            ih, iw, ic = img.shape  # shape is (height, width, channels)
            for id, lm in enumerate(self.faceLms.landmark):
                # print(id, lm)
                x, y = int(lm.x * iw), int(lm.y * ih)
                lmlist.append([id, x, y])
        return lmlist
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Sign-Language-Recognition-Using-Mediapipe


## How it works
- I use both a detector and a classifier in this project.
- The project can classify letters (e.g. A, B, C) and phrases (e.g. "I LOVE YOU") from hand signs and actions.
- How is this achieved?
- First I locate the object of interest, which is the hand, and find its position; from there I classify what exactly the hand is representing.
- I use the detector from the MediaPipe library to detect the hand.
- After detection the next step is classification, for which I use a classifier built with TensorFlow.
- In total I wrote five scripts:
  1. A script for collecting data for the desired action or class. Whenever a hand is in view it is detected and cropped, and multiple images of the hand are saved; these images are used to train the ML model.
  2. A class for detecting the hand.
  3. A classifier class built on TensorFlow.
  4. A class for detecting the face mesh.
  5. A script for testing the trained model on live hand signs.

![image](https://user-images.githubusercontent.com/109298390/179020669-34df28af-e317-418c-8726-11581b321768.png)

## Description of files

### Track_Hand.py

- This file contains a class named handTracker(). The class has three functions (1. the initialization function, 2. findAndDrawHands(), 3. findLandmarks()); a short usage sketch follows this list.
  1. Initialization function: initializes the MediaPipe hands solution and takes parameters such as how many hands to detect and the minimum detection confidence. The first parameter is the static-image mode, which is False because I only want to run detection when the tracking confidence is not good enough; if set to True, detection runs on every frame.
  2. findAndDrawHands(): draws the 21 hand landmarks connected with lines, as shown in the image below.

![image](https://user-images.githubusercontent.com/109298390/179025242-11785c82-15e1-48ad-8f26-dbc2b079ea4d.png)

  3. findLandmarks(): returns a list of landmark ids with their x and y pixel coordinates, plus the bounding box of the hand.
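Below is a minimal usage sketch of the handTracker() class on its own. The constructor arguments mirror the ones used in CollectingData.py; the webcam index and window name are illustrative assumptions, not part of the repository.

```python
import cv2
import Track_Hand as ht

# rough sketch: track one hand from the default webcam and draw its bounding box
detector = ht.handTracker(maximumHands=1, detConfidence=0.8)
cam = cv2.VideoCapture(0)  # assumed webcam index

while True:
    ok, frame = cam.read()
    if not ok:
        break
    frame = detector.findAndDrawHands(frame)   # draws the 21 landmarks and their connections
    lm, bbox = detector.findLandmarks(frame)   # [[id, x, y], ...] and (x, y, w, h)
    if lm:
        x, y, w, h = bbox
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 255), 2)
    cv2.imshow("Hand tracking", frame)
    if cv2.waitKey(1) == ord("q"):
        break

cam.release()
cv2.destroyAllWindows()
```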
### FaceMesh.py

- This file contains a class named FaceMesh(). The class has three functions (1. the initialization function, 2. drawFaceMesh(), 3. meshLandmarks()).
  1. Initialization function: initializes the MediaPipe face-mesh solution and takes parameters such as how many faces to detect and the minimum detection confidence.
  2. drawFaceMesh(): draws the face-mesh landmarks connected with lines (in this project only the face-oval connections are drawn), as shown in the image below.

![image](https://user-images.githubusercontent.com/109298390/179029128-f165bcdf-a68f-41d0-b6f2-57119ce62a38.png)

  3. meshLandmarks(): returns a list of landmark ids with their x and y pixel coordinates for the face.

### CollectingData.py

- This file contains the code for collecting a dataset of the desired action, letter or sign using MediaPipe and OpenCV. I collected data for the following letters and signs: "A", "D", "GOODBYE", "HELLO", "I", "I LOVE YOU", "M", "N", "NO", "PLEASE", "SORRY", "WELCOME", "YES".
- The code is generic, so data can be collected for any kind of sign or letter and used to train the model.
- To train on the collected data I use the Google Teachable Machine website:
  https://teachablemachine.withgoogle.com/

### Classification.py

- This file contains the Classifier() class and its getPrediction() function, which use a TensorFlow Keras model to classify an image and predict the class of the object it shows; a minimal usage sketch appears at the end of this README.

### TestingCLasses.py

- This is the main file, used to test the trained model against a live webcam feed.
- It uses the handTracker() class to draw and track the hand, the FaceMesh() class to draw and track the face, and the TensorFlow Keras classifier to classify and predict the class.

### keras_model.h5

- The trained Keras model exported from Google Teachable Machine; it is the input to the classifier class.

### labels.txt

- Contains the class names.
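As a quick reference, here is a rough sketch of how the Classifier() class can be used on a single prepared image. It assumes the cropping and white-square padding from CollectingData.py has already been applied; the file locations (keras_model.h5 and labels.txt next to the script, and the sample image name) are illustrative assumptions.

```python
import cv2
import Classification as Classifier

# minimal sketch, assuming the Teachable Machine export sits next to this script
clf = Classifier.Classifier("keras_model.h5", "labels.txt")

# hypothetical 300x300 sample produced by CollectingData.py
imgWhite = cv2.imread("Data/N/sample.jpg")

prediction, index = clf.getPrediction(imgWhite, draw=False)
print(prediction)  # list with one confidence value per class
print(index)       # index of the most likely class, matching a line in labels.txt
```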
--------------------------------------------------------------------------------
/TestingCLasses.py:
--------------------------------------------------------------------------------
import cv2
import Track_Hand as ht  # hand tracking class
import FaceMesh as fm  # face mesh class
import Classification as Classifier
import numpy as np
import math
import time
'''
A problem occurs when we collect the data: the images we send to the
classifier must all be cropped to the same size, because it is much easier
for the classifier to work with uniformly sized images.

The solution is to paste the cropped hand onto a square (white) background.
'''

'''
Step 1: Create a directory named Data with sub-directories for the classes,
        e.g. A, B, C and I LOVE YOU.
        The required libraries are OpenCV and MediaPipe.
Step 2: Crop the image once we have detected the hand.
Step 3: Paste the crop onto a white image so every sample has the same size.
Step 4: Collect multiple images and assign them to a specific class.
Step 5: Train the data with Google's Teachable Machine
        (https://teachablemachine.withgoogle.com/train).
'''

cam = cv2.VideoCapture(0)
'''
Sometimes the IDE will not give you autocomplete suggestions; this is a
problem with newer versions of OpenCV, so you may need to install
opencv-python version 4.5.4.60.
'''
# Making an object of class handTracker
detectHand = ht.handTracker(maximumHands=1, detConfidence=0.8)

# Making an object of class FaceMesh
detectMesh = fm.FaceMesh(maxFace=1, detectionCon=0.8)

# declare the classifier with the model and label files
classifier = Classifier.Classifier("Model_2/keras_model.h5", "Model_2/labels.txt")

offset = 20
imageSize = 300

folderName = "Data/C"
counter = 0

# the signs that can be recognised
labels = ["A", "D", "GOODBYE", "HELLO", "I", "I LOVE YOU",
          "M", "N", "NO", "PLEASE", "SORRY", "WELCOME", "YES"]

while True:
    Success, frame = cam.read()
    finalFrame = frame.copy()

    # find and draw the hands
    hand = detectHand.findAndDrawHands(finalFrame)

    # find landmarks and bounding box
    lm, bbox = detectHand.findLandmarks(frame)

    # find face and draw mesh
    mesh = detectMesh.drawFaceMesh(finalFrame)

    # now we crop the hand image
    if lm:

        x, y, w, h = bbox

        # creating our own white image so every sample has the same size
        imgWhite = np.ones((300, 300, 3), np.uint8) * 255

        # starting height : ending height, starting width : ending width
        # imgCrop = frame[y:y + h, x:x + w]
        # note: the crop can be empty when the hand is close to the frame border
        imgCrop = frame[y - offset:y + h + offset, x - offset:x + w + offset]

        imgCropShape = imgCrop.shape
        # add cropped image in to white image
        # imgWhite[0:imgCropShape[0], 0:imgCropShape[1]] = imgCrop

        # in order to fit the cropped image on the white image we have to do
        # some calculations
        aspectRatio = h / w  # if the value is above one, the height is greater than the width

        if aspectRatio > 1:  # fix the height
            k = imageSize / h
            wCal = math.ceil(k * w)
            imgResize = cv2.resize(imgCrop, (wCal, imageSize))
            imgResizeShape = imgResize.shape
            wGap = math.ceil((imageSize - wCal) / 2)
            imgWhite[:, wGap:wCal + wGap] = imgResize
            prediction, index = classifier.getPrediction(imgWhite, draw=False)
            print(prediction, index)

        else:  # fix the width
            k = imageSize / w
            hCal = math.ceil(k * h)
            imgResize = cv2.resize(imgCrop, (imageSize, hCal))
            imgResizeShape = imgResize.shape
            hGap = math.ceil((imageSize - hCal) / 2)
            imgWhite[hGap:hCal + hGap, :] = imgResize
            prediction, index = classifier.getPrediction(imgWhite, draw=False)

        # cv2.imshow("Cropped Image", imgCrop)
        # cv2.imshow("WhiteImage", imgWhite)

        cv2.rectangle(finalFrame, (x - 20, y - 20), (x + w + 20, y + h + 20),
                      (255, 0, 255), 1)
        cv2.line(finalFrame, (x - 20, y - 20), (x - 20, y - 20 + 20), (255, 0, 255), 3)
        cv2.line(finalFrame, (x - 20, y - 20), (x - 20 + 20, y - 20), (255, 0, 255), 3)

        # cv2.line(img, (x + w, y), (x + w, y + 20), (0, 0, 0), 3)
        # cv2.line(img, (x + w, y), (w - 20, y), (0, 0, 0), 3)

        # cv2.line(finalFrame, (x + w + 20, y + 20), (x + w + 20, y + 20), (255, 0, 255), 3)
        # cv2.line(finalFrame, (x + w + 20, y + 20), (x + w - 20 + 20, y + 20), (255, 0, 255), 3)
        # cv2.rectangle(finalFrame, (x - offset, y - offset - 50),
        #               (x - offset + 90, y - offset - 50 + 50), (0, 255, 255), cv2.FILLED)
        cv2.putText(finalFrame, labels[index], (x, y - 26),
                    cv2.FONT_HERSHEY_COMPLEX, 1.7, (255, 255, 255), 2)

    cv2.imshow("Webcam", finalFrame)
    k = cv2.waitKey(1)
    if k == ord("q"):
        break

cam.release()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/Track_Hand.py:
--------------------------------------------------------------------------------
import mediapipe as mp
import cv2


class handTracker():

    # initialization function
    def __init__(self, Mode=False, maximumHands=2, modelComplexity=1,
                 detConfidence=0.5, trackConfidence=0.5):
        self.Mode = Mode
        self.maximumHands = maximumHands
        self.modelComplexity = modelComplexity
        self.detConfidence = detConfidence
        self.trackConfidence = trackConfidence

        # first we have to create an object of the Hands class (this class comes from the mediapipe library)
        self.HandsSol = mp.solutions.hands

        self.hands = self.HandsSol.Hands(self.Mode, self.maximumHands, self.modelComplexity,
                                         self.detConfidence, self.trackConfidence)
        # mediapipe also provides a drawing utility to draw the lines between the landmarks
        self.drawLine = mp.solutions.drawing_utils

    def findAndDrawHands(self, frame):

        RGBimage = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        self.outCome = self.hands.process(RGBimage)

        if self.outCome.multi_hand_landmarks:
            for handLandmarks in self.outCome.multi_hand_landmarks:
                self.drawLine.draw_landmarks(frame, handLandmarks,
                                             self.HandsSol.HAND_CONNECTIONS)

        return frame

    def findLandmarks(self, frame, handNo=0):

        landMarksList = []
        x_list = []
        y_list = []
        bbox = []

        if self.outCome.multi_hand_landmarks:
            myHand = self.outCome.multi_hand_landmarks[handNo]
            for id, lm in enumerate(myHand.landmark):  # enumerate returns both the id and the landmark
                # print(id)
                h, w, c = frame.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                # print(id, cx, cy)
                x_list.append(cx)
                y_list.append(cy)
                landMarksList.append([id, cx, cy])

            # bounding box
            xmin, xmax = min(x_list), max(x_list)
            ymin, ymax = min(y_list), max(y_list)
            boxW, boxH = xmax - xmin, ymax - ymin
            bbox = xmin, ymin, boxW, boxH

        return landMarksList, bbox
--------------------------------------------------------------------------------
/keras_model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/barills-diana/Sign-Language-Recognition-Using-Mediapipe/29fba4baee6c355765e55c933fe89feecbb16ba3/keras_model.h5
--------------------------------------------------------------------------------
/labels.txt:
--------------------------------------------------------------------------------
0 A
1 D
2 GOODBYE
3 HELLO
4 I
5 I LOVE YOU
6 M
7 N
8 NO
9 PLEASE
10 SORRY
11 WELCOME
12 YES
--------------------------------------------------------------------------------