├── Classification.py
├── CollectingData.py
├── FaceMesh.py
├── README.md
├── TestingCLasses.py
├── Track_Hand.py
├── keras_model.h5
└── labels.txt

/Classification.py:
--------------------------------------------------------------------------------
import tensorflow.keras
import numpy as np
import cv2


class Classifier:

    def __init__(self, modelPath, labelsPath=None):
        self.model_path = modelPath
        # Disable scientific notation for clarity
        np.set_printoptions(suppress=True)
        # Load the model
        self.model = tensorflow.keras.models.load_model(self.model_path)

        # Create the array of the right shape to feed into the keras model
        # The 'length' or number of images you can put into the array is
        # determined by the first position in the shape tuple, in this case 1.
        self.data = np.ndarray(shape=(1, 224, 224, 3), dtype=np.float32)
        self.labels_path = labelsPath
        if self.labels_path:
            # read one label per line from the labels file
            self.list_labels = []
            with open(self.labels_path, "r") as label_file:
                for line in label_file:
                    stripped_line = line.strip()
                    self.list_labels.append(stripped_line)
        else:
            print("No Labels Found")

    def getPrediction(self, img, draw=True, pos=(50, 50), scale=2, color=(0, 255, 0)):
        # resize the image to 224x224 with the same strategy as in TM2:
        imgS = cv2.resize(img, (224, 224))
        # turn the image into a numpy array
        image_array = np.asarray(imgS)
        # Normalize the image
        normalized_image_array = (image_array.astype(np.float32) / 127.0) - 1

        # Load the image into the array
        self.data[0] = normalized_image_array

        # run the inference
        prediction = self.model.predict(self.data)
        indexVal = np.argmax(prediction)

        if draw and self.labels_path:
            cv2.putText(img, str(self.list_labels[indexVal]),
                        pos, cv2.FONT_HERSHEY_COMPLEX, scale, color, 2)

        return list(prediction[0]), indexVal
--------------------------------------------------------------------------------
/CollectingData.py:
--------------------------------------------------------------------------------
import cv2
import Track_Hand as ht  # hand tracking class
import FaceMesh as fm  # face mesh class
import numpy as np
import math
import time
'''
A problem occurs when we collect the data: the images we send to the
classifier must all be cropped to the same size, because it is much easier
for the classifier to work with uniformly sized images.

The solution is to paste the cropped hand onto a square (white) background.
'''

'''
Step 1: Create a directory named Data with sub-directories for the classes,
        e.g. A, B, C and I LOVE YOU.
        The required libraries are OpenCV and MediaPipe.
Step 2: Crop the image once we have detected the hand.
Step 3: Paste the crop onto a white image so every sample has the same size.
Step 4: Collect multiple images and assign them to a specific class.
Step 5: Train the data with Google's Teachable Machine
        (https://teachablemachine.withgoogle.com/train).
'''

cam = cv2.VideoCapture(0)
'''
Sometimes the IDE will not give you autocomplete suggestions; this is a
problem with newer versions of OpenCV, so you may need to install
opencv-python version 4.5.4.60.
'''
# Making an object of class handTracker
detectHand = ht.handTracker(maximumHands=1, detConfidence=0.8)

# Making an object of class FaceMesh
detectMesh = fm.FaceMesh(maxFace=1, detectionCon=0.8)

offset = 20
imageSize = 300

folderName = "Data/N"
counter = 0

while True:
    Success, frame = cam.read()

    # find and draw the hands
    hand = detectHand.findAndDrawHands(frame)

    # find landmarks and bounding box
    lm, bbox = detectHand.findLandmarks(frame)

    # find face and draw mesh
    # mesh = detectMesh.drawFaceMesh(frame)

    # now we crop the hand image
    if lm:

        x, y, w, h = bbox

        # creating our own white image so every sample has the same size
        imgWhite = np.ones((300, 300, 3), np.uint8) * 255

        # starting height : ending height, starting width : ending width
        # imgCrop = frame[y:y + h, x:x + w]
        # note: the crop can be empty when the hand is close to the frame border
        imgCrop = frame[y - offset:y + h + offset, x - offset:x + w + offset]

        imgCropShape = imgCrop.shape
        # add cropped image in to white image
        # imgWhite[0:imgCropShape[0], 0:imgCropShape[1]] = imgCrop

        # in order to fit the cropped image on the white image we have to do
        # some calculations
        aspectRatio = h / w  # if the value is above one, the height is greater than the width

        if aspectRatio > 1:  # fix the height
            k = imageSize / h
            wCal = math.ceil(k * w)
            imgResize = cv2.resize(imgCrop, (wCal, imageSize))
            imgResizeShape = imgResize.shape
            wGap = math.ceil((imageSize - wCal) / 2)
            imgWhite[:, wGap:wCal + wGap] = imgResize
        else:  # fix the width
            k = imageSize / w
            hCal = math.ceil(k * h)
            imgResize = cv2.resize(imgCrop, (imageSize, hCal))
            imgResizeShape = imgResize.shape
            hGap = math.ceil((imageSize - hCal) / 2)
            imgWhite[hGap:hCal + hGap, :] = imgResize

        cv2.imshow("Cropped Image", imgCrop)
        cv2.imshow("WhiteImage", imgWhite)

        cv2.rectangle(frame, (x - 20, y - 20), (x + w + 20, y + h + 20),
                      (0, 255, 255), 2)

    cv2.imshow("Webcam", frame)
    k = cv2.waitKey(1)
    if k == ord("s"):
        counter += 1
        cv2.imwrite(f"{folderName}/Image_{time.time()}.jpg", imgWhite)
        print(counter)
    if k == ord("q"):
        break

cam.release()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/FaceMesh.py:
--------------------------------------------------------------------------------
import cv2
import mediapipe as mp
import time


class FaceMesh():
    def __init__(self, mode=False, maxFace=2, refineLm=False,
                 detectionCon=0.5, trackCon=0.5):

        self.mode = mode
        self.maxFace = maxFace
        self.refineLm = refineLm
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpDraw = mp.solutions.drawing_utils  # for drawing the mesh
        self.mpFaceMesh = mp.solutions.face_mesh
        self.faceMesh = self.mpFaceMesh.FaceMesh(self.mode, self.maxFace,
                                                 self.refineLm, self.detectionCon,
                                                 self.trackCon)
        self.drawSpec = self.mpDraw.DrawingSpec(thickness=1, circle_radius=1, color=(0, 255, 0))

    def drawFaceMesh(self, img):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.faceMesh.process(imgRGB)

        if self.results.multi_face_landmarks:
            # the landmarks of the processed face are kept on the instance
            # so that meshLandmarks() can reuse them
            for self.faceLms in self.results.multi_face_landmarks:
                self.mpDraw.draw_landmarks(img, self.faceLms, self.mpFaceMesh.FACEMESH_FACE_OVAL,
                                           self.drawSpec, self.drawSpec)
        return img

    def meshLandmarks(self, img):
        lmlist = []
        if self.results.multi_face_landmarks:  # only if a face was found
            ih, iw, ic = img.shape  # shape is (height, width, channels)
            for id, lm in enumerate(self.faceLms.landmark):
                # print(id, lm)
                x, y = int(lm.x * iw), int(lm.y * ih)
                lmlist.append([id, x, y])
        return lmlist
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Sign-Language-Recognition-Using-Mediapipe


## How it works
- I use both a detector and a classifier in this project.
- The project can classify letters (e.g. A, B, C) and phrases (e.g. "I LOVE YOU") from hand signs and actions.
- How is this achieved?
- First I locate the object of interest, which is the hand, and find its position; from there I classify what exactly the hand is representing.
- I use the detector from the MediaPipe library to detect the hand.
- After detection the next step is classification, for which I use a classifier built with TensorFlow.
- In total I wrote five scripts:
  1. A script for collecting data for the desired action or class. Whenever a hand is in view it is detected and cropped, and multiple images of the hand are saved; these images are used to train the ML model.
  2. A class for detecting the hand.
  3. A classifier class built on TensorFlow.
  4. A class for detecting the face mesh.
  5. A script for testing the trained model on live hand signs.

![image](https://user-images.githubusercontent.com/109298390/179020669-34df28af-e317-418c-8726-11581b321768.png)

## Description of files

### Track_Hand.py

- This file contains a class named handTracker(). The class has three functions (1. the initialization function, 2. findAndDrawHands(), 3. findLandmarks()); a short usage sketch follows this list.
  1. Initialization function: initializes the MediaPipe hands solution and takes parameters such as how many hands to detect and the minimum detection confidence. The first parameter is the static-image mode, which is False because I only want to run detection when the tracking confidence is not good enough; if set to True, detection runs on every frame.
  2. findAndDrawHands(): draws the 21 hand landmarks connected with lines, as shown in the image below.

![image](https://user-images.githubusercontent.com/109298390/179025242-11785c82-15e1-48ad-8f26-dbc2b079ea4d.png)

  3. findLandmarks(): returns a list of landmark ids with their x and y pixel coordinates, plus the bounding box of the hand.
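Below is a minimal usage sketch of the handTracker() class on its own. The constructor arguments mirror the ones used in CollectingData.py; the webcam index and window name are illustrative assumptions, not part of the repository.

```python
import cv2
import Track_Hand as ht

# rough sketch: track one hand from the default webcam and draw its bounding box
detector = ht.handTracker(maximumHands=1, detConfidence=0.8)
cam = cv2.VideoCapture(0)  # assumed webcam index

while True:
    ok, frame = cam.read()
    if not ok:
        break
    frame = detector.findAndDrawHands(frame)   # draws the 21 landmarks and their connections
    lm, bbox = detector.findLandmarks(frame)   # [[id, x, y], ...] and (x, y, w, h)
    if lm:
        x, y, w, h = bbox
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 255), 2)
    cv2.imshow("Hand tracking", frame)
    if cv2.waitKey(1) == ord("q"):
        break

cam.release()
cv2.destroyAllWindows()
```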
### FaceMesh.py

- This file contains a class named FaceMesh(). The class has three functions (1. the initialization function, 2. drawFaceMesh(), 3. meshLandmarks()).
  1. Initialization function: initializes the MediaPipe face-mesh solution and takes parameters such as how many faces to detect and the minimum detection confidence.
  2. drawFaceMesh(): draws the face-mesh landmarks connected with lines (in this project only the face-oval connections are drawn), as shown in the image below.

![image](https://user-images.githubusercontent.com/109298390/179029128-f165bcdf-a68f-41d0-b6f2-57119ce62a38.png)

  3. meshLandmarks(): returns a list of landmark ids with their x and y pixel coordinates for the face.

### CollectingData.py

- This file contains the code for collecting a dataset of the desired action, letter or sign using MediaPipe and OpenCV. I collected data for the following letters and signs: "A", "D", "GOODBYE", "HELLO", "I", "I LOVE YOU", "M", "N", "NO", "PLEASE", "SORRY", "WELCOME", "YES".
- The code is generic, so data can be collected for any kind of sign or letter and used to train the model.
- To train on the collected data I use the Google Teachable Machine website:
  https://teachablemachine.withgoogle.com/

### Classification.py

- This file contains the Classifier() class and its getPrediction() function, which use a TensorFlow Keras model to classify an image and predict the class of the object it shows; a minimal usage sketch appears at the end of this README.

### TestingCLasses.py

- This is the main file, used to test the trained model against a live webcam feed.
- It uses the handTracker() class to draw and track the hand, the FaceMesh() class to draw and track the face, and the TensorFlow Keras classifier to classify and predict the class.

### keras_model.h5

- The trained Keras model exported from Google Teachable Machine; it is the input to the classifier class.

### labels.txt

- Contains the class names.
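As a quick reference, here is a rough sketch of how the Classifier() class can be used on a single prepared image. It assumes the cropping and white-square padding from CollectingData.py has already been applied; the file locations (keras_model.h5 and labels.txt next to the script, and the sample image name) are illustrative assumptions.

```python
import cv2
import Classification as Classifier

# minimal sketch, assuming the Teachable Machine export sits next to this script
clf = Classifier.Classifier("keras_model.h5", "labels.txt")

# hypothetical 300x300 sample produced by CollectingData.py
imgWhite = cv2.imread("Data/N/sample.jpg")

prediction, index = clf.getPrediction(imgWhite, draw=False)
print(prediction)  # list with one confidence value per class
print(index)       # index of the most likely class, matching a line in labels.txt
```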
--------------------------------------------------------------------------------
/TestingCLasses.py:
--------------------------------------------------------------------------------
import cv2
import Track_Hand as ht  # hand tracking class
import FaceMesh as fm  # face mesh class
import Classification as Classifier
import numpy as np
import math
import time
'''
A problem occurs when we collect the data: the images we send to the
classifier must all be cropped to the same size, because it is much easier
for the classifier to work with uniformly sized images.

The solution is to paste the cropped hand onto a square (white) background.
'''

'''
Step 1: Create a directory named Data with sub-directories for the classes,
        e.g. A, B, C and I LOVE YOU.
        The required libraries are OpenCV and MediaPipe.
Step 2: Crop the image once we have detected the hand.
Step 3: Paste the crop onto a white image so every sample has the same size.
Step 4: Collect multiple images and assign them to a specific class.
Step 5: Train the data with Google's Teachable Machine
        (https://teachablemachine.withgoogle.com/train).
'''

cam = cv2.VideoCapture(0)
'''
Sometimes the IDE will not give you autocomplete suggestions; this is a
problem with newer versions of OpenCV, so you may need to install
opencv-python version 4.5.4.60.
'''
# Making an object of class handTracker
detectHand = ht.handTracker(maximumHands=1, detConfidence=0.8)

# Making an object of class FaceMesh
detectMesh = fm.FaceMesh(maxFace=1, detectionCon=0.8)

# declare the classifier with the model and label files
classifier = Classifier.Classifier("Model_2/keras_model.h5", "Model_2/labels.txt")

offset = 20
imageSize = 300

folderName = "Data/C"
counter = 0

# the signs that can be recognised
labels = ["A", "D", "GOODBYE", "HELLO", "I", "I LOVE YOU",
          "M", "N", "NO", "PLEASE", "SORRY", "WELCOME", "YES"]

while True:
    Success, frame = cam.read()
    finalFrame = frame.copy()

    # find and draw the hands
    hand = detectHand.findAndDrawHands(finalFrame)

    # find landmarks and bounding box
    lm, bbox = detectHand.findLandmarks(frame)

    # find face and draw mesh
    mesh = detectMesh.drawFaceMesh(finalFrame)

    # now we crop the hand image
    if lm:

        x, y, w, h = bbox

        # creating our own white image so every sample has the same size
        imgWhite = np.ones((300, 300, 3), np.uint8) * 255

        # starting height : ending height, starting width : ending width
        # imgCrop = frame[y:y + h, x:x + w]
        # note: the crop can be empty when the hand is close to the frame border
        imgCrop = frame[y - offset:y + h + offset, x - offset:x + w + offset]

        imgCropShape = imgCrop.shape
        # add cropped image in to white image
        # imgWhite[0:imgCropShape[0], 0:imgCropShape[1]] = imgCrop

        # in order to fit the cropped image on the white image we have to do
        # some calculations
        aspectRatio = h / w  # if the value is above one, the height is greater than the width

        if aspectRatio > 1:  # fix the height
            k = imageSize / h
            wCal = math.ceil(k * w)
            imgResize = cv2.resize(imgCrop, (wCal, imageSize))
            imgResizeShape = imgResize.shape
            wGap = math.ceil((imageSize - wCal) / 2)
            imgWhite[:, wGap:wCal + wGap] = imgResize
            prediction, index = classifier.getPrediction(imgWhite, draw=False)
            print(prediction, index)

        else:  # fix the width
            k = imageSize / w
            hCal = math.ceil(k * h)
            imgResize = cv2.resize(imgCrop, (imageSize, hCal))
            imgResizeShape = imgResize.shape
            hGap = math.ceil((imageSize - hCal) / 2)
            imgWhite[hGap:hCal + hGap, :] = imgResize
            prediction, index = classifier.getPrediction(imgWhite, draw=False)

        # cv2.imshow("Cropped Image", imgCrop)
        # cv2.imshow("WhiteImage", imgWhite)

        cv2.rectangle(finalFrame, (x - 20, y - 20), (x + w + 20, y + h + 20),
                      (255, 0, 255), 1)
        cv2.line(finalFrame, (x - 20, y - 20), (x - 20, y - 20 + 20), (255, 0, 255), 3)
        cv2.line(finalFrame, (x - 20, y - 20), (x - 20 + 20, y - 20), (255, 0, 255), 3)

        # cv2.line(img, (x + w, y), (x + w, y + 20), (0, 0, 0), 3)
        # cv2.line(img, (x + w, y), (w - 20, y), (0, 0, 0), 3)

        # cv2.line(finalFrame, (x + w + 20, y + 20), (x + w + 20, y + 20), (255, 0, 255), 3)
        # cv2.line(finalFrame, (x + w + 20, y + 20), (x + w - 20 + 20, y + 20), (255, 0, 255), 3)
        # cv2.rectangle(finalFrame, (x - offset, y - offset - 50),
        #               (x - offset + 90, y - offset - 50 + 50), (0, 255, 255), cv2.FILLED)
        cv2.putText(finalFrame, labels[index], (x, y - 26),
                    cv2.FONT_HERSHEY_COMPLEX, 1.7, (255, 255, 255), 2)

    cv2.imshow("Webcam", finalFrame)
    k = cv2.waitKey(1)
    if k == ord("q"):
        break

cam.release()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/Track_Hand.py:
--------------------------------------------------------------------------------
import mediapipe as mp
import cv2


class handTracker():

    # initialization function
    def __init__(self, Mode=False, maximumHands=2, modelComplexity=1,
                 detConfidence=0.5, trackConfidence=0.5):
        self.Mode = Mode
        self.maximumHands = maximumHands
        self.modelComplexity = modelComplexity
        self.detConfidence = detConfidence
        self.trackConfidence = trackConfidence

        # first we have to create an object of the Hands class (this class comes from the mediapipe library)
        self.HandsSol = mp.solutions.hands

        self.hands = self.HandsSol.Hands(self.Mode, self.maximumHands, self.modelComplexity,
                                         self.detConfidence, self.trackConfidence)
        # mediapipe also provides a drawing utility to draw the lines between the landmarks
        self.drawLine = mp.solutions.drawing_utils

    def findAndDrawHands(self, frame):

        RGBimage = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        self.outCome = self.hands.process(RGBimage)

        if self.outCome.multi_hand_landmarks:
            for handLandmarks in self.outCome.multi_hand_landmarks:
                self.drawLine.draw_landmarks(frame, handLandmarks,
                                             self.HandsSol.HAND_CONNECTIONS)

        return frame

    def findLandmarks(self, frame, handNo=0):

        landMarksList = []
        x_list = []
        y_list = []
        bbox = []

        if self.outCome.multi_hand_landmarks:
            myHand = self.outCome.multi_hand_landmarks[handNo]
            for id, lm in enumerate(myHand.landmark):  # enumerate returns both the id and the landmark
                # print(id)
                h, w, c = frame.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                # print(id, cx, cy)
                x_list.append(cx)
                y_list.append(cy)
                landMarksList.append([id, cx, cy])

            # bounding box
            xmin, xmax = min(x_list), max(x_list)
            ymin, ymax = min(y_list), max(y_list)
            boxW, boxH = xmax - xmin, ymax - ymin
            bbox = xmin, ymin, boxW, boxH

        return landMarksList, bbox
--------------------------------------------------------------------------------
/keras_model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/barills-diana/Sign-Language-Recognition-Using-Mediapipe/29fba4baee6c355765e55c933fe89feecbb16ba3/keras_model.h5
--------------------------------------------------------------------------------
/labels.txt:
--------------------------------------------------------------------------------
0 A
1 D
2 GOODBYE
3 HELLO
4 I
5 I LOVE YOU
6 M
7 N
8 NO
9 PLEASE
10 SORRY
11 WELCOME
12 YES
--------------------------------------------------------------------------------