import cv2
import mediapipe as mp
import time
import math


class handDetector():
    """Detect hands with MediaPipe and expose landmark/gesture helpers.

    Call findHands() on each frame before findPosition(), findDistance()
    or fingersUp(): those methods read the detection results cached on self.
    """

    def __init__(self, mode=False, maxHands=2, modelComplexity=1, detectionCon=0.5, trackCon=0.5):
        """Store configuration and build the MediaPipe Hands pipeline.

        mode            -- static_image_mode; False treats input as a video stream
        maxHands        -- maximum number of hands to detect
        modelComplexity -- landmark model complexity (0 or 1)
        detectionCon    -- minimum detection confidence
        trackCon        -- minimum tracking confidence
        """
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplexity = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        # Keyword arguments: mediapipe has inserted parameters into
        # Hands.__init__ across releases (model_complexity), so passing
        # five positional arguments can silently mis-bind them.
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            model_complexity=self.modelComplexity,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils
        # Landmark ids of the five fingertips (thumb..pinky).
        self.tipIds = [4, 8, 12, 16, 20]

    def findHands(self, img, draw=True):
        """Run hand detection on a BGR frame, optionally draw landmarks, return the frame."""
        # MediaPipe expects RGB input; OpenCV frames are BGR.
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Return ([[id, x, y], ...], bbox) in pixel coordinates for hand handNo.

        bbox is (xmin, ymin, xmax, ymax); both return values are empty when
        no hand was detected by the last findHands() call.
        """
        xList = []
        yList = []
        bbox = []
        self.lmList = []
        if self.results.multi_hand_landmarks:
            myHand = self.results.multi_hand_landmarks[handNo]
            for id, lm in enumerate(myHand.landmark):
                # Landmarks are normalised to [0, 1]; scale to pixels.
                h, w, c = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                xList.append(cx)
                yList.append(cy)
                self.lmList.append([id, cx, cy])
                if draw:
                    cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)
            xmin, xmax = min(xList), max(xList)
            ymin, ymax = min(yList), max(yList)
            bbox = xmin, ymin, xmax, ymax

            if draw:
                # 20 px margin around the detected hand.
                cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                              (bbox[2] + 20, bbox[3] + 20), (0, 255, 0), 2)
        return self.lmList, bbox

    def findDistance(self, p1, p2, img, draw=True):
        """Return (length, img, [x1, y1, x2, y2, cx, cy]) between landmarks p1 and p2."""
        x1, y1 = self.lmList[p1][1], self.lmList[p1][2]
        x2, y2 = self.lmList[p2][1], self.lmList[p2][2]
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

        if draw:
            cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
            cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)

        length = math.hypot(x2 - x1, y2 - y1)
        return length, img, [x1, y1, x2, y2, cx, cy]

    def fingersUp(self):
        """Return five 0/1 flags (thumb..pinky), 1 = finger raised.

        NOTE(review): the thumb test compares x coordinates, so it assumes a
        right hand facing the camera — confirm against callers.
        """
        if not self.lmList:
            # BUG FIX: no hand tracked used to raise IndexError; report no fingers.
            return []

        fingers = []

        # Thumb: tip is left of the joint below it (x axis).
        if self.lmList[self.tipIds[0]][1] < self.lmList[self.tipIds[0] - 1][1]:
            fingers.append(1)
        else:
            fingers.append(0)

        # Other four fingers: tip above the PIP joint (y axis, origin at top).
        for id in range(1, 5):
            if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id] - 2][2]:
                fingers.append(1)
            else:
                fingers.append(0)
        return fingers


def main():
    """Webcam demo: draw hand landmarks, print the thumb-tip entry, show FPS."""
    pTime = 0
    cap = cv2.VideoCapture(0)
    detector = handDetector()
    while True:
        success, img = cap.read()
        if not success:
            # Camera not ready or frame dropped; try the next frame.
            continue
        img = detector.findHands(img)
        # BUG FIX: findPosition returns (lmList, bbox); the old code kept the
        # whole tuple, so len(lmList) was always 2 and lmList[1] was the bbox.
        lmList, bbox = detector.findPosition(img)
        if len(lmList) != 0:
            print(lmList[1])

        cTime = time.time()
        fps = 1. / (cTime - pTime)
        pTime = cTime

        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)

        cv2.imshow("Image", img)
        cv2.waitKey(1)


if __name__ == "__main__":
    main()
4 | I will also look at how we can run this program to operate applications like Notepad. 5 | 6 | ## Features 7 | * Can track your hand in real-time 8 | * Can type text into a text editor based on your hand activity 9 | 10 | ## How to install 11 | 1. Clone this repository to your computer 12 | `https://github.com/paveldat/virtual_keyboard.git` 13 | 2. Install all the requirements 14 | `run libraries.bat` or 15 | `pip install -r requirements.txt` 16 | 3. Run the program 17 | `python main.py` 18 | 19 | ## Help 20 | You might face an issue where the webcam does not show up and you get errors. 21 | To solve it, just change the value in this line (for example, to `1`). 22 | `cap = cv2.VideoCapture(0)` 23 | Increment this number until you see your webcam. 24 | 25 | ## Hand Landmarks 26 | 27 | 28 | ## Keyboard 29 | 30 | 31 | Where: 32 | 33 | "<" - BACKSPACE 34 | 35 | " " - SPACE 36 | 37 | ## Click 38 | To simulate a click, bring the index and middle fingers of your hand together. An example of a valid click is shown in the image below. 
import cv2
from HandTrackingModule import handDetector
from time import sleep
import numpy as np
import cvzone
# BUG FIX: removed stray unused `from tkinter import E` (accidental editor
# auto-import); added Key so backspace can be sent as a real special key.
from pynput.keyboard import Controller, Key

cap = cv2.VideoCapture(0)
cap.set(3, 1280)  # frame width
cap.set(4, 720)   # frame height

detector = handDetector(detectionCon=0.8)
# On-screen layout; "<" acts as backspace and " " as space (see Readme).
keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
        ["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
        ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"],
        ["<", " "]]
finalText = ""
keyboard = Controller()


class Button():
    """One key of the on-screen keyboard: top-left position, caption, size (px)."""

    def __init__(self, pos, text, size=[85, 85]):
        self.pos = pos
        self.size = size
        self.text = text


def drawALL(img, buttonList):
    """Blend the semi-transparent keyboard over img and return the composite."""
    imgNew = np.zeros_like(img, np.uint8)
    for button in buttonList:
        x, y = button.pos
        w, h = button.size
        cvzone.cornerRect(imgNew, (x, y, w, h), 20, rt=0)
        cv2.rectangle(imgNew, button.pos, (x + w, y + h), (255, 0, 255), cv2.FILLED)
        cv2.putText(imgNew, button.text, (x + 20, y + 65),
                    cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 4)
    out = img.copy()
    alpha = 0.5
    # Blend only where keys were drawn so the rest of the frame stays crisp.
    mask = imgNew.astype(bool)
    out[mask] = cv2.addWeighted(img, alpha, imgNew, 1 - alpha, 0)[mask]
    return out


# Lay the keys out on a 100 px grid starting at (50, 50).
buttonList = []
for i in range(len(keys)):
    for j, key in enumerate(keys[i]):
        buttonList.append(Button([100 * j + 50, 100 * i + 50], key))

while True:
    success, img = cap.read()
    if not success:
        # BUG FIX: camera failures used to crash cv2.flip with an empty frame.
        continue
    img = cv2.flip(img, 1)  # mirror so hand movement feels natural
    img = detector.findHands(img)
    lmList, bboxInfo = detector.findPosition(img)

    img = drawALL(img, buttonList)

    if lmList:
        for button in buttonList:
            x, y = button.pos
            w, h = button.size

            # Landmark 8 is the index fingertip; highlight the hovered key.
            if x < lmList[8][1] < x + w and y < lmList[8][2] < y + h:
                cv2.rectangle(img, button.pos, (x + w, y + h), (175, 0, 175), cv2.FILLED)
                cv2.putText(img, button.text, (x + 20, y + 65),
                            cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 4)
                # Distance between index (8) and middle (12) fingertips.
                l, _, _ = detector.findDistance(8, 12, img, draw=False)

                # Pinching the two fingertips together counts as a click.
                if l < 40:
                    cv2.rectangle(img, button.pos, (x + w, y + h), (0, 255, 0), cv2.FILLED)
                    cv2.putText(img, button.text, (x + 20, y + 65),
                                cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 4)
                    if button.text == "<":
                        finalText = finalText[:-1]
                        # BUG FIX: press('\010') sent a raw control char and
                        # never released it; use the proper backspace key.
                        keyboard.press(Key.backspace)
                        keyboard.release(Key.backspace)
                    else:
                        finalText += button.text
                        # BUG FIX: release after press, otherwise pynput
                        # keeps the key held down in the target application.
                        keyboard.press(button.text)
                        keyboard.release(button.text)
                    sleep(0.15)  # debounce: one pinch types one character

    # Text box showing everything typed so far.
    cv2.rectangle(img, (50, 610), (700, 710), (175, 0, 175), cv2.FILLED)
    cv2.putText(img, finalText, (60, 690), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 5)

    cv2.imshow("Keyboard", img)
    cv2.waitKey(1)