├── README.md
├── handTracker.py
└── hand_pose.py

/README.md:
--------------------------------------------------------------------------------
# Build-An-AI-Virtual-Mouse-With-OpenCV
In our continuing deep dive into practical real-time computer vision, we'll show you how to code a hands-free, webcam-based controller for your computer mouse using the OpenCV library in Python. This will allow you to control your computer without any physical peripheral required (Iron Man style!). In this session, we'll first obtain the live camera feed using OpenCV, then estimate hand poses using MediaPipe Hands, an open-source framework that uses machine learning to infer 21 3D hand landmarks from a single frame in real time, without dedicated hardware acceleration and even on mobile phones. Finally, we'll drive simulated mouse movement in response to those poses using the AutoPy automation module.

Prerequisites:
✅ Python (latest release: https://www.python.org/downloads/release/python-395/)
✅ PyCharm (https://www.jetbrains.com/pycharm/download/) or any other Python code editor
✅ pip install: OpenCV (https://pypi.org/project/opencv-python/), MediaPipe, AutoPy
-----------------------------------------
To learn more about The Assembly's workshops, visit our website or social media, or email us at workshops@theassembly.ae
Our website: http://theassembly.ae
Instagram: http://instagram.com/makesmartthings
Facebook: http://fb.com/makesmartthings
Twitter: http://twitter.com/makesmartthings
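
Before reading the full scripts below, here is a minimal sketch of the pipeline just described: grab a frame, let MediaPipe Hands locate the index fingertip, and hand the scaled, clamped coordinates to AutoPy. The confidence threshold and the single-hand limit are illustrative choices, not values the workshop prescribes; only documented OpenCV, MediaPipe, and AutoPy calls are used.

```python
import cv2
import mediapipe as mp
import autopy

cap = cv2.VideoCapture(0)
screen_w, screen_h = autopy.screen.size()  # Screen size in points, e.g. 1920 x 1080

with mp.solutions.hands.Hands(max_num_hands=1,
                              min_detection_confidence=0.8) as hands:
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        # MediaPipe expects RGB input; OpenCV delivers BGR
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if results.multi_hand_landmarks:
            # Landmark 8 is the index fingertip; coordinates are normalized to [0, 1]
            tip = results.multi_hand_landmarks[0].landmark[8]
            # (1 - tip.x) mirrors the x axis so the cursor follows the hand,
            # and clamping keeps autopy.mouse.move() inside the screen bounds
            x = min(max((1 - tip.x) * screen_w, 0), screen_w - 1)
            y = min(max(tip.y * screen_h, 0), screen_h - 1)
            autopy.mouse.move(x, y)
        cv2.imshow("Sketch", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()
```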
--------------------------------------------------------------------------------
/handTracker.py:
--------------------------------------------------------------------------------
import cv2
import mediapipe
import numpy
import autopy

cap = cv2.VideoCapture(0)
initHand = mediapipe.solutions.hands  # MediaPipe Hands solution module
# Hands object with confidence thresholds for detection and tracking
mainHand = initHand.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.8)
draw = mediapipe.solutions.drawing_utils  # Utility for drawing landmarks and the connections between them
wScr, hScr = autopy.screen.size()  # Outputs the width and height of the screen (e.g. 1920 x 1080)
pX, pY = 0, 0  # Previous x and y cursor location
cX, cY = 0, 0  # Current x and y cursor location


def handLandmarks(colorImg):
    landmarkList = []  # Stays empty if no landmarks are tracked

    landmarkPositions = mainHand.process(colorImg)  # Runs hand detection on the RGB frame
    landmarkCheck = landmarkPositions.multi_hand_landmarks  # Output of the processing object (None if no hand is found)
    if landmarkCheck:  # Checks whether any hand was tracked
        for hand in landmarkCheck:  # Landmarks for each detected hand
            draw.draw_landmarks(img, hand, initHand.HAND_CONNECTIONS)  # Draws the 21 landmarks on the frame with their connections
            h, w, c = img.shape  # Height, width and channels of the image
            for index, landmark in enumerate(hand.landmark):  # Loops through the 21 landmarks and their coordinates (x, y & z)
                centerX, centerY = int(landmark.x * w), int(landmark.y * h)  # Converts the normalized coordinates to pixel positions
                landmarkList.append([index, centerX, centerY])  # Adds each landmark index and its coordinates to the list

    return landmarkList


def fingers(landmarks):
    fingerTips = []  # Stores a 1 (up) or 0 (down) for each of the five fingers
    tipIds = [4, 8, 12, 16, 20]  # Landmark indexes for the tip of each finger

    # Check if the thumb is up (compares the tip's x position to the joint below it)
    if landmarks[tipIds[0]][1] > landmarks[tipIds[0] - 1][1]:
        fingerTips.append(1)
    else:
        fingerTips.append(0)

    # Check if the four fingers other than the thumb are up
    for id in range(1, 5):
        if landmarks[tipIds[id]][2] < landmarks[tipIds[id] - 3][2]:  # The tip is higher on screen (smaller y) than a lower joint of the same finger
            fingerTips.append(1)
        else:
            fingerTips.append(0)

    return fingerTips


while True:
    check, img = cap.read()  # Reads a frame from the camera
    if not check:  # Stops if no frame could be read
        break
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Converts the frame from BGR to RGB for MediaPipe
    lmList = handLandmarks(imgRGB)
    # cv2.rectangle(img, (75, 75), (640 - 75, 480 - 75), (255, 0, 255), 2)

    if len(lmList) != 0:
        x1, y1 = lmList[8][1:]  # x and y of landmark 8, the index fingertip (skips the index value at position 0)
        x2, y2 = lmList[12][1:]  # x and y of landmark 12, the middle fingertip
        finger = fingers(lmList)  # Checks which fingers are up

        if finger[1] == 1 and finger[2] == 0:  # Index finger up and middle finger down: move the mouse
            x3 = numpy.interp(x1, (75, 640 - 75), (0, wScr))  # Maps the frame's active region onto the screen width
            y3 = numpy.interp(y1, (75, 480 - 75), (0, hScr))  # Maps the frame's active region onto the screen height

            cX = pX + (x3 - pX) / 7  # Smooths the x movement towards the target location
            cY = pY + (y3 - pY) / 7  # Smooths the y movement towards the target location

            autopy.mouse.move(wScr - cX, cY)  # Moves the mouse (wScr - cX mirrors the x direction so the cursor follows the hand)
            pX, pY = cX, cY  # Stores the current location as the previous one for the next frame

        if finger[1] == 0 and finger[0] == 1:  # Index finger down and thumb up: left click
            autopy.mouse.click()

    cv2.imshow("Webcam", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
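
The coordinate mapping and smoothing in the movement branch above are the easiest parts to get wrong, so here is a small standalone sketch of that arithmetic with hypothetical input values (a fingertip at pixel (320, 240) in a 640x480 frame, mapped to a 1920x1080 screen with the same 75-pixel margin used above).

```python
import numpy

frame_w, frame_h, margin = 640, 480, 75
screen_w, screen_h = 1920, 1080
x1, y1 = 320, 240  # Hypothetical fingertip pixel position, dead center of the frame

# interp maps [margin, frame - margin] onto [0, screen], so the edges of the
# frame are unreachable dead zones and the center maps to the screen center
x3 = numpy.interp(x1, (margin, frame_w - margin), (0, screen_w))  # -> 960.0
y3 = numpy.interp(y1, (margin, frame_h - margin), (0, screen_h))  # -> 540.0

# One smoothing step: the cursor covers 1/7 of the distance to the target each
# frame, which damps landmark jitter at the cost of a little lag
pX, pY = 0, 0  # Previous cursor position
cX = pX + (x3 - pX) / 7
cY = pY + (y3 - pY) / 7
print(cX, cY)  # ~137.1, ~77.1 on the first frame, converging towards (960, 540)
```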
/hand_pose.py:
--------------------------------------------------------------------------------
import mediapipe as mp
import cv2
import numpy as np
import uuid
import os

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

cap = cv2.VideoCapture(0)

with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # BGR to RGB for MediaPipe
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Flip on the horizontal axis for a mirror view
        image = cv2.flip(image, 1)

        # Mark the image read-only so MediaPipe can process it by reference
        image.flags.writeable = False

        # Detections
        results = hands.process(image)

        # Make the image writeable again for drawing
        image.flags.writeable = True

        # RGB back to BGR for OpenCV rendering
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        print(results)  # Debug output of the raw detection results

        # Rendering results
        if results.multi_hand_landmarks:
            for num, hand in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                          )

        cv2.imshow('Hand Tracking', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

# Second pass: the same pipeline, but every rendered frame is also saved to disk
os.makedirs('Output Images', exist_ok=True)  # Creates the output folder if it does not already exist
cap = cv2.VideoCapture(0)

with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # BGR to RGB for MediaPipe
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Flip on the horizontal axis for a mirror view
        image = cv2.flip(image, 1)

        # Mark the image read-only so MediaPipe can process it by reference
        image.flags.writeable = False

        # Detections
        results = hands.process(image)

        # Make the image writeable again for drawing
        image.flags.writeable = True

        # RGB back to BGR for OpenCV rendering
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        print(results)  # Debug output of the raw detection results

        # Rendering results
        if results.multi_hand_landmarks:
            for num, hand in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                          )

        # Save the rendered frame under a unique name
        cv2.imwrite(os.path.join('Output Images', '{}.jpg'.format(uuid.uuid1())), image)
        cv2.imshow('Hand Tracking', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
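
hand_pose.py only prints the raw `results` object; here is a short sketch of how to turn one of those results into pixel coordinates. The file name `hand.jpg` is a hypothetical test image, and `static_image_mode=True` is the documented MediaPipe option for single images rather than video streams.

```python
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands

frame = cv2.imread('hand.jpg')  # 'hand.jpg' is a hypothetical test image
assert frame is not None, 'test image not found'
h, w, _ = frame.shape

with mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.8) as hands:
    results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if results.multi_hand_landmarks:
        for hand in results.multi_hand_landmarks:
            # Landmark coordinates are normalized to [0, 1]; scale by the image
            # size to get pixels. HandLandmark.INDEX_FINGER_TIP is index 8.
            tip = hand.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
            print('index fingertip at pixel', int(tip.x * w), int(tip.y * h))
```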