# /HandTrackingModule.py -- hand-detection helper built on mediapipe.
import math

# Third-party vision dependencies. They are only needed once HandDetector is
# actually instantiated / fed images, so resolve them lazily: the pure-math
# helpers (fingersUp, findDistance with img=None) stay importable and testable
# on machines without the vision stack installed.
try:
    import cv2
    import mediapipe as mp
except ImportError:  # NOTE(review): deferred failure; detection requires both
    cv2 = None
    mp = None


class HandDetector:
    """
    Find hands with mediapipe and expose their 21 landmark pixel coordinates.

    Also counts which fingers are raised, measures the distance between two
    fingertips, and computes a bounding box for every detected hand.
    """

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, minTrackCon=0.5):
        """
        :param mode: static-image mode re-runs detection on every frame (slower)
        :param maxHands: maximum number of hands to detect
        :param detectionCon: minimum detection confidence
        :param minTrackCon: minimum tracking confidence
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.minTrackCon = minTrackCon

        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.minTrackCon)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]  # fingertip landmark ids, thumb first
        self.fingers = []
        self.lmList = []

    def findHands(self, img, draw=True, flipType=True):
        """
        Find hands in a BGR image.

        :param img: image to search
        :param draw: draw landmarks, bounding boxes and labels onto img
        :param flipType: swap the Left/Right label (use for mirrored images)
        :return: (allHands, img) when draw is True, otherwise allHands; each
                 hand is a dict with keys "lmList", "bbox", "center", "type"
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        allHands = []
        h, w, c = img.shape
        # results.multi_hand_landmarks: 21 normalized (x, y, z) landmarks per
        # hand; results.multi_handedness: classification records with fields
        # index (which hand), score, and label "Left"/"Right".
        if self.results.multi_hand_landmarks:
            for handType, handLms in zip(self.results.multi_handedness,
                                         self.results.multi_hand_landmarks):
                myHand = {}
                # landmark list converted to pixel coordinates
                mylmList = []
                xList = []
                yList = []
                for id, lm in enumerate(handLms.landmark):
                    px, py, pz = int(lm.x * w), int(lm.y * h), int(lm.z * w)
                    mylmList.append([px, py, pz])
                    xList.append(px)
                    yList.append(py)

                # bounding box from the landmark extremes
                xmin, xmax = min(xList), max(xList)
                ymin, ymax = min(yList), max(yList)
                boxW, boxH = xmax - xmin, ymax - ymin
                bbox = xmin, ymin, boxW, boxH
                cx, cy = bbox[0] + (bbox[2] // 2), bbox[1] + (bbox[3] // 2)

                myHand["lmList"] = mylmList
                myHand["bbox"] = bbox
                myHand["center"] = (cx, cy)

                if flipType:
                    # the caller mirrored the frame, so mediapipe's
                    # handedness labels are swapped
                    if handType.classification[0].label == "Right":
                        myHand["type"] = "Left"
                    else:
                        myHand["type"] = "Right"
                else:
                    myHand["type"] = handType.classification[0].label
                allHands.append(myHand)

                if draw:
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
                    cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                                  (bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
                                  (255, 0, 255), 2)  # magenta box
                    cv2.putText(img, myHand["type"], (bbox[0] - 30, bbox[1] - 30),
                                cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 255), 2)
        if draw:
            return allHands, img
        else:
            return allHands

    def fingersUp(self, myHand):
        """
        Return five 0/1 flags telling which fingers are raised.
        Left and right hands are handled separately.

        :param myHand: one hand dict as produced by findHands
        :return: list like [0, 1, 1, 0, 0] (thumb first), or None when the
                 last processed frame contained no hands
        """
        myHandType = myHand["type"]
        myLmList = myHand["lmList"]
        if self.results.multi_hand_landmarks:
            fingers = []
            # Thumb: compare the tip x with the joint next to it; the
            # direction of "open" depends on which hand it is.
            if myHandType == "Right":
                if myLmList[self.tipIds[0]][0] < myLmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)
            else:
                if myLmList[self.tipIds[0]][0] > myLmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)

            # Other four fingers: raised when the tip's y is above (smaller
            # than) the pip joint's y.
            for id in range(1, 5):
                if myLmList[self.tipIds[id]][1] < myLmList[self.tipIds[id] - 2][1]:
                    fingers.append(1)
                else:
                    fingers.append(0)
            return fingers

    def findDistance(self, p1, p2, img=None):
        """
        Euclidean distance between two fingertip points.

        :param p1: (x, y) of the first point
        :param p2: (x, y) of the second point
        :param img: optional image to draw the points, line and midpoint on
        :return: (length, info, img) when img is given, otherwise
                 (length, info); info is (x1, y1, x2, y2, cx, cy)
        """
        x1, y1 = p1
        x2, y2 = p2
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        length = math.hypot(x2 - x1, y2 - y1)
        info = (x1, y1, x2, y2, cx, cy)
        if img is not None:
            cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)  # point 1
            cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)  # point 2
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
            cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)  # midpoint
            return length, info, img
        else:
            return length, info
def main():
    """Webcam demo for HandDetector: annotate up to two hands live.

    Opens the default camera, draws landmarks/boxes per frame, and when two
    hands are visible measures the distance between their index fingertips.
    Press ESC to quit; the camera is always released on exit.
    """
    cap = cv2.VideoCapture(0)
    detector = HandDetector(detectionCon=0.8, maxHands=2)
    try:
        while True:
            # Get image frame
            success, img = cap.read()
            if not success:  # camera unplugged or stream ended
                break
            # Find the hand and its landmarks
            hands, img = detector.findHands(img)  # with draw
            # hands = detector.findHands(img, draw=False)  # without draw

            if hands:
                # Hand 1
                hand1 = hands[0]
                lmList1 = hand1["lmList"]       # list of 21 landmark points
                bbox1 = hand1["bbox"]           # bounding box x, y, w, h
                centerPoint1 = hand1['center']  # center of the hand cx, cy
                handType1 = hand1["type"]       # "Left" or "Right"

                fingers1 = detector.fingersUp(hand1)

                if len(hands) == 2:
                    # Hand 2
                    hand2 = hands[1]
                    lmList2 = hand2["lmList"]       # list of 21 landmark points
                    bbox2 = hand2["bbox"]           # bounding box x, y, w, h
                    centerPoint2 = hand2['center']  # center of the hand cx, cy
                    handType2 = hand2["type"]       # "Left" or "Right"

                    fingers2 = detector.fingersUp(hand2)

                    # Distance between two index fingertips; the landmarks may
                    # belong to the same hand or to different hands.
                    length, info, img = detector.findDistance(lmList1[8][0:2],
                                                              lmList2[8][0:2],
                                                              img)  # with draw
            # Display
            cv2.imshow("Image", img)
            if cv2.waitKey(1) & 0xFF == 27:  # ESC quits the demo
                break
    finally:
        # release the camera and close windows even on error
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()

# ----------------------------------------------------------------------------
# /README.md:
#
# # GestureInteraction简介
# 本项目是基于mediapipe构建的一个简单的手势交互系统,可以通过不同的手势控制电脑鼠标键盘进行相应的操作
#
# # 功能
# - 手势控制鼠标单击、双击(左击、右击)
# - 手势控制鼠标拖拽文件
# - 手势控制ppt切换
# - 手势控制音乐切换
# - 手势调节系统音量
#
# # 演示视频
# [项目演示](https://www.bilibili.com/video/BV19T4y1B7bu/?spm_id_from=333.999.0.0)
#
# # 安装
# 安装python 3.7.1
#
# pip install -r requirements.txt

# ----------------------------------------------------------------------------
# /gesturecontrol.py:
import cv2
import numpy as np
from HandTrackingModule import HandDetector  # hand-detection helper (above)
import time
import autopy
import win32gui, win32process, psutil
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

# System master-volume endpoint via pycaw (Windows Core Audio).
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
volumeRange = volume.GetVolumeRange()  # e.g. (-63.5, 0.0, 0.03125)
minVol = volumeRange[0]
maxVol = volumeRange[1]

# (1) video input geometry
wScr, hScr = autopy.screen.size()   # screen size, e.g. (1920.0, 1080.0)
wCam, hCam = 1280, 720              # capture window width and height
pt1, pt2 = (100, 100), (1000, 500)  # virtual-mouse region: top-left, bottom-right

cap = cv2.VideoCapture(0)  # 0 = default webcam
cap.set(3, wCam)           # capture width (1280)
cap.set(4, hCam)  # capture height (720)

pTime = 0                        # timestamp when the previous frame finished
pLocx, pLocy = 0, 0              # mouse position at the previous frame
smooth = 5                       # smoothing factor so the pointer moves steadily
frame = 0                        # consecutive frames showing the same gesture
toggle = False                   # True while the left button is held (dragging)
prev_state = [1, 1, 1, 1, 1]     # finger state seen in the previous frame
current_state = [1, 1, 1, 1, 1]  # finger state of the current frame
# Foreground-window process name; initialized so the gesture branches below
# never hit a NameError when the very first lookup fails.
active_window_process_name = ""

# (2) hand detector
detector = HandDetector(mode=False,        # video-stream (tracking) mode
                        maxHands=1,        # track at most one hand
                        detectionCon=0.8,  # minimum detection confidence
                        minTrackCon=0.5)   # minimum tracking confidence

# (3) process every frame
while True:
    success, img = cap.read()
    if not success:  # camera unplugged or stream ended
        break
    # mirror the frame so the preview matches the user's own movement
    img = cv2.flip(img, flipCode=1)  # 1 = horizontal flip
    # rectangle marking the region within which the mouse is driven
    cv2.rectangle(img, pt1, pt2, (0, 255, 255), 5)
    # best-effort lookup of the foreground window's process name (used to
    # special-case the NetEase Cloud Music player below)
    try:
        pid = win32process.GetWindowThreadProcessId(win32gui.GetForegroundWindow())
        active_window_process_name = psutil.Process(pid[-1]).name()
    except Exception:
        # window may have vanished between calls; keep the previous name
        pass
    # (4) hand landmark detection; the frame is already flipped above, so
    # handedness labels must not be swapped again
    hands, img = detector.findHands(img, flipType=False, draw=True)
    # hands example: [{'lmList': [[x, y, z] * 21], 'bbox': (x, y, w, h),
    #                 'center': (cx, cy), 'type': 'Right'}]
    if hands:
        lmList = hands[0]['lmList']  # 21 landmark points of the first hand
        hand_center = hands[0]['center']
        drag_flag = 0
        # index fingertip (landmark 8) and middle fingertip (landmark 12)
        x1, y1, z1 = lmList[8]
        x2, y2, z2 = lmList[12]
        cx, cy, cz = (x1 + x2) // 2, (y1 + y2) // 2, (z1 + z2) // 2
        hand_cx, hand_cy = hand_center[0], hand_center[1]
        # (5) which fingers are up, e.g. [0,1,1,0,0] = index + middle raised
        fingers = detector.fingersUp(hands[0])
        # distance between index and middle fingertips (also draws markers)
        distance, info, img = detector.findDistance((x1, y1), (x2, y2), img)
        # (6) map fingertip / palm position from the on-screen rectangle
        # to real screen coordinates
        x3 = np.interp(x1, (pt1[0], pt2[0]), (0, wScr))
        y3 = np.interp(y1, (pt1[1], pt2[1]), (0, hScr))
        x4 = np.interp(hand_cx, (pt1[0], pt2[0]), (0, wScr))
        y4 = np.interp(hand_cy, (pt1[1], pt2[1]), (0, hScr))
        # (7) exponential smoothing so the pointer does not jitter
        cLocx = pLocx + (x3 - pLocx) / smooth
        cLocy = pLocy + (y3 - pLocy) / smooth
        # count how many consecutive frames showed the same gesture
        current_state = fingers
        if prev_state == current_state:
            frame = frame + 1
        else:
            frame = 0
        prev_state = current_state

        # any non-fist gesture while dragging releases the held left button
        if fingers != [0, 0, 0, 0, 0] and toggle and frame >= 2:
            autopy.mouse.toggle(None, False)
            toggle = False
            print("释放左键")

        # index + middle raised: move the mouse
        if fingers[1] == 1 and fingers[2] == 1 and sum(fingers) == 2 and frame >= 1:
            # (8) move the pointer to the smoothed position
            autopy.mouse.move(cLocx, cLocy)
            print("移动鼠标")
            # current position becomes the previous position for the next frame
            pLocx, pLocy = cLocx, cLocy

            # (9) pinch: fingertips closer than 43 px counts as a left click
            if distance < 43 and frame >= 1:
                # green dot on the index fingertip marks the click
                cv2.circle(img, (x1, y1), 15, (0, 255, 0), cv2.FILLED)
                autopy.mouse.click(button=autopy.mouse.Button.LEFT, delay=0)
                cv2.putText(img, "left_click", (150, 50), cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 3)
                print("左击鼠标")
            else:
                cv2.putText(img, "move", (150, 50), cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 3)
        # index up, middle down: right click
        elif fingers[1] == 1 and fingers[2] == 0 and sum(fingers) == 1 and frame >= 2:
            autopy.mouse.click(button=autopy.mouse.Button.RIGHT, delay=0)
            print("右击鼠标")
            cv2.putText(img, "right_click", (150, 50), cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 3)
            cv2.circle(img, (x2, y2), 15, (0, 255, 0), cv2.FILLED)
        # fist: hold the left button down and drag
        elif fingers == [0, 0, 0, 0, 0]:
            if toggle == False:
                autopy.mouse.toggle(None, True)
                print("按紧左键")
                toggle = True
            autopy.mouse.move(cLocx, cLocy)
            pLocx, pLocy = cLocx, cLocy
            cv2.putText(img, "drag", (150, 50), cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 3)
            print("拖拽鼠标")
        # thumb only: UP key, or previous track when cloudmusic is focused
        elif fingers == [1, 0, 0, 0, 0] and frame >= 2:
            cv2.putText(img, "UP", (150, 50), cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 3)
            if active_window_process_name == "cloudmusic.exe":
                # Ctrl+Left is the player's "previous track" hotkey
                autopy.key.toggle(autopy.key.Code.LEFT_ARROW, True, [autopy.key.Modifier.CONTROL])
                autopy.key.toggle(autopy.key.Code.LEFT_ARROW, False, [autopy.key.Modifier.CONTROL])
                print("上一曲")
                time.sleep(0.3)  # debounce so one gesture fires once
            else:
                autopy.key.toggle(autopy.key.Code.UP_ARROW, True, [])
                autopy.key.toggle(autopy.key.Code.UP_ARROW, False, [])
                print("按下上键")
                time.sleep(0.3)
        # thumb folded, four fingers up: DOWN key, or next track in cloudmusic
        elif fingers == [0, 1, 1, 1, 1] and frame >= 2:
            cv2.putText(img, "Down", (150, 50), cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 3)
            if active_window_process_name == "cloudmusic.exe":
                # Ctrl+Right is the player's "next track" hotkey
                autopy.key.toggle(autopy.key.Code.RIGHT_ARROW, True, [autopy.key.Modifier.CONTROL])
                autopy.key.toggle(autopy.key.Code.RIGHT_ARROW, False, [autopy.key.Modifier.CONTROL])
                print("下一曲")
                time.sleep(0.3)
            else:
                autopy.key.toggle(autopy.key.Code.DOWN_ARROW, True, [])
                autopy.key.toggle(autopy.key.Code.DOWN_ARROW, False, [])
                print("按下下键")
                time.sleep(0.3)
        # OK-like gesture: horizontal hand motion adjusts the system volume
        elif fingers == [1, 0, 1, 1, 1] and frame >= 5:
            autopy.mouse.move(cLocx, cLocy)
            length = cLocx - pLocx  # horizontal displacement since last frame
            pLocx = cLocx
            pLocy = cLocy
            print("移动鼠标调整音量")
            currentVolumeLv = volume.GetMasterVolumeLevelScalar()
            currentVolumeLv += length / 50.0
            # clamp to the valid scalar range [0.0, 1.0]
            if currentVolumeLv > 1.0:
                currentVolumeLv = 1.0
            elif currentVolumeLv < 0.0:
                currentVolumeLv = 0.0
            volume.SetMasterVolumeLevelScalar(currentVolumeLv, None)
            setVolume = volume.GetMasterVolumeLevelScalar()
            volPer = setVolume
            volBar = 350 - int((volPer) * 200)
            # on-screen volume bar and percentage
            cv2.rectangle(img, (20, 150), (50, 350), (255, 0, 255), 2)
            cv2.rectangle(img, (20, int(volBar)), (50, 350), (255, 0, 255), cv2.FILLED)
            cv2.putText(img, f'{int(volPer * 100)}%', (10, 380), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 255), 2)

    # (10) FPS overlay
    cTime = time.time()  # time when this frame finished processing
    fps = 1 / (cTime - pTime)
    pTime = cTime
    cv2.putText(img, str(int(fps)), (70, 50), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 3)

    # show the annotated frame; ESC quits
    cv2.imshow('frame', img)
    if cv2.waitKey(1) & 0xFF == 27:
        break

# release video resources
cap.release()
cv2.destroyAllWindows()

# ----------------------------------------------------------------------------
# /requirements.txt:
#   autopy==4.0.0
#   comtypes==1.4.1
#   mediapipe==0.9.0.1
#   numpy==1.21.6
#   opencv_contrib_python==4.9.0.80
#   opencv_python==4.9.0.80
#   psutil==5.9.8
#   pycaw==20240210
#   pywin32==306
--------------------------------------------------------------------------------