├── utils
├── __init__.py
└── cvfpscalc.py
├── model
├── __init__.py
└── keypoint_classifier
│ ├── keypoint_classifier_label.csv
│ ├── keypoint_classifier.tflite
│ └── keypoint_classifier.py
├── README.md
├── .gitignore
├── _old
└── app.py
├── LICENSE
└── app.py
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from utils.cvfpscalc import CvFpsCalc
--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
1 | from model.keypoint_classifier.keypoint_classifier import KeyPointClassifier
--------------------------------------------------------------------------------
/model/keypoint_classifier/keypoint_classifier_label.csv:
--------------------------------------------------------------------------------
1 | Open
2 | Close
3 | Pointer
4 |
--------------------------------------------------------------------------------
/model/keypoint_classifier/keypoint_classifier.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kazuhito00/simple-virtual-mouse-using-mediapipe/HEAD/model/keypoint_classifier/keypoint_classifier.tflite
--------------------------------------------------------------------------------
/utils/cvfpscalc.py:
--------------------------------------------------------------------------------
1 | from collections import deque
2 | import cv2 as cv
3 |
4 |
class CvFpsCalc(object):
    """Measure frames-per-second using OpenCV's tick counter.

    Call ``get()`` once per frame; the reported FPS is averaged over the
    last ``buffer_len`` frame intervals to smooth out jitter.
    """

    def __init__(self, buffer_len=1):
        # Tick count at the previous get() call (or construction time).
        self._start_tick = cv.getTickCount()
        # Conversion factor: ticks -> milliseconds.
        self._freq = 1000.0 / cv.getTickFrequency()
        # Ring buffer of the most recent frame durations in ms.
        self._difftimes = deque(maxlen=buffer_len)

    def get(self):
        """Return the smoothed FPS, rounded to two decimal places."""
        current_tick = cv.getTickCount()
        different_time = (current_tick - self._start_tick) * self._freq
        self._start_tick = current_tick

        self._difftimes.append(different_time)

        # Bug fix: if two calls land on the same tick, the buffered
        # durations can sum to 0 and the original code raised
        # ZeroDivisionError. Report 0.0 FPS in that degenerate case.
        total_ms = sum(self._difftimes)
        if total_ms <= 0.0:
            return 0.0

        fps = 1000.0 / (total_ms / len(self._difftimes))
        fps_rounded = round(fps, 2)

        return fps_rounded
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # simple-virtual-mouse-using-mediapipe
2 | MediaPipeを用いたハンドジェスチャーによる簡単なマウス操作を行うプログラムです。
3 | マウス移動:手の移動
4 | マウス左クリック:手をパーからグーに変える
5 |
6 |
7 | # Requirements
8 | * mediapipe 0.8.1 or Later
9 | * PyAutoGUI 0.9.52 or Later
10 | * OpenCV 3.4.2 or Later
11 | * Tensorflow 2.3.0 or Later
12 |
13 | # Demo
14 | Webカメラを使ったデモの実行方法は以下です。
15 | ```bash
16 | python app.py
17 | ```
18 |
19 | デモ実行時には、以下のオプションが指定可能です。
20 | * --device
カメラデバイス番号の指定 (デフォルト:0)
21 | * --width
カメラキャプチャ時の横幅 (デフォルト:960)
22 | * --height
カメラキャプチャ時の縦幅 (デフォルト:540)
23 | * --min_detection_confidence
24 | 検出信頼値の閾値 (デフォルト:0.7)
25 | * --min_tracking_confidence
26 | トラッキング信頼値の閾値 (デフォルト:0.5)
27 | * --margin_width
ハンドジェスチャーの操作範囲(横幅のマージン割合)(デフォルト:0.2)
28 | * --margin_height
ハンドジェスチャーの操作範囲(縦幅のマージン割合)(デフォルト:0.2)
29 |
30 | # Reference
31 | * [MediaPipe:Hands](https://google.github.io/mediapipe/solutions/hands)
32 |
33 | # Author
34 | 高橋かずひと(https://twitter.com/KzhtTkhs)
35 |
36 | # License
37 | simple-virtual-mouse-using-mediapipe under [Apache-2.0 License](LICENSE).
38 |
--------------------------------------------------------------------------------
/model/keypoint_classifier/keypoint_classifier.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import numpy as np
4 | import tensorflow as tf
5 |
6 |
class KeyPointClassifier(object):
    """TFLite-backed classifier for hand keypoint gestures.

    The model is loaded once at construction; each ``__call__``
    classifies a flattened landmark vector and returns the index of
    the highest-scoring class.
    """

    def __init__(
        self,
        model_path='model/keypoint_classifier/keypoint_classifier.tflite',
        num_threads=1,
    ):
        # Build the interpreter and cache tensor metadata up front so
        # inference only has to set/read tensors.
        interpreter = tf.lite.Interpreter(model_path=model_path,
                                          num_threads=num_threads)
        interpreter.allocate_tensors()

        self.interpreter = interpreter
        self.input_details = interpreter.get_input_details()
        self.output_details = interpreter.get_output_details()

    def __call__(
        self,
        landmark_list,
    ):
        """Classify one landmark vector; return the argmax class index."""
        interpreter = self.interpreter
        input_index = self.input_details[0]['index']
        output_index = self.output_details[0]['index']

        # The model expects a batch dimension, hence the [landmark_list].
        interpreter.set_tensor(
            input_index, np.array([landmark_list], dtype=np.float32))
        interpreter.invoke()

        scores = np.squeeze(interpreter.get_tensor(output_index))
        return np.argmax(scores)
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # bat
132 | *.bat
--------------------------------------------------------------------------------
/_old/app.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import copy
4 | import time
5 | import argparse
6 | from collections import deque
7 |
8 | import cv2 as cv
9 | import numpy as np
10 | import mediapipe as mp
11 |
12 | import pyautogui
13 |
14 | from utils import CvFpsCalc
15 |
16 |
def get_args():
    """Parse command-line options for the (old) fingertip demo.

    Returns:
        argparse.Namespace with camera device/size, detection thresholds
        and the left-hand toggle.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--width", help='cap width', type=int, default=960)
    parser.add_argument("--height", help='cap height', type=int, default=540)

    # NOTE(review): the help strings mention "face mesh" but this demo
    # tracks hands via MediaPipe Holistic — text kept as-is, confirm intent.
    parser.add_argument("--min_detection_confidence",
                        help='face mesh min_detection_confidence',
                        type=float,
                        default=0.75)
    # Bug fix: this threshold is a ratio in [0, 1]; the original declared
    # type=int, so CLI values such as "0.5" crashed (int("0.5") raises).
    parser.add_argument("--min_tracking_confidence",
                        help='face mesh min_tracking_confidence',
                        type=float,
                        default=0.75)

    parser.add_argument('--use_left_hand', action='store_true')

    args = parser.parse_args()

    return args
38 |
39 |
def main():
    """Run the old fingertip-tracking virtual-mouse demo.

    Captures webcam frames, tracks the index-finger tip via MediaPipe
    Holistic, smooths its position over a short history, moves the OS
    cursor with pyautogui, and clicks on a forward "push" gesture
    detected from the fingertip's z-history. Exit with ESC.
    """
    # Parse arguments ########################################################
    args = get_args()

    cap_device = args.device
    cap_width = args.width
    cap_height = args.height

    min_detection_confidence = args.min_detection_confidence
    min_tracking_confidence = args.min_tracking_confidence

    use_left_hand = args.use_left_hand

    # Camera preparation #####################################################
    cap = cv.VideoCapture(cap_device)
    cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)

    # Model load #############################################################
    mp_holistic = mp.solutions.holistic
    holistic = mp_holistic.Holistic(
        upper_body_only=False,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence,
    )

    # ########################################################################
    # Index-finger tip landmark ID
    ID_FINGER_TIP = 8

    # Fingertip position history used for smoothing (x/y) and for the
    # click gesture (z).
    history_length = 6
    point_x_history = deque(maxlen=history_length)
    point_y_history = deque(maxlen=history_length)
    point_z_history = deque(maxlen=history_length)

    display_size = pyautogui.size()

    # FPS measurement module #################################################
    start_time = time.time()
    cvFpsCalc = CvFpsCalc(buffer_len=10)

    while True:
        display_fps = cvFpsCalc.get()

        # Key handling (ESC: quit) #########################################
        key = cv.waitKey(1)
        if key == 27:  # ESC
            break

        # Camera capture ###################################################
        ret, image = cap.read()
        if not ret:
            break
        image = cv.flip(image, 1)  # mirror display
        image_width, image_height = image.shape[1], image.shape[0]
        debug_image = copy.deepcopy(image)

        # Detection ########################################################
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = holistic.process(image)
        image.flags.writeable = True

        # Hands ############################################################
        left_hand_landmarks = results.left_hand_landmarks
        right_hand_landmarks = results.right_hand_landmarks
        landmarks = None
        # Right hand (original comment; presumably swapped because the
        # frame is mirrored above — TODO confirm)
        if (left_hand_landmarks is not None) and (not use_left_hand):
            landmarks = calc_hands_landmarks(image, left_hand_landmarks)
        # Left hand
        if (right_hand_landmarks is not None) and (use_left_hand):
            landmarks = calc_hands_landmarks(image, right_hand_landmarks)

        if landmarks is not None:
            point_x_history.append(landmarks[ID_FINGER_TIP][0])
            point_y_history.append(landmarks[ID_FINGER_TIP][1])
            point_z_history.append(landmarks[ID_FINGER_TIP][2])

            # Moving-average smoothing of the fingertip position.
            point_x = int(sum(point_x_history) / len(point_x_history))
            point_y = int(sum(point_y_history) / len(point_y_history))
            point_z = point_z_history[-1]

            # Drawing
            debug_image = draw_hands_landmarks(debug_image, point_x, point_y,
                                               point_z, display_fps)

            # Map image coordinates to screen coordinates.
            mouse_x = int(display_size.width * (point_x / image_width))
            mouse_y = int(display_size.height * (point_y / image_height))

            # Rate-limit cursor moves to roughly every 0.3 s.
            if (time.time() - start_time) > 0.3:
                start_time = time.time()
                pyautogui.moveTo(mouse_x, mouse_y)

            # Click heuristic: once the z-history is full, fire a click
            # when z swings by > 0.10 with the max occurring before the
            # min (a forward push — assumes smaller z is closer to the
            # camera, TODO confirm MediaPipe z convention).
            if len(point_z_history) >= history_length:
                diff_z = max(point_z_history) - min(point_z_history)
            else:
                diff_z = 0
            max_index_z = point_z_history.index(max(point_z_history))
            min_index_z = point_z_history.index(min(point_z_history))
            if diff_z > 0.10 and max_index_z < min_index_z:
                pyautogui.click()
                point_z_history.clear()

        else:
            # No hand this frame: decay the z-history so a stale gesture
            # cannot trigger a click later.
            if len(point_z_history):
                point_z_history.popleft()

        # Screen update ####################################################
        image_width, image_height = debug_image.shape[1], debug_image.shape[0]
        debug_image = cv.resize(debug_image,
                                (int(image_width / 2), int(image_height / 2)))
        cv.imshow('Simple Virtual Mouse Demo', debug_image)

    cap.release()
    cv.destroyAllWindows()
156 |
157 |
def calc_hands_landmarks(image, landmarks):
    """Convert normalized MediaPipe landmarks to pixel coordinates.

    Returns a list of (x, y, z) tuples. x and y are clamped to the
    right/bottom image edge (values can still be negative); z is passed
    through unchanged.
    """
    frame_w, frame_h = image.shape[1], image.shape[0]

    return [
        (
            min(int(lm.x * frame_w), frame_w - 1),
            min(int(lm.y * frame_h), frame_h - 1),
            lm.z,
        )
        for lm in landmarks.landmark
    ]
171 |
172 |
def draw_hands_landmarks(
    image,
    point_x,
    point_y,
    point_z,
    display_fps,
):
    """Draw the fingertip marker, its z value, and the FPS onto *image*.

    Each text is drawn twice (thick white, then thin black) to give an
    outlined look that stays readable on any background.
    """
    def _outlined_text(text, org, scale, outline_thickness, body_thickness):
        cv.putText(image, text, org, cv.FONT_HERSHEY_SIMPLEX, scale,
                   (255, 255, 255), outline_thickness, cv.LINE_AA)
        cv.putText(image, text, org, cv.FONT_HERSHEY_SIMPLEX, scale,
                   (0, 0, 0), body_thickness, cv.LINE_AA)

    # Fingertip marker: thick white ring with a thin black rim.
    cv.circle(image, (point_x, point_y), 12, (255, 255, 255), 8)
    cv.circle(image, (point_x, point_y), 12, (0, 0, 0), 2)

    _outlined_text("z:" + str(round(point_z, 3)),
                   (point_x - 20, point_y - 20), 1, 6, 2)
    _outlined_text("FPS:" + str(display_fps), (10, 50), 1.5, 8, 3)

    return image
196 |
197 |
# Script entry point: run the demo when this file is executed directly.
if __name__ == '__main__':
    main()
200 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import csv
4 | import time
5 | import copy
6 | import argparse
7 | import itertools
8 | from collections import Counter
9 | from collections import deque
10 |
11 | import cv2 as cv
12 | import numpy as np
13 | import mediapipe as mp
14 |
15 | import pyautogui
16 |
17 | from utils import CvFpsCalc
18 | from model import KeyPointClassifier
19 |
20 |
def get_args():
    """Parse command-line options for the virtual-mouse demo.

    Returns:
        argparse.Namespace with camera settings, detection thresholds
        and the operation-area margins.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--width", help='cap width', type=int, default=960)
    parser.add_argument("--height", help='cap height', type=int, default=540)

    parser.add_argument('--use_static_image_mode', action='store_true')
    parser.add_argument("--min_detection_confidence",
                        help='min_detection_confidence',
                        type=float,
                        default=0.7)
    # Bug fix: this threshold is a ratio in [0, 1]; the original declared
    # type=int, so CLI values such as "0.5" crashed (int("0.5") raises).
    parser.add_argument("--min_tracking_confidence",
                        help='min_tracking_confidence',
                        type=float,
                        default=0.5)

    # Fraction of the frame (per side) excluded from the hand-operation
    # area; the remaining inner rectangle maps to the full screen.
    parser.add_argument("--margin_width", type=float, default=0.2)
    parser.add_argument("--margin_height", type=float, default=0.2)

    args = parser.parse_args()

    return args
44 |
45 |
def main():
    """Run the virtual-mouse demo: webcam -> hand sign -> OS cursor.

    Tracks one hand with MediaPipe Hands, classifies its gesture with
    the TFLite keypoint classifier, maps the palm centroid (within a
    margined sub-area of the frame) to screen coordinates via pyautogui,
    and clicks on an Open -> Close transition. Exit with ESC.
    """
    # Parse arguments ######################################################
    args = get_args()

    cap_device = args.device
    cap_width = args.width
    cap_height = args.height

    use_static_image_mode = args.use_static_image_mode
    min_detection_confidence = args.min_detection_confidence
    min_tracking_confidence = args.min_tracking_confidence

    margin_width = args.margin_width
    margin_height = args.margin_height

    # Camera preparation ###################################################
    cap = cv.VideoCapture(cap_device)
    cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)

    # Model load ###########################################################
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(
        static_image_mode=use_static_image_mode,
        max_num_hands=1,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence,
    )

    keypoint_classifier = KeyPointClassifier()

    # Label loading ########################################################
    # utf-8-sig strips a possible BOM from the first label.
    with open('model/keypoint_classifier/keypoint_classifier_label.csv',
              encoding='utf-8-sig') as f:
        keypoint_classifier_labels = csv.reader(f)
        keypoint_classifier_labels = [
            row[0] for row in keypoint_classifier_labels
        ]

    # FPS measurement module ###############################################
    start_time = time.time()
    cvFpsCalc = CvFpsCalc(buffer_len=10)

    # ######################################################################
    display_size = pyautogui.size()
    # Previous hand-sign class (0 appears to be "Open" per the label CSV).
    prev_hand_sign_id = 0

    while True:
        fps = cvFpsCalc.get()

        # Key handling (ESC: quit) #########################################
        key = cv.waitKey(1)
        if key == 27:  # ESC
            break

        # Camera capture ###################################################
        ret, image = cap.read()
        if not ret:
            break
        image = cv.flip(image, 1)  # mirror display
        image_width, image_height = image.shape[1], image.shape[0]
        debug_image = copy.deepcopy(image)

        # Detection ########################################################
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

        image.flags.writeable = False
        results = hands.process(image)
        image.flags.writeable = True

        # ##################################################################
        if results.multi_hand_landmarks is not None:
            for hand_landmarks, handedness in zip(results.multi_hand_landmarks,
                                                  results.multi_handedness):
                # Palm centroid calculation
                cx, cy = calc_palm_moment(debug_image, hand_landmarks)
                # Bounding-rectangle calculation
                brect = calc_bounding_rect(debug_image, hand_landmarks)
                # Landmark calculation
                landmark_list = calc_landmark_list(debug_image, hand_landmarks)

                # Conversion to relative / normalized coordinates
                pre_processed_landmark_list = pre_process_landmark(
                    landmark_list)

                # Hand-sign classification
                hand_sign_id = keypoint_classifier(pre_processed_landmark_list)

                # Mouse movement: map the palm centroid from the inner
                # (margined) operation rectangle to full-screen coords,
                # clamped to the screen bounds.
                area_x1 = int(image_width * margin_width)
                area_y1 = int(image_height * margin_height)
                area_x2 = image_width - area_x1
                area_y2 = image_height - area_y1
                mouse_x = int(display_size.width * ((cx - area_x1) /
                                                    (area_x2 - area_x1)))
                mouse_y = int(display_size.height * ((cy - area_y1) /
                                                     (area_y2 - area_y1)))
                mouse_x = 0 if mouse_x < 0 else mouse_x
                mouse_y = 0 if mouse_y < 0 else mouse_y
                mouse_x = display_size.width if mouse_x > display_size.width else mouse_x
                mouse_y = display_size.height if mouse_y > display_size.height else mouse_y
                # Rate-limit cursor moves to roughly every 0.2 s.
                if (time.time() - start_time) > 0.2:
                    start_time = time.time()
                    pyautogui.moveTo(mouse_x, mouse_y)

                # Open -> Close transition: mouse click
                # NOTE(review): prev_hand_sign_id is only updated on the
                # 0<->1 transitions, other sign ids leave it untouched —
                # looks intentional, worth confirming.
                if hand_sign_id == 1 and prev_hand_sign_id == 0:
                    pyautogui.click()
                    prev_hand_sign_id = hand_sign_id
                elif prev_hand_sign_id == 1 and hand_sign_id == 0:
                    prev_hand_sign_id = hand_sign_id

                # Drawing
                debug_image = draw_bounding_rect(True, debug_image, brect)
                debug_image = draw_landmarks(debug_image, landmark_list, cx,
                                             cy)
                debug_image = draw_info_text(
                    debug_image,
                    brect,
                    handedness,
                    keypoint_classifier_labels[hand_sign_id],
                )

        debug_image = draw_info(debug_image, fps, margin_width, margin_height)

        # Screen update ####################################################
        debug_image = cv.resize(debug_image,
                                (int(image_width / 2), int(image_height / 2)))
        cv.imshow('Hand Gesture Recognition', debug_image)

    cap.release()
    cv.destroyAllWindows()
178 |
179 |
def calc_palm_moment(image, landmarks):
    """Return the (cx, cy) pixel centroid of the palm region.

    The palm is approximated by six landmarks — the two wrist points and
    the four finger bases — and their image moments give the centroid.
    Returns (0, 0) when the moment area m00 is zero.
    """
    image_width, image_height = image.shape[1], image.shape[0]

    # Wrist 1, wrist 2, and the bases of index/middle/ring/little fingers.
    palm_ids = (0, 1, 5, 9, 13, 17)

    points = []
    for index, landmark in enumerate(landmarks.landmark):
        if index not in palm_ids:
            continue
        # Clamp pixel coordinates to the right/bottom image edge.
        px = min(int(landmark.x * image_width), image_width - 1)
        py = min(int(landmark.y * image_height), image_height - 1)
        points.append((px, py))

    palm_array = np.array(points, dtype=int).reshape(-1, 2)

    M = cv.moments(palm_array)
    cx, cy = 0, 0
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])

    return cx, cy
210 |
211 |
def calc_bounding_rect(image, landmarks):
    """Compute the axis-aligned bounding box of all hand landmarks.

    Args:
        image: BGR frame (numpy array); only its shape is read.
        landmarks: MediaPipe hand landmarks with normalized 0-1 coords.

    Returns:
        [x1, y1, x2, y2] corner coordinates in pixels.
    """
    image_width, image_height = image.shape[1], image.shape[0]

    # Build the point array in one shot; the original repeated-np.append
    # pattern reallocates the array on every iteration (O(n^2)).
    landmark_array = np.array(
        [(min(int(landmark.x * image_width), image_width - 1),
          min(int(landmark.y * image_height), image_height - 1))
         for landmark in landmarks.landmark],
        int)

    x, y, w, h = cv.boundingRect(landmark_array)

    return [x, y, x + w, y + h]
228 |
229 |
def calc_landmark_list(image, landmarks):
    """Convert normalized landmarks into a list of [x, y] pixel coordinates.

    Coordinates are clamped to the right/bottom image edge (width-1 /
    height-1); the z coordinate is intentionally discarded.

    Args:
        image: BGR frame (numpy array); only its shape is read.
        landmarks: MediaPipe hand landmarks with normalized 0-1 coords.

    Returns:
        List of [x, y] integer pixel pairs, one per landmark.
    """
    image_width, image_height = image.shape[1], image.shape[0]

    return [
        [
            min(int(landmark.x * image_width), image_width - 1),
            min(int(landmark.y * image_height), image_height - 1),
        ]
        for landmark in landmarks.landmark
    ]
244 |
245 |
def pre_process_landmark(landmark_list):
    """Turn landmarks into a flat, wrist-relative, normalized feature vector.

    Steps:
      1. Translate every point so the first landmark (wrist) is the origin.
      2. Flatten [[x, y], ...] to [x0, y0, x1, y1, ...].
      3. Divide by the maximum absolute coordinate so values lie in [-1, 1].

    Args:
        landmark_list: list of [x, y] pixel coordinates (not mutated).

    Returns:
        Flat list of floats in [-1.0, 1.0]; empty input yields [].
    """
    temp_landmark_list = copy.deepcopy(landmark_list)
    if not temp_landmark_list:
        # Original code raised ValueError (max of empty sequence) here.
        return []

    # Convert to coordinates relative to landmark 0 (the wrist)
    base_x, base_y = temp_landmark_list[0]
    for point in temp_landmark_list:
        point[0] -= base_x
        point[1] -= base_y

    # Flatten to a 1-D list
    flat_list = list(itertools.chain.from_iterable(temp_landmark_list))

    # Normalize; `or 1` guards the degenerate case where every point
    # coincides with the wrist (max 0 caused ZeroDivisionError before).
    max_value = max(map(abs, flat_list)) or 1

    return [value / max_value for value in flat_list]
271 |
272 |
def draw_landmarks(image, landmark_point, cx, cy):
    """Draw the hand skeleton, key points and palm centroid onto the image.

    Every line and circle is drawn twice: a thick black pass underneath and
    a thin white pass on top, producing an outlined look on any background.

    Args:
        image: BGR frame to draw on (modified in place).
        landmark_point: list of 21 [x, y] pixel coordinates (may be empty).
        cx, cy: palm centroid in pixel coordinates.

    Returns:
        The same image, for call chaining.
    """
    # Landmark-index pairs forming the hand skeleton (replaces ~100 lines
    # of copy-pasted cv.line calls with a data-driven table).
    connections = (
        (2, 3), (3, 4),                   # thumb
        (5, 6), (6, 7), (7, 8),           # index finger
        (9, 10), (10, 11), (11, 12),      # middle finger
        (13, 14), (14, 15), (15, 16),     # ring finger
        (17, 18), (18, 19), (19, 20),     # little finger
        (0, 1), (1, 2), (2, 5), (5, 9),   # palm outline
        (9, 13), (13, 17), (17, 0),
    )
    fingertips = {4, 8, 12, 16, 20}  # drawn with a larger radius

    if len(landmark_point) > 0:
        # Skeleton lines: black underlay (6 px), white overlay (2 px)
        for start, end in connections:
            p1 = tuple(landmark_point[start])
            p2 = tuple(landmark_point[end])
            cv.line(image, p1, p2, (0, 0, 0), 6)
            cv.line(image, p1, p2, (255, 255, 255), 2)

        # Key points: white filled disc with a black 1 px outline
        for index, landmark in enumerate(landmark_point):
            radius = 8 if index in fingertips else 5
            center = (landmark[0], landmark[1])
            cv.circle(image, center, radius, (255, 255, 255), -1)
            cv.circle(image, center, radius, (0, 0, 0), 1)

    # Palm centroid (virtual-mouse cursor anchor); outline radius (8)
    # intentionally differs from the fill radius (10), as in the original.
    cv.circle(image, (cx, cy), 10, (255, 255, 255), -1)
    cv.circle(image, (cx, cy), 8, (0, 0, 0), 1)

    return image
463 |
464 |
def draw_bounding_rect(use_brect, image, brect):
    """Optionally draw the hand's bounding rectangle onto the image.

    Args:
        use_brect: when falsy, the image is returned untouched.
        image: BGR frame to draw on (modified in place).
        brect: [x1, y1, x2, y2] rectangle corners in pixels.

    Returns:
        The same image, for call chaining.
    """
    if not use_brect:
        return image

    # Thin black outline around the detected hand
    top_left = (brect[0], brect[1])
    bottom_right = (brect[2], brect[3])
    cv.rectangle(image, top_left, bottom_right, (0, 0, 0), 1)

    return image
472 |
473 |
def draw_info_text(image, brect, handedness, hand_sign_text):
    """Draw a filled label banner above the bounding rect.

    The banner shows the MediaPipe handedness label ("Left"/"Right"),
    followed by ":<gesture>" when a gesture label is supplied.

    Args:
        image: BGR frame to draw on (modified in place).
        brect: [x1, y1, x2, y2] bounding rectangle of the hand.
        handedness: MediaPipe classification result for the hand.
        hand_sign_text: classified gesture label ("" to omit).

    Returns:
        The same image, for call chaining.
    """
    # Filled black banner, 22 px tall, sitting on top of the rectangle
    cv.rectangle(image, (brect[0], brect[1]), (brect[2], brect[1] - 22),
                 (0, 0, 0), -1)

    # The original's `label[0:]` was a no-op copy of the string; removed.
    info_text = handedness.classification[0].label
    if hand_sign_text != "":
        info_text = info_text + ':' + hand_sign_text
    cv.putText(image, info_text, (brect[0] + 5, brect[1] - 4),
               cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv.LINE_AA)

    return image
485 |
486 |
def draw_info(image, fps, margin_width, margin_height):
    """Overlay the active cursor area and the FPS counter on the frame.

    Args:
        image: BGR frame to draw on (modified in place).
        fps: current frames-per-second value to display.
        margin_width: horizontal margin as a fraction of frame width.
        margin_height: vertical margin as a fraction of frame height.

    Returns:
        The same image, for call chaining.
    """
    frame_w, frame_h = image.shape[1], image.shape[0]

    # White rectangle marking the inner region that maps to the screen
    left = int(frame_w * margin_width)
    top = int(frame_h * margin_height)
    right, bottom = frame_w - left, frame_h - top
    cv.rectangle(image, (left, top), (right, bottom), (255, 255, 255), 2)

    # FPS readout: thick black underlay, then thin white text on top
    fps_text = "FPS:" + str(fps)
    for color, thickness in (((0, 0, 0), 4), ((255, 255, 255), 2)):
        cv.putText(image, fps_text, (10, 30), cv.FONT_HERSHEY_SIMPLEX,
                   1.0, color, thickness, cv.LINE_AA)
    return image
502 |
503 |
# Script entry point: launch the gesture-controlled virtual mouse loop.
if __name__ == '__main__':
    main()
506 |
--------------------------------------------------------------------------------