├── utils
├── __init__.py
└── cvfpscalc.py
├── model
├── __init__.py
└── keypoint_classifier
│ ├── keypoint_classifier_label.csv
│ ├── keypoint_classifier.tflite
│ └── keypoint_classifier.py
├── README.md
├── .gitignore
├── _old
└── app.py
├── LICENSE
└── app.py
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from utils.cvfpscalc import CvFpsCalc
--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
1 | from model.keypoint_classifier.keypoint_classifier import KeyPointClassifier
--------------------------------------------------------------------------------
/model/keypoint_classifier/keypoint_classifier_label.csv:
--------------------------------------------------------------------------------
1 | Open
2 | Close
3 | Pointer
4 |
--------------------------------------------------------------------------------
/model/keypoint_classifier/keypoint_classifier.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kazuhito00/simple-virtual-mouse-using-mediapipe/HEAD/model/keypoint_classifier/keypoint_classifier.tflite
--------------------------------------------------------------------------------
/utils/cvfpscalc.py:
--------------------------------------------------------------------------------
1 | from collections import deque
2 | import cv2 as cv
3 |
4 |
class CvFpsCalc(object):
    """Measure frames-per-second using OpenCV's tick counter.

    Call ``get()`` once per frame; the reported FPS is averaged over the
    last ``buffer_len`` frame intervals to smooth out jitter.
    """

    def __init__(self, buffer_len=1):
        # Tick count at the previous get() call (or construction time).
        self._start_tick = cv.getTickCount()
        # Conversion factor: ticks -> milliseconds.
        self._freq = 1000.0 / cv.getTickFrequency()
        # Ring buffer of the most recent frame durations in ms.
        self._difftimes = deque(maxlen=buffer_len)

    def get(self):
        """Return the smoothed FPS, rounded to two decimal places."""
        current_tick = cv.getTickCount()
        different_time = (current_tick - self._start_tick) * self._freq
        self._start_tick = current_tick

        self._difftimes.append(different_time)

        # Bug fix: if two calls land on the same tick, the buffered
        # durations can sum to 0 and the original code raised
        # ZeroDivisionError. Report 0.0 FPS in that degenerate case.
        total_ms = sum(self._difftimes)
        if total_ms <= 0.0:
            return 0.0

        fps = 1000.0 / (total_ms / len(self._difftimes))
        fps_rounded = round(fps, 2)

        return fps_rounded
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # simple-virtual-mouse-using-mediapipe
2 | MediaPipeを用いたハンドジェスチャーによる簡単なマウス操作を行うプログラムです。
3 | マウス移動:手の移動
4 | マウス左クリック:手をパーからグーに変える
5 |
6 |
7 | # Requirements
8 | * mediapipe 0.8.1 or Later
9 | * PyAutoGUI 0.9.52 or Later
10 | * OpenCV 3.4.2 or Later
11 | * Tensorflow 2.3.0 or Later
12 |
13 | # Demo
14 | Webカメラを使ったデモの実行方法は以下です。
15 | ```bash
16 | python app.py
17 | ```
18 |
19 | デモ実行時には、以下のオプションが指定可能です。
20 | * --device
カメラデバイス番号の指定 (デフォルト:0)
21 | * --width
カメラキャプチャ時の横幅 (デフォルト:960)
22 | * --height
カメラキャプチャ時の縦幅 (デフォルト:540)
23 | * --min_detection_confidence
24 | 検出信頼値の閾値 (デフォルト:0.7)
25 | * --min_tracking_confidence
26 | トラッキング信頼値の閾値 (デフォルト:0.5)
27 | * --margin_width
ハンドジェスチャーの操作範囲(横幅のマージン割合)(デフォルト:0.2)
28 | * --margin_height
ハンドジェスチャーの操作範囲(縦幅のマージン割合)(デフォルト:0.2)
29 |
30 | # Reference
31 | * [MediaPipe:Hands](https://google.github.io/mediapipe/solutions/hands)
32 |
33 | # Author
34 | 高橋かずひと(https://twitter.com/KzhtTkhs)
35 |
36 | # License
37 | simple-virtual-mouse-using-mediapipe under [Apache-2.0 License](LICENSE).
38 |
--------------------------------------------------------------------------------
/model/keypoint_classifier/keypoint_classifier.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import numpy as np
4 | import tensorflow as tf
5 |
6 |
class KeyPointClassifier(object):
    """TFLite-backed classifier for hand keypoint gestures.

    The model is loaded once at construction; each ``__call__``
    classifies a flattened landmark vector and returns the index of
    the highest-scoring class.
    """

    def __init__(
        self,
        model_path='model/keypoint_classifier/keypoint_classifier.tflite',
        num_threads=1,
    ):
        # Build the interpreter and cache tensor metadata up front so
        # inference only has to set/read tensors.
        interpreter = tf.lite.Interpreter(model_path=model_path,
                                          num_threads=num_threads)
        interpreter.allocate_tensors()

        self.interpreter = interpreter
        self.input_details = interpreter.get_input_details()
        self.output_details = interpreter.get_output_details()

    def __call__(
        self,
        landmark_list,
    ):
        """Classify one landmark vector; return the argmax class index."""
        interpreter = self.interpreter
        input_index = self.input_details[0]['index']
        output_index = self.output_details[0]['index']

        # The model expects a batch dimension, hence the [landmark_list].
        interpreter.set_tensor(
            input_index, np.array([landmark_list], dtype=np.float32))
        interpreter.invoke()

        scores = np.squeeze(interpreter.get_tensor(output_index))
        return np.argmax(scores)
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # bat
132 | *.bat
--------------------------------------------------------------------------------
/_old/app.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import copy
4 | import time
5 | import argparse
6 | from collections import deque
7 |
8 | import cv2 as cv
9 | import numpy as np
10 | import mediapipe as mp
11 |
12 | import pyautogui
13 |
14 | from utils import CvFpsCalc
15 |
16 |
def get_args():
    """Parse command-line options for the (old) fingertip demo.

    Returns:
        argparse.Namespace with camera device/size, detection thresholds
        and the left-hand toggle.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--width", help='cap width', type=int, default=960)
    parser.add_argument("--height", help='cap height', type=int, default=540)

    # NOTE(review): the help strings mention "face mesh" but this demo
    # tracks hands via MediaPipe Holistic — text kept as-is, confirm intent.
    parser.add_argument("--min_detection_confidence",
                        help='face mesh min_detection_confidence',
                        type=float,
                        default=0.75)
    # Bug fix: this threshold is a ratio in [0, 1]; the original declared
    # type=int, so CLI values such as "0.5" crashed (int("0.5") raises).
    parser.add_argument("--min_tracking_confidence",
                        help='face mesh min_tracking_confidence',
                        type=float,
                        default=0.75)

    parser.add_argument('--use_left_hand', action='store_true')

    args = parser.parse_args()

    return args
38 |
39 |
def main():
    """Run the old fingertip-tracking virtual-mouse demo.

    Captures webcam frames, tracks the index-finger tip via MediaPipe
    Holistic, smooths its position over a short history, moves the OS
    cursor with pyautogui, and clicks on a forward "push" gesture
    detected from the fingertip's z-history. Exit with ESC.
    """
    # Parse arguments ########################################################
    args = get_args()

    cap_device = args.device
    cap_width = args.width
    cap_height = args.height

    min_detection_confidence = args.min_detection_confidence
    min_tracking_confidence = args.min_tracking_confidence

    use_left_hand = args.use_left_hand

    # Camera preparation #####################################################
    cap = cv.VideoCapture(cap_device)
    cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)

    # Model load #############################################################
    mp_holistic = mp.solutions.holistic
    holistic = mp_holistic.Holistic(
        upper_body_only=False,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence,
    )

    # ########################################################################
    # Index-finger tip landmark ID
    ID_FINGER_TIP = 8

    # Fingertip position history used for smoothing (x/y) and for the
    # click gesture (z).
    history_length = 6
    point_x_history = deque(maxlen=history_length)
    point_y_history = deque(maxlen=history_length)
    point_z_history = deque(maxlen=history_length)

    display_size = pyautogui.size()

    # FPS measurement module #################################################
    start_time = time.time()
    cvFpsCalc = CvFpsCalc(buffer_len=10)

    while True:
        display_fps = cvFpsCalc.get()

        # Key handling (ESC: quit) #########################################
        key = cv.waitKey(1)
        if key == 27:  # ESC
            break

        # Camera capture ###################################################
        ret, image = cap.read()
        if not ret:
            break
        image = cv.flip(image, 1)  # mirror display
        image_width, image_height = image.shape[1], image.shape[0]
        debug_image = copy.deepcopy(image)

        # Detection ########################################################
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = holistic.process(image)
        image.flags.writeable = True

        # Hands ############################################################
        left_hand_landmarks = results.left_hand_landmarks
        right_hand_landmarks = results.right_hand_landmarks
        landmarks = None
        # Right hand (original comment; presumably swapped because the
        # frame is mirrored above — TODO confirm)
        if (left_hand_landmarks is not None) and (not use_left_hand):
            landmarks = calc_hands_landmarks(image, left_hand_landmarks)
        # Left hand
        if (right_hand_landmarks is not None) and (use_left_hand):
            landmarks = calc_hands_landmarks(image, right_hand_landmarks)

        if landmarks is not None:
            point_x_history.append(landmarks[ID_FINGER_TIP][0])
            point_y_history.append(landmarks[ID_FINGER_TIP][1])
            point_z_history.append(landmarks[ID_FINGER_TIP][2])

            # Moving-average smoothing of the fingertip position.
            point_x = int(sum(point_x_history) / len(point_x_history))
            point_y = int(sum(point_y_history) / len(point_y_history))
            point_z = point_z_history[-1]

            # Drawing
            debug_image = draw_hands_landmarks(debug_image, point_x, point_y,
                                               point_z, display_fps)

            # Map image coordinates to screen coordinates.
            mouse_x = int(display_size.width * (point_x / image_width))
            mouse_y = int(display_size.height * (point_y / image_height))

            # Rate-limit cursor moves to roughly every 0.3 s.
            if (time.time() - start_time) > 0.3:
                start_time = time.time()
                pyautogui.moveTo(mouse_x, mouse_y)

            # Click heuristic: once the z-history is full, fire a click
            # when z swings by > 0.10 with the max occurring before the
            # min (a forward push — assumes smaller z is closer to the
            # camera, TODO confirm MediaPipe z convention).
            if len(point_z_history) >= history_length:
                diff_z = max(point_z_history) - min(point_z_history)
            else:
                diff_z = 0
            max_index_z = point_z_history.index(max(point_z_history))
            min_index_z = point_z_history.index(min(point_z_history))
            if diff_z > 0.10 and max_index_z < min_index_z:
                pyautogui.click()
                point_z_history.clear()

        else:
            # No hand this frame: decay the z-history so a stale gesture
            # cannot trigger a click later.
            if len(point_z_history):
                point_z_history.popleft()

        # Screen update ####################################################
        image_width, image_height = debug_image.shape[1], debug_image.shape[0]
        debug_image = cv.resize(debug_image,
                                (int(image_width / 2), int(image_height / 2)))
        cv.imshow('Simple Virtual Mouse Demo', debug_image)

    cap.release()
    cv.destroyAllWindows()
156 |
157 |
def calc_hands_landmarks(image, landmarks):
    """Convert normalized MediaPipe landmarks to pixel coordinates.

    Returns a list of (x, y, z) tuples. x and y are clamped to the
    right/bottom image edge (values can still be negative); z is passed
    through unchanged.
    """
    frame_w, frame_h = image.shape[1], image.shape[0]

    return [
        (
            min(int(lm.x * frame_w), frame_w - 1),
            min(int(lm.y * frame_h), frame_h - 1),
            lm.z,
        )
        for lm in landmarks.landmark
    ]
171 |
172 |
def draw_hands_landmarks(
    image,
    point_x,
    point_y,
    point_z,
    display_fps,
):
    """Draw the fingertip marker, its z value, and the FPS onto *image*.

    Each text is drawn twice (thick white, then thin black) to give an
    outlined look that stays readable on any background.
    """
    def _outlined_text(text, org, scale, outline_thickness, body_thickness):
        cv.putText(image, text, org, cv.FONT_HERSHEY_SIMPLEX, scale,
                   (255, 255, 255), outline_thickness, cv.LINE_AA)
        cv.putText(image, text, org, cv.FONT_HERSHEY_SIMPLEX, scale,
                   (0, 0, 0), body_thickness, cv.LINE_AA)

    # Fingertip marker: thick white ring with a thin black rim.
    cv.circle(image, (point_x, point_y), 12, (255, 255, 255), 8)
    cv.circle(image, (point_x, point_y), 12, (0, 0, 0), 2)

    _outlined_text("z:" + str(round(point_z, 3)),
                   (point_x - 20, point_y - 20), 1, 6, 2)
    _outlined_text("FPS:" + str(display_fps), (10, 50), 1.5, 8, 3)

    return image
196 |
197 |
# Script entry point: run the demo when this file is executed directly.
if __name__ == '__main__':
    main()
200 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import csv
4 | import time
5 | import copy
6 | import argparse
7 | import itertools
8 | from collections import Counter
9 | from collections import deque
10 |
11 | import cv2 as cv
12 | import numpy as np
13 | import mediapipe as mp
14 |
15 | import pyautogui
16 |
17 | from utils import CvFpsCalc
18 | from model import KeyPointClassifier
19 |
20 |
def get_args():
    """Parse command-line options for the virtual-mouse demo.

    Returns:
        argparse.Namespace with camera settings, detection thresholds
        and the operation-area margins.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--width", help='cap width', type=int, default=960)
    parser.add_argument("--height", help='cap height', type=int, default=540)

    parser.add_argument('--use_static_image_mode', action='store_true')
    parser.add_argument("--min_detection_confidence",
                        help='min_detection_confidence',
                        type=float,
                        default=0.7)
    # Bug fix: this threshold is a ratio in [0, 1]; the original declared
    # type=int, so CLI values such as "0.5" crashed (int("0.5") raises).
    parser.add_argument("--min_tracking_confidence",
                        help='min_tracking_confidence',
                        type=float,
                        default=0.5)

    # Fraction of the frame (per side) excluded from the hand-operation
    # area; the remaining inner rectangle maps to the full screen.
    parser.add_argument("--margin_width", type=float, default=0.2)
    parser.add_argument("--margin_height", type=float, default=0.2)

    args = parser.parse_args()

    return args
44 |
45 |
def main():
    """Run the virtual-mouse demo: webcam -> hand sign -> OS cursor.

    Tracks one hand with MediaPipe Hands, classifies its gesture with
    the TFLite keypoint classifier, maps the palm centroid (within a
    margined sub-area of the frame) to screen coordinates via pyautogui,
    and clicks on an Open -> Close transition. Exit with ESC.
    """
    # Parse arguments ######################################################
    args = get_args()

    cap_device = args.device
    cap_width = args.width
    cap_height = args.height

    use_static_image_mode = args.use_static_image_mode
    min_detection_confidence = args.min_detection_confidence
    min_tracking_confidence = args.min_tracking_confidence

    margin_width = args.margin_width
    margin_height = args.margin_height

    # Camera preparation ###################################################
    cap = cv.VideoCapture(cap_device)
    cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)

    # Model load ###########################################################
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(
        static_image_mode=use_static_image_mode,
        max_num_hands=1,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence,
    )

    keypoint_classifier = KeyPointClassifier()

    # Label loading ########################################################
    # utf-8-sig strips a possible BOM from the first label.
    with open('model/keypoint_classifier/keypoint_classifier_label.csv',
              encoding='utf-8-sig') as f:
        keypoint_classifier_labels = csv.reader(f)
        keypoint_classifier_labels = [
            row[0] for row in keypoint_classifier_labels
        ]

    # FPS measurement module ###############################################
    start_time = time.time()
    cvFpsCalc = CvFpsCalc(buffer_len=10)

    # ######################################################################
    display_size = pyautogui.size()
    # Previous hand-sign class (0 appears to be "Open" per the label CSV).
    prev_hand_sign_id = 0

    while True:
        fps = cvFpsCalc.get()

        # Key handling (ESC: quit) #########################################
        key = cv.waitKey(1)
        if key == 27:  # ESC
            break

        # Camera capture ###################################################
        ret, image = cap.read()
        if not ret:
            break
        image = cv.flip(image, 1)  # mirror display
        image_width, image_height = image.shape[1], image.shape[0]
        debug_image = copy.deepcopy(image)

        # Detection ########################################################
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

        image.flags.writeable = False
        results = hands.process(image)
        image.flags.writeable = True

        # ##################################################################
        if results.multi_hand_landmarks is not None:
            for hand_landmarks, handedness in zip(results.multi_hand_landmarks,
                                                  results.multi_handedness):
                # Palm centroid calculation
                cx, cy = calc_palm_moment(debug_image, hand_landmarks)
                # Bounding-rectangle calculation
                brect = calc_bounding_rect(debug_image, hand_landmarks)
                # Landmark calculation
                landmark_list = calc_landmark_list(debug_image, hand_landmarks)

                # Conversion to relative / normalized coordinates
                pre_processed_landmark_list = pre_process_landmark(
                    landmark_list)

                # Hand-sign classification
                hand_sign_id = keypoint_classifier(pre_processed_landmark_list)

                # Mouse movement: map the palm centroid from the inner
                # (margined) operation rectangle to full-screen coords,
                # clamped to the screen bounds.
                area_x1 = int(image_width * margin_width)
                area_y1 = int(image_height * margin_height)
                area_x2 = image_width - area_x1
                area_y2 = image_height - area_y1
                mouse_x = int(display_size.width * ((cx - area_x1) /
                                                    (area_x2 - area_x1)))
                mouse_y = int(display_size.height * ((cy - area_y1) /
                                                     (area_y2 - area_y1)))
                mouse_x = 0 if mouse_x < 0 else mouse_x
                mouse_y = 0 if mouse_y < 0 else mouse_y
                mouse_x = display_size.width if mouse_x > display_size.width else mouse_x
                mouse_y = display_size.height if mouse_y > display_size.height else mouse_y
                # Rate-limit cursor moves to roughly every 0.2 s.
                if (time.time() - start_time) > 0.2:
                    start_time = time.time()
                    pyautogui.moveTo(mouse_x, mouse_y)

                # Open -> Close transition: mouse click
                # NOTE(review): prev_hand_sign_id is only updated on the
                # 0<->1 transitions, other sign ids leave it untouched —
                # looks intentional, worth confirming.
                if hand_sign_id == 1 and prev_hand_sign_id == 0:
                    pyautogui.click()
                    prev_hand_sign_id = hand_sign_id
                elif prev_hand_sign_id == 1 and hand_sign_id == 0:
                    prev_hand_sign_id = hand_sign_id

                # Drawing
                debug_image = draw_bounding_rect(True, debug_image, brect)
                debug_image = draw_landmarks(debug_image, landmark_list, cx,
                                             cy)
                debug_image = draw_info_text(
                    debug_image,
                    brect,
                    handedness,
                    keypoint_classifier_labels[hand_sign_id],
                )

        debug_image = draw_info(debug_image, fps, margin_width, margin_height)

        # Screen update ####################################################
        debug_image = cv.resize(debug_image,
                                (int(image_width / 2), int(image_height / 2)))
        cv.imshow('Hand Gesture Recognition', debug_image)

    cap.release()
    cv.destroyAllWindows()
178 |
179 |
def calc_palm_moment(image, landmarks):
    """Return the (cx, cy) pixel centroid of the palm region.

    The palm is approximated by six landmarks — the two wrist points and
    the four finger bases — and their image moments give the centroid.
    Returns (0, 0) when the moment area m00 is zero.
    """
    image_width, image_height = image.shape[1], image.shape[0]

    # Wrist 1, wrist 2, and the bases of index/middle/ring/little fingers.
    palm_ids = (0, 1, 5, 9, 13, 17)

    points = []
    for index, landmark in enumerate(landmarks.landmark):
        if index not in palm_ids:
            continue
        # Clamp pixel coordinates to the right/bottom image edge.
        px = min(int(landmark.x * image_width), image_width - 1)
        py = min(int(landmark.y * image_height), image_height - 1)
        points.append((px, py))

    palm_array = np.array(points, dtype=int).reshape(-1, 2)

    M = cv.moments(palm_array)
    cx, cy = 0, 0
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])

    return cx, cy
210 |
211 |
def calc_bounding_rect(image, landmarks):
    """Compute the axis-aligned bounding box of all hand landmarks.

    Args:
        image: BGR frame (numpy array); only its shape is read.
        landmarks: MediaPipe hand landmarks with normalized 0-1 coords.

    Returns:
        [x1, y1, x2, y2] corner coordinates in pixels.
    """
    image_width, image_height = image.shape[1], image.shape[0]

    # Build the point array in one shot; the original repeated-np.append
    # pattern reallocates the array on every iteration (O(n^2)).
    landmark_array = np.array(
        [(min(int(landmark.x * image_width), image_width - 1),
          min(int(landmark.y * image_height), image_height - 1))
         for landmark in landmarks.landmark],
        int)

    x, y, w, h = cv.boundingRect(landmark_array)

    return [x, y, x + w, y + h]
228 |
229 |
def calc_landmark_list(image, landmarks):
    """Convert normalized landmarks into a list of [x, y] pixel coordinates.

    Coordinates are clamped to the right/bottom image edge (width-1 /
    height-1); the z coordinate is intentionally discarded.

    Args:
        image: BGR frame (numpy array); only its shape is read.
        landmarks: MediaPipe hand landmarks with normalized 0-1 coords.

    Returns:
        List of [x, y] integer pixel pairs, one per landmark.
    """
    image_width, image_height = image.shape[1], image.shape[0]

    return [
        [
            min(int(landmark.x * image_width), image_width - 1),
            min(int(landmark.y * image_height), image_height - 1),
        ]
        for landmark in landmarks.landmark
    ]
244 |
245 |
def pre_process_landmark(landmark_list):
    """Turn landmarks into a flat, wrist-relative, normalized feature vector.

    Steps:
      1. Translate every point so the first landmark (wrist) is the origin.
      2. Flatten [[x, y], ...] to [x0, y0, x1, y1, ...].
      3. Divide by the maximum absolute coordinate so values lie in [-1, 1].

    Args:
        landmark_list: list of [x, y] pixel coordinates (not mutated).

    Returns:
        Flat list of floats in [-1.0, 1.0]; empty input yields [].
    """
    temp_landmark_list = copy.deepcopy(landmark_list)
    if not temp_landmark_list:
        # Original code raised ValueError (max of empty sequence) here.
        return []

    # Convert to coordinates relative to landmark 0 (the wrist)
    base_x, base_y = temp_landmark_list[0]
    for point in temp_landmark_list:
        point[0] -= base_x
        point[1] -= base_y

    # Flatten to a 1-D list
    flat_list = list(itertools.chain.from_iterable(temp_landmark_list))

    # Normalize; `or 1` guards the degenerate case where every point
    # coincides with the wrist (max 0 caused ZeroDivisionError before).
    max_value = max(map(abs, flat_list)) or 1

    return [value / max_value for value in flat_list]
271 |
272 |
def draw_landmarks(image, landmark_point, cx, cy):
    """Draw the hand skeleton, key points and palm centroid onto the image.

    Every line and circle is drawn twice: a thick black pass underneath and
    a thin white pass on top, producing an outlined look on any background.

    Args:
        image: BGR frame to draw on (modified in place).
        landmark_point: list of 21 [x, y] pixel coordinates (may be empty).
        cx, cy: palm centroid in pixel coordinates.

    Returns:
        The same image, for call chaining.
    """
    # Landmark-index pairs forming the hand skeleton (replaces ~100 lines
    # of copy-pasted cv.line calls with a data-driven table).
    connections = (
        (2, 3), (3, 4),                   # thumb
        (5, 6), (6, 7), (7, 8),           # index finger
        (9, 10), (10, 11), (11, 12),      # middle finger
        (13, 14), (14, 15), (15, 16),     # ring finger
        (17, 18), (18, 19), (19, 20),     # little finger
        (0, 1), (1, 2), (2, 5), (5, 9),   # palm outline
        (9, 13), (13, 17), (17, 0),
    )
    fingertips = {4, 8, 12, 16, 20}  # drawn with a larger radius

    if len(landmark_point) > 0:
        # Skeleton lines: black underlay (6 px), white overlay (2 px)
        for start, end in connections:
            p1 = tuple(landmark_point[start])
            p2 = tuple(landmark_point[end])
            cv.line(image, p1, p2, (0, 0, 0), 6)
            cv.line(image, p1, p2, (255, 255, 255), 2)

        # Key points: white filled disc with a black 1 px outline
        for index, landmark in enumerate(landmark_point):
            radius = 8 if index in fingertips else 5
            center = (landmark[0], landmark[1])
            cv.circle(image, center, radius, (255, 255, 255), -1)
            cv.circle(image, center, radius, (0, 0, 0), 1)

    # Palm centroid (virtual-mouse cursor anchor); outline radius (8)
    # intentionally differs from the fill radius (10), as in the original.
    cv.circle(image, (cx, cy), 10, (255, 255, 255), -1)
    cv.circle(image, (cx, cy), 8, (0, 0, 0), 1)

    return image
463 |
464 |
def draw_bounding_rect(use_brect, image, brect):
    """Optionally draw the hand's bounding rectangle onto the image.

    Args:
        use_brect: when falsy, the image is returned untouched.
        image: BGR frame to draw on (modified in place).
        brect: [x1, y1, x2, y2] rectangle corners in pixels.

    Returns:
        The same image, for call chaining.
    """
    if not use_brect:
        return image

    # Thin black outline around the detected hand
    top_left = (brect[0], brect[1])
    bottom_right = (brect[2], brect[3])
    cv.rectangle(image, top_left, bottom_right, (0, 0, 0), 1)

    return image
472 |
473 |
def draw_info_text(image, brect, handedness, hand_sign_text):
    """Draw a filled label banner above the bounding rect.

    The banner shows the MediaPipe handedness label ("Left"/"Right"),
    followed by ":<gesture>" when a gesture label is supplied.

    Args:
        image: BGR frame to draw on (modified in place).
        brect: [x1, y1, x2, y2] bounding rectangle of the hand.
        handedness: MediaPipe classification result for the hand.
        hand_sign_text: classified gesture label ("" to omit).

    Returns:
        The same image, for call chaining.
    """
    # Filled black banner, 22 px tall, sitting on top of the rectangle
    cv.rectangle(image, (brect[0], brect[1]), (brect[2], brect[1] - 22),
                 (0, 0, 0), -1)

    # The original's `label[0:]` was a no-op copy of the string; removed.
    info_text = handedness.classification[0].label
    if hand_sign_text != "":
        info_text = info_text + ':' + hand_sign_text
    cv.putText(image, info_text, (brect[0] + 5, brect[1] - 4),
               cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv.LINE_AA)

    return image
485 |
486 |
def draw_info(image, fps, margin_width, margin_height):
    """Overlay the active cursor area and the FPS counter on the frame.

    Args:
        image: BGR frame to draw on (modified in place).
        fps: current frames-per-second value to display.
        margin_width: horizontal margin as a fraction of frame width.
        margin_height: vertical margin as a fraction of frame height.

    Returns:
        The same image, for call chaining.
    """
    frame_w, frame_h = image.shape[1], image.shape[0]

    # White rectangle marking the inner region that maps to the screen
    left = int(frame_w * margin_width)
    top = int(frame_h * margin_height)
    right, bottom = frame_w - left, frame_h - top
    cv.rectangle(image, (left, top), (right, bottom), (255, 255, 255), 2)

    # FPS readout: thick black underlay, then thin white text on top
    fps_text = "FPS:" + str(fps)
    for color, thickness in (((0, 0, 0), 4), ((255, 255, 255), 2)):
        cv.putText(image, fps_text, (10, 30), cv.FONT_HERSHEY_SIMPLEX,
                   1.0, color, thickness, cv.LINE_AA)
    return image
502 |
503 |
# Script entry point: launch the gesture-controlled virtual mouse loop.
if __name__ == '__main__':
    main()
506 |
--------------------------------------------------------------------------------