├── .gitignore ├── LICENSE ├── LICENSE.6DRepNet ├── LICENSE.SynergyNet ├── LICENSE.WHENet ├── README.md ├── convert_script.txt ├── demo_video.py ├── make_hardshrink_6drepnet.py ├── make_hardshrink_synergynet.py └── make_hardshrink_whenet.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.onnx 2 | .vscode/ 3 | *.mp4 4 | *.MP4 5 | *.mov 6 | *.MOV 7 | *.engine 8 | *.profile 9 | saved_model/ 10 | dmhead_mask_*/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Katsuya Hyodo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LICENSE.6DRepNet: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Thorsten Hempel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /LICENSE.SynergyNet: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Cho Ying Wu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LICENSE.WHENet: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020. Huawei Technologies Co., Ltd. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DMHead 2 | Dual model head pose estimation. Fusion of SOTA models. 360° 6D HeadPose detection. All pre-processing and post-processing are fused together, allowing end-to-end processing in a single inference. 3 | 4 | ## 1. 
Summary 5 | ![icon_design drawio (14)](https://user-images.githubusercontent.com/33194443/175760025-b359e1d2-ac16-456e-8cf6-2c58514fbc7c.png) 6 | - **`[Front side]`** Mask-wearing mode - 6DRepNet (RepVGG-B1g2) 7 | 8 | - Paper 9 | 10 | ![image](https://user-images.githubusercontent.com/33194443/175760351-bd8d2e61-bb49-48f3-8023-c45c12cbd800.png) 11 | 12 | - Fine-tuned (my own training) 13 | ``` 14 | Yaw: 3.3193, Pitch: 4.9063, Roll: 3.3687, MAE: 3.8648 15 | ``` 16 | 17 | - **`[Front side]`** No-mask mode - SynergyNet (MobileNetV2) 18 | 19 | - Paper 20 | 21 | ![image](https://user-images.githubusercontent.com/33194443/174690800-272e5a06-c932-414f-8397-861d7d6284d0.png) 22 | 23 | - **`[Rear side]`** WHENet 24 | 25 | - Paper 26 | 27 | ![image](https://user-images.githubusercontent.com/33194443/175760218-4e61da30-71b6-4d2a-8ca4-ddc4c2ec5df0.png) 28 | 29 | ## 2. Inference Test 30 | 31 | ```bash 32 | wget https://github.com/PINTO0309/DMHead/releases/download/1.1.2/yolov4_headdetection_480x640_post.onnx 33 | wget https://github.com/PINTO0309/DMHead/releases/download/1.1.2/dmhead_mask_Nx3x224x224.onnx 34 | wget https://github.com/PINTO0309/DMHead/releases/download/1.1.2/dmhead_nomask_Nx3x224x224.onnx 35 | 36 | python demo_video.py 37 | ``` 38 | 39 | ```bash 40 | python demo_video.py \ 41 | [-h] \ 42 | [--device DEVICE] \ 43 | [--height_width HEIGHT_WIDTH] \ 44 | [--mask_or_nomask {mask,nomask}] 45 | 46 | optional arguments: 47 | -h, --help 48 | Show this help message and exit. 49 | 50 | --device DEVICE 51 | Path of the mp4 file or device number of the USB camera. 52 | Default: 0 53 | 54 | --height_width HEIGHT_WIDTH 55 | {H}x{W}. 56 | Default: 480x640 57 | 58 | --mask_or_nomask {mask,nomask} 59 | Select either a model that provides high accuracy when wearing a mask or 60 | a model that provides high accuracy when not wearing a mask. 61 | Default: mask 62 | ``` 63 | 64 | ## 3. Atmosphere 65 | - August 15, 2022 - MAE: 3.8648 66 | 67 | https://user-images.githubusercontent.com/33194443/184782685-52aa9fe3-d086-4104-8ea1-00c4a7418142.mp4 68 | 69 | https://user-images.githubusercontent.com/33194443/184784102-089a82b9-765a-4431-bf33-43370b5c8174.mp4 70 | 71 | ## 4. Benchmark 72 | - 6DRepNet 73 | - Official paper (fine-tuned) 74 | ``` 75 | Yaw: 3.6266, Pitch: 4.9066, Roll: 3.3734, MAE: 3.9688 76 | ``` 77 | - Trained on 300W-LP (custom, with mask-wearing face image augmentation) 78 | - Tested on AFLW2000 79 | - June 20, 2022 80 | ``` 81 | Yaw: 3.6129, Pitch: 5.5801, Roll: 3.8468, MAE: 4.3466 82 | ``` 83 | - July 3, 2022 `_epoch_321.pth` 84 | ``` 85 | Yaw: 3.3346, Pitch: 5.0004, Roll: 3.5381, MAE: 3.9577 86 | ``` 87 | - August 15, 2022 88 | ``` 89 | Yaw: 3.3193, Pitch: 4.9063, Roll: 3.3687, MAE: 3.8648 90 | ``` 91 | 92 | ## 5. Model Structure 93 | - INPUTS: `Float32 [N,3,224,224]` 94 | - OUTPUTS: `Float32 [N,3]`, `[Yaw,Roll,Pitch]` 95 | 96 |
<details><summary>Click to expand</summary>
97 | 98 | ![pinheadpose_1x3x224x224 onnx](https://user-images.githubusercontent.com/33194443/174504855-bf03e294-c9c9-477d-9faf-07b3d0393463.png) 99 | 100 | </details>
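The merged model can also be driven directly with onnxruntime, without going through `demo_video.py`. The sketch below is an illustration only (`head_crop.jpg` is a hypothetical stand-in for a head crop produced by the detector); pre-processing mirrors `demo_video.py` (resize to 224x224, BGR to RGB, HWC to CHW, float32), and no `/255` scaling is applied because normalization is fused into the model graph.

```python
import cv2
import numpy as np
import onnxruntime

# Assumes dmhead_mask_Nx3x224x224.onnx was downloaded as in section 2.
session = onnxruntime.InferenceSession(
    'dmhead_mask_Nx3x224x224.onnx',
    providers=['CPUExecutionProvider'],
)
input_name = session.get_inputs()[0].name

# 'head_crop.jpg' is a placeholder for a pre-cropped head image.
bgr = cv2.imread('head_crop.jpg')
rgb = cv2.resize(bgr, (224, 224))[..., ::-1]                   # BGR -> RGB
nchw = np.asarray([rgb.transpose(2, 0, 1)], dtype=np.float32)  # HWC -> NCHW

# OUTPUTS: Float32 [N,3] in [Yaw, Roll, Pitch] order.
yaw, roll, pitch = session.run(None, {input_name: nchw})[0][0]
print(f'yaw: {yaw:.2f}, roll: {roll:.2f}, pitch: {pitch:.2f}')
```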
101 | 102 | ## 6. References 103 | 1. https://github.com/choyingw/SynergyNet 104 | 2. https://github.com/thohemp/6DRepNet 105 | 3. https://github.com/Ascend-Research/HeadPoseEstimation-WHENet 106 | 4. https://github.com/PINTO0309/Face_Mask_Augmentation 107 | 5. https://github.com/PINTO0309/PINTO_model_zoo/tree/main/383_DirectMHP/post_process_gen_tools 108 | 6. https://github.com/PINTO0309/PINTO_model_zoo/tree/main/383_DirectMHP 109 | 110 | ## 7. Citation 111 | ``` 112 | @misc{https://doi.org/10.48550/arxiv.2005.10353, 113 | doi = {10.48550/ARXIV.2005.10353}, 114 | url = {https://arxiv.org/abs/2005.10353}, 115 | author = {Zhou, Yijun and Gregson, James}, 116 | title = {WHENet: Real-time Fine-Grained Estimation for Wide Range Head Pose}, 117 | publisher = {arXiv}, 118 | year = {2020}, 119 | } 120 | ``` 121 | ``` 122 | @misc{hempel20226d, 123 | title={6D Rotation Representation For Unconstrained Head Pose Estimation}, 124 | author={Thorsten Hempel and Ahmed A. Abdelrahman and Ayoub Al-Hamadi}, 125 | year={2022}, 126 | eprint={2202.12555}, 127 | archivePrefix={arXiv}, 128 | primaryClass={cs.CV} 129 | } 130 | ``` 131 | ``` 132 | @INPROCEEDINGS{wu2021synergy, 133 | author={Wu, Cho-Ying and Xu, Qiangeng and Neumann, Ulrich}, 134 | booktitle={2021 International Conference on 3D Vision (3DV)}, 135 | title={Synergy between 3DMM and 3D Landmarks for Accurate 3D Facial Geometry}, 136 | year={2021} 137 | } 138 | ``` 139 | -------------------------------------------------------------------------------- /convert_script.txt: -------------------------------------------------------------------------------- 1 | ACTIVATION_ANGLE=90 2 | 3 | ############ Add 4 | OP=Add 5 | LOWEROP=${OP,,} 6 | NUM_BATCHES=1 7 | OPSET=11 8 | sog4onnx \ 9 | --op_type ${OP} \ 10 | --opset ${OPSET} \ 11 | --op_name ${LOWEROP}${OPSET} \ 12 | --input_variables whenet_a float32 [${NUM_BATCHES},3] \ 13 | --input_variables 6drepnet_b float32 [${NUM_BATCHES},3] \ 14 | --output_variables yaw_roll_pitch float32 [${NUM_BATCHES},3] \ 15 | --output_onnx_file_path ${OP}${OPSET}_merged_add.onnx 16 | 17 | 18 | ############ WHENet shrink 19 | snc4onnx \ 20 | --input_onnx_file_paths whenet_1x3x224x224_prepost.onnx shrunk_whenet_${ACTIVATION_ANGLE}.onnx \ 21 | --srcop_destop yaw_roll_pitch shrunk_input \ 22 | --output_onnx_file_path whenet_prepost_disable_front_side_detection_1x3x224x224.onnx 23 | 24 | onnxsim whenet_prepost_disable_front_side_detection_1x3x224x224.onnx whenet_prepost_disable_front_side_detection_1x3x224x224.onnx 25 | 26 | ############ 6DRepNet shrink 27 | snc4onnx \ 28 | --input_onnx_file_paths sixdrepnet_300w_lp_maskplus_prepost_1x3x224x224.onnx shrunk_6drepnet_${ACTIVATION_ANGLE}.onnx \ 29 | --srcop_destop output_6drep_yaw_roll_pitch shrunk_input \ 30 | --output_onnx_file_path sixdrepnet_300w_lp_maskplus_prepost_disable_rear_side_detection_1x3x224x224.onnx 31 | 32 | onnxsim sixdrepnet_300w_lp_maskplus_prepost_disable_rear_side_detection_1x3x224x224.onnx sixdrepnet_300w_lp_maskplus_prepost_disable_rear_side_detection_1x3x224x224.onnx 33 | 34 | 35 | ############ WHENet + 6DRepNet 36 | snc4onnx \ 37 | --input_onnx_file_paths whenet_prepost_disable_front_side_detection_1x3x224x224.onnx sixdrepnet_300w_lp_maskplus_prepost_disable_rear_side_detection_1x3x224x224.onnx \ 38 | --srcop_destop input input \ 39 | --op_prefixes_after_merging whenet 6drepnet \ 40 | --output_onnx_file_path dmhead_merged.onnx 41 | 42 | ############ outputs rename 43 | sor4onnx \ 44 | --input_onnx_file_path dmhead_merged.onnx \ 45 | --old_new "whenet_whenet_" 
"whenet_" \ 46 | --mode outputs \ 47 | --output_onnx_file_path dmhead_merged.onnx 48 | 49 | sor4onnx \ 50 | --input_onnx_file_path dmhead_merged.onnx \ 51 | --old_new "6drepnet_6drepnet_" "6drepnet_" \ 52 | --mode outputs \ 53 | --output_onnx_file_path dmhead_merged.onnx 54 | 55 | ############ dmhead_merged + Add 56 | snc4onnx \ 57 | --input_onnx_file_paths dmhead_merged.onnx Add11_merged_add.onnx \ 58 | --srcop_destop whenet_shrunk_output whenet_a 6drepnet_shrunk_output 6drepnet_b \ 59 | --output_onnx_file_path dmhead_mask_1x3x224x224.onnx 60 | 61 | onnxsim dmhead_mask_1x3x224x224.onnx dmhead_mask_1x3x224x224.onnx 62 | 63 | 64 | 65 | 66 | 67 | N-Batch ########################################################################################################### 68 | 69 | ############ Add 70 | OP=Add 71 | LOWEROP=${OP,,} 72 | OPSET=11 73 | sog4onnx \ 74 | --op_type ${OP} \ 75 | --opset ${OPSET} \ 76 | --op_name ${LOWEROP}${OPSET} \ 77 | --input_variables whenet_a float32 [\'N\',3] \ 78 | --input_variables 6drepnet_b float32 [\'N\',3] \ 79 | --output_variables yaw_roll_pitch float32 [\'N\',3] \ 80 | --output_onnx_file_path ${OP}${OPSET}_merged_add_N.onnx 81 | 82 | ############ WHENet shrink 83 | snc4onnx \ 84 | --input_onnx_file_paths whenet_Nx3x224x224_prepost.onnx shrunk_whenet_N_${ACTIVATION_ANGLE}.onnx \ 85 | --srcop_destop yaw_roll_pitch shrunk_input \ 86 | --output_onnx_file_path whenet_prepost_disable_front_side_detection_Nx3x224x224.onnx 87 | 88 | 89 | ############ 6DRepNet shrink 90 | snc4onnx \ 91 | --input_onnx_file_paths sixdrepnet_300w_lp_maskplus_prepost_Nx3x224x224.onnx shrunk_6drepnet_N_${ACTIVATION_ANGLE}.onnx \ 92 | --srcop_destop output_6drep_yaw_roll_pitch shrunk_input \ 93 | --output_onnx_file_path sixdrepnet_300w_lp_maskplus_prepost_disable_rear_side_detection_Nx3x224x224.onnx 94 | 95 | 96 | 97 | ############ WHENet + 6DRepNet 98 | snc4onnx \ 99 | --input_onnx_file_paths whenet_prepost_disable_front_side_detection_Nx3x224x224.onnx sixdrepnet_300w_lp_maskplus_prepost_disable_rear_side_detection_Nx3x224x224.onnx \ 100 | --srcop_destop input input \ 101 | --op_prefixes_after_merging whenet 6drepnet \ 102 | --output_onnx_file_path dmhead_merged_N.onnx 103 | 104 | ############ outputs rename 105 | sor4onnx \ 106 | --input_onnx_file_path dmhead_merged_N.onnx \ 107 | --old_new "whenet_whenet_" "whenet_" \ 108 | --mode outputs \ 109 | --output_onnx_file_path dmhead_merged_N.onnx 110 | 111 | sor4onnx \ 112 | --input_onnx_file_path dmhead_merged_N.onnx \ 113 | --old_new "6drepnet_6drepnet_" "6drepnet_" \ 114 | --mode outputs \ 115 | --output_onnx_file_path dmhead_merged_N.onnx 116 | 117 | ############ dmhead_merged + Add 118 | snc4onnx \ 119 | --input_onnx_file_paths dmhead_merged_N.onnx Add11_merged_add_N.onnx \ 120 | --srcop_destop whenet_shrunk_output whenet_a 6drepnet_shrunk_output 6drepnet_b \ 121 | --output_onnx_file_path dmhead_mask_Nx3x224x224.onnx 122 | 123 | onnxsim dmhead_mask_Nx3x224x224.onnx dmhead_mask_Nx3x224x224.onnx 124 | 125 | 126 | 127 | 128 | 129 | ############ Add 130 | OP=Add 131 | LOWEROP=${OP,,} 132 | OPSET=11 133 | sog4onnx \ 134 | --op_type ${OP} \ 135 | --opset ${OPSET} \ 136 | --op_name ${LOWEROP}${OPSET} \ 137 | --input_variables whenet_a float32 [\'N\',3] \ 138 | --input_variables synergynet_b float32 [\'N\',3] \ 139 | --output_variables yaw_roll_pitch float32 [\'N\',3] \ 140 | --output_onnx_file_path ${OP}${OPSET}_merged_add_N.onnx 141 | 142 | ############ WHENet shrink 143 | snc4onnx \ 144 | --input_onnx_file_paths whenet_Nx3x224x224_prepost.onnx 
shrunk_whenet_N_${ACTIVATION_ANGLE}.onnx \ 145 | --srcop_destop yaw_roll_pitch shrunk_input \ 146 | --output_onnx_file_path whenet_prepost_disable_front_side_detection_Nx3x224x224.onnx 147 | 148 | sor4onnx \ 149 | --input_onnx_file_path synergynet_1x3x224x224.onnx \ 150 | --old_new "yaw_roll_pitch" "output_synergy_yaw_roll_pitch" \ 151 | --output_onnx_file_path synergynet_1x3x224x224.onnx \ 152 | --mode outputs 153 | 154 | sor4onnx \ 155 | --input_onnx_file_path synergynet_Nx3x224x224.onnx \ 156 | --old_new "yaw_roll_pitch" "output_synergy_yaw_roll_pitch" \ 157 | --output_onnx_file_path synergynet_Nx3x224x224.onnx \ 158 | --mode outputs 159 | 160 | 161 | sbi4onnx \ 162 | --input_onnx_file_path shrunk_whenet.onnx \ 163 | --output_onnx_file_path shrunk_whenet_N.onnx \ 164 | --initialization_character_string N 165 | 166 | sbi4onnx \ 167 | --input_onnx_file_path shrunk_synergynet.onnx \ 168 | --output_onnx_file_path shrunk_synergynet_N.onnx \ 169 | --initialization_character_string N 170 | 171 | 172 | 173 | 174 | 175 | ############ SynergyNet shrink 176 | snc4onnx \ 177 | --input_onnx_file_paths synergynet_Nx3x224x224.onnx shrunk_synergynet_N_${ACTIVATION_ANGLE}.onnx \ 178 | --srcop_destop output_synergy_yaw_roll_pitch shrunk_input \ 179 | --output_onnx_file_path synergynet_prepost_disable_rear_side_detection_Nx3x224x224.onnx 180 | 181 | 182 | ############ WHENet + SynergyNet 183 | snc4onnx \ 184 | --input_onnx_file_paths whenet_prepost_disable_front_side_detection_Nx3x224x224.onnx synergynet_prepost_disable_rear_side_detection_Nx3x224x224.onnx \ 185 | --srcop_destop input input \ 186 | --op_prefixes_after_merging whenet synergynet \ 187 | --output_onnx_file_path dmhead_merged_N.onnx 188 | 189 | 190 | sor4onnx \ 191 | --input_onnx_file_path dmhead_merged_N.onnx \ 192 | --old_new "whenet_whenet_shrunk_output" "whenet_shrunk_output" \ 193 | --output_onnx_file_path dmhead_merged_N.onnx \ 194 | --mode outputs 195 | 196 | sor4onnx \ 197 | --input_onnx_file_path dmhead_merged_N.onnx \ 198 | --old_new "synergynet_synergynet_shrunk_output" "synergynet_shrunk_output" \ 199 | --output_onnx_file_path dmhead_merged_N.onnx \ 200 | --mode outputs 201 | 202 | 203 | ############ dmhead_merged + Add 204 | snc4onnx \ 205 | --input_onnx_file_paths dmhead_merged_N.onnx Add11_merged_add_N.onnx \ 206 | --srcop_destop whenet_shrunk_output whenet_a synergynet_shrunk_output synergynet_b \ 207 | --output_onnx_file_path dmhead_nomask_Nx3x224x224.onnx 208 | -------------------------------------------------------------------------------- /demo_video.py: -------------------------------------------------------------------------------- 1 | 2 | import cv2 3 | import time 4 | import math 5 | import copy 6 | import argparse 7 | import onnxruntime 8 | import numpy as np 9 | from math import cos, sin 10 | from typing import Tuple, Optional, List 11 | 12 | 13 | class YOLOv7ONNX(object): 14 | def __init__( 15 | self, 16 | model_path: Optional[str] = 'yolov7_tiny_head_0.768_post_480x640.onnx', 17 | class_score_th: Optional[float] = 0.20, 18 | providers: Optional[List] = [ 19 | ( 20 | 'TensorrtExecutionProvider', { 21 | 'trt_engine_cache_enable': True, 22 | 'trt_engine_cache_path': '.', 23 | 'trt_fp16_enable': True, 24 | } 25 | ), 26 | 'CUDAExecutionProvider', 27 | 'CPUExecutionProvider', 28 | ], 29 | ): 30 | """YOLOv7ONNX 31 | 32 | Parameters 33 | ---------- 34 | model_path: Optional[str] 35 | ONNX file path for YOLOv7 36 | class_score_th: Optional[float] 37 | Score threshold. 
Default: 0.20 38 | providers: Optional[List] 39 | Name of onnx execution providers 40 | Default: 41 | [ 42 | ( 43 | 'TensorrtExecutionProvider', { 44 | 'trt_engine_cache_enable': True, 45 | 'trt_engine_cache_path': '.', 46 | 'trt_fp16_enable': True, 47 | } 48 | ), 49 | 'CUDAExecutionProvider', 50 | 'CPUExecutionProvider', 51 | ] 52 | """ 53 | # Threshold 54 | self.class_score_th = class_score_th 55 | 56 | # Model loading 57 | session_option = onnxruntime.SessionOptions() 58 | session_option.log_severity_level = 3 59 | self.onnx_session = onnxruntime.InferenceSession( 60 | model_path, 61 | sess_options=session_option, 62 | providers=providers, 63 | ) 64 | self.providers = self.onnx_session.get_providers() 65 | 66 | self.input_shapes = [ 67 | input.shape for input in self.onnx_session.get_inputs() 68 | ] 69 | self.input_names = [ 70 | input.name for input in self.onnx_session.get_inputs() 71 | ] 72 | self.output_names = [ 73 | output.name for output in self.onnx_session.get_outputs() 74 | ] 75 | 76 | 77 | def __call__( 78 | self, 79 | image: np.ndarray, 80 | ) -> Tuple[np.ndarray, np.ndarray]: 81 | """YOLOv7ONNX 82 | 83 | Parameters 84 | ---------- 85 | image: np.ndarray 86 | Entire image 87 | 88 | Returns 89 | ------- 90 | face_boxes: np.ndarray 91 | Predicted face boxes: [facecount, x1, y1, x2, y2] 92 | face_scores: np.ndarray 93 | Predicted face box scores: [facecount, score] 94 | """ 95 | temp_image = copy.deepcopy(image) 96 | 97 | # PreProcess 98 | resized_image = self.__preprocess( 99 | temp_image, 100 | ) 101 | 102 | # Inference 103 | inference_image = np.asarray([resized_image], dtype=np.float32) 104 | scores, boxes = self.onnx_session.run( 105 | self.output_names, 106 | {input_name: inference_image for input_name in self.input_names}, 107 | ) 108 | 109 | # PostProcess 110 | face_boxes, face_scores = self.__postprocess( 111 | image=temp_image, 112 | scores=scores, 113 | boxes=boxes, 114 | ) 115 | 116 | return face_boxes, face_scores 117 | 118 | 119 | def __preprocess( 120 | self, 121 | image: np.ndarray, 122 | swap: Optional[Tuple[int,int,int]] = (2, 0, 1), 123 | ) -> np.ndarray: 124 | """__preprocess 125 | 126 | Parameters 127 | ---------- 128 | image: np.ndarray 129 | Entire image 130 | swap: tuple 131 | HWC to CHW: (2,0,1) 132 | CHW to HWC: (1,2,0) 133 | HWC to HWC: (0,1,2) 134 | CHW to CHW: (0,1,2) 135 | 136 | Returns 137 | ------- 138 | resized_image: np.ndarray 139 | Resized and normalized image. 140 | """ 141 | # Resize + normalization + BGR->RGB + HWC->CHW 142 | resized_image = cv2.resize( 143 | image, 144 | ( 145 | int(self.input_shapes[0][3]), 146 | int(self.input_shapes[0][2]), 147 | ) 148 | ) 149 | resized_image = np.divide(resized_image, 255.0) 150 | resized_image = resized_image[..., ::-1] 151 | resized_image = resized_image.transpose(swap) 152 | resized_image = np.ascontiguousarray( 153 | resized_image, 154 | dtype=np.float32, 155 | ) 156 | return resized_image 157 | 158 | 159 | def __postprocess( 160 | self, 161 | image: np.ndarray, 162 | scores: np.ndarray, 163 | boxes: np.ndarray, 164 | ) -> Tuple[np.ndarray, np.ndarray]: 165 | """__postprocess 166 | 167 | Parameters 168 | ---------- 169 | image: np.ndarray 170 | Entire image.
171 | scores: np.ndarray 172 | float32[N, 1] 173 | boxes: np.ndarray 174 | int64[N, 6] 175 | 176 | Returns 177 | ------- 178 | faceboxes: np.ndarray 179 | Predicted face boxes: [facecount, x1, y1, x2, y2] 180 | facescores: np.ndarray 181 | Predicted face box confs: [facecount, score] 182 | """ 183 | image_height = image.shape[0] 184 | image_width = image.shape[1] 185 | 186 | """ 187 | Head Detector is 188 | N -> Number of boxes detected 189 | batchno -> always 0: BatchNo.0 190 | classid -> always 0: "Head" 191 | scores: float32[N,1], 192 | batchno_classid_y1x1y2x2: int64[N,6], 193 | """ 194 | # Keep only boxes above the score threshold 195 | keep_idxs = scores[:, 0] > self.class_score_th 196 | scores_keep = scores[keep_idxs, :] 197 | boxes_keep = boxes[keep_idxs, :] 198 | faceboxes = [] 199 | facescores = [] 200 | 201 | if len(boxes_keep) > 0: 202 | for box, score in zip(boxes_keep, scores_keep): 203 | x_min = max(int(box[3]), 0) 204 | y_min = max(int(box[2]), 0) 205 | x_max = min(int(box[5]), image_width) 206 | y_max = min(int(box[4]), image_height) 207 | 208 | faceboxes.append( 209 | [x_min, y_min, x_max, y_max] 210 | ) 211 | facescores.append( 212 | score 213 | ) 214 | 215 | return np.asarray(faceboxes), np.asarray(facescores) 216 | 217 | 218 | 219 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size=100): 220 | # Referenced from HopeNet https://github.com/natanielruiz/deep-head-pose 221 | if math.isnan(yaw) or math.isnan(pitch) or math.isnan(roll): 222 | return img 223 | pitch = pitch * np.pi / 180 224 | yaw = -(yaw * np.pi / 180) 225 | roll = roll * np.pi / 180 226 | if tdx is None or tdy is None: 227 | # No anchor point supplied: default the axis origin 228 | # to the image center. 229 | height, width = img.shape[:2] 230 | tdx = width / 2 231 | tdy = height / 2 232 | 233 | # X-Axis pointing to right. drawn in red 234 | x1 = size * (cos(yaw) * cos(roll)) + tdx 235 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 236 | # Y-Axis | drawn in green 237 | # v 238 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 239 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 240 | # Z-Axis (out of the screen) drawn in blue 241 | x3 = size * (sin(yaw)) + tdx 242 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 243 | cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),2) 244 | cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),2) 245 | cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2) 246 | return img 247 | 248 | 249 | def main(args): 250 | # YOLOv7_tiny_Head 251 | yolov7_head = YOLOv7ONNX( 252 | class_score_th=0.20, 253 | ) 254 | 255 | # DMHead 256 | model_file_path = '' 257 | dmhead_input_name = None 258 | mask_or_nomask = args.mask_or_nomask 259 | 260 | if mask_or_nomask == 'mask': 261 | model_file_path = 'dmhead_mask_Nx3x224x224.onnx' 262 | elif mask_or_nomask == 'nomask': 263 | model_file_path = 'dmhead_nomask_Nx3x224x224.onnx' 264 | 265 | dmhead = onnxruntime.InferenceSession( 266 | path_or_bytes=model_file_path, 267 | providers=[ 268 | ( 269 | 'TensorrtExecutionProvider', { 270 | 'trt_engine_cache_enable': True, 271 | 'trt_engine_cache_path': '.', 272 | 'trt_fp16_enable': True, 273 | } 274 | ), 275 | 'CUDAExecutionProvider', 276 | 'CPUExecutionProvider', 277 | ] 278 | ) 279 | dmhead_input_name = dmhead.get_inputs()[0].name 280 | dmhead_H = dmhead.get_inputs()[0].shape[2] 281 | dmhead_W = dmhead.get_inputs()[0].shape[3] 282 | 283 | cap_width = int(args.height_width.split('x')[1]) 284 | cap_height = int(args.height_width.split('x')[0]) 285 | if
args.device.isdecimal(): 286 | cap = cv2.VideoCapture(int(args.device)) 287 | else: 288 | cap = cv2.VideoCapture(args.device) 289 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, cap_width) 290 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, cap_height) 291 | WINDOWS_NAME = 'Demo' 292 | cv2.namedWindow(WINDOWS_NAME, cv2.WINDOW_NORMAL) 293 | cv2.resizeWindow(WINDOWS_NAME, cap_width, cap_height) 294 | 295 | cap_fps = cap.get(cv2.CAP_PROP_FPS) 296 | w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 297 | h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 298 | fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') 299 | video_writer = cv2.VideoWriter( 300 | filename='output.mp4', 301 | fourcc=fourcc, 302 | fps=cap_fps, 303 | frameSize=(w, h), 304 | ) 305 | 306 | while True: 307 | ret, frame = cap.read() 308 | if not ret: 309 | break 310 | 311 | start = time.time() 312 | 313 | # ============================================================= YOLOv7_tiny_Head 314 | heads, head_scores = yolov7_head(frame) 315 | 316 | canvas = copy.deepcopy(frame) 317 | # ============================================================= DMHead 318 | cropped_resized_frame = None 319 | 320 | if len(heads) > 0: 321 | dmhead_inputs = [] 322 | dmhead_position = [] 323 | 324 | for head in heads: 325 | x_min = int(head[0]) 326 | y_min = int(head[1]) 327 | x_max = int(head[2]) 328 | y_max = int(head[3]) 329 | 330 | # enlarge the bbox to include more background margin 331 | y_min = max(0, y_min - abs(y_min - y_max) / 10) 332 | y_max = min(frame.shape[0], y_max + abs(y_min - y_max) / 10) 333 | x_min = max(0, x_min - abs(x_min - x_max) / 5) 334 | x_max = min(frame.shape[1], x_max + abs(x_min - x_max) / 5) 335 | 336 | cropped_frame = frame[int(y_min):int(y_max), int(x_min):int(x_max)] 337 | 338 | # h,w -> 224,224 339 | cropped_resized_frame = cv2.resize(cropped_frame, (dmhead_W, dmhead_H)) 340 | # bgr --> rgb 341 | rgb = cropped_resized_frame[..., ::-1] 342 | # hwc --> chw 343 | chw = rgb.transpose(2, 0, 1) 344 | dmhead_inputs.append(chw) 345 | dmhead_position.append([x_min,y_min,x_max,y_max]) 346 | # chw --> nchw 347 | nchw = np.asarray(dmhead_inputs, dtype=np.float32) 348 | positions = np.asarray(dmhead_position, dtype=np.int32) 349 | 350 | yaw = 0.0 351 | pitch = 0.0 352 | roll = 0.0 353 | # Inference DMHead 354 | outputs = dmhead.run( 355 | None, 356 | input_feed = {dmhead_input_name: nchw} 357 | )[0] 358 | 359 | for (yaw, roll, pitch), position in zip(outputs, positions): 360 | yaw, pitch, roll = np.squeeze([yaw, pitch, roll]) 361 | print(f'yaw: {yaw}, pitch: {pitch}, roll: {roll}') 362 | 363 | x_min,y_min,x_max,y_max = position 364 | 365 | # BBox draw 366 | deg_norm = 1.0 - abs(yaw / 180) 367 | blue = int(255 * deg_norm) 368 | cv2.rectangle( 369 | canvas, 370 | (int(x_min), int(y_min)), 371 | (int(x_max), int(y_max)), 372 | color=(blue, 0, 255-blue), 373 | thickness=2 374 | ) 375 | 376 | # Draw 377 | draw_axis( 378 | canvas, 379 | yaw, 380 | pitch, 381 | roll, 382 | tdx=(x_min+x_max)/2, 383 | tdy=(y_min+y_max)/2, 384 | size=abs(x_max-x_min)//2 385 | ) 386 | cv2.putText( 387 | canvas, 388 | f'yaw: {np.round(yaw)}', 389 | (int(x_min), int(y_min)), 390 | cv2.FONT_HERSHEY_SIMPLEX, 391 | 0.4, 392 | (100, 255, 0), 393 | 1 394 | ) 395 | cv2.putText( 396 | canvas, 397 | f'pitch: {np.round(pitch)}', 398 | (int(x_min), int(y_min) - 15), 399 | cv2.FONT_HERSHEY_SIMPLEX, 400 | 0.4, 401 | (100, 255, 0), 402 | 1 403 | ) 404 | cv2.putText( 405 | canvas, 406 | f'roll: {np.round(roll)}', 407 | (int(x_min), int(y_min)-30), 408 | cv2.FONT_HERSHEY_SIMPLEX,
409 | 0.4, 410 | (100, 255, 0), 411 | 1 412 | ) 413 | 414 | time_txt = f'{(time.time()-start)*1000:.2f} ms (inference+post-process)' 415 | cv2.putText( 416 | canvas, 417 | time_txt, 418 | (20, 35), 419 | cv2.FONT_HERSHEY_SIMPLEX, 420 | 0.8, 421 | (255, 255, 255), 422 | 2, 423 | cv2.LINE_AA, 424 | ) 425 | cv2.putText( 426 | canvas, 427 | time_txt, 428 | (20, 35), 429 | cv2.FONT_HERSHEY_SIMPLEX, 430 | 0.8, 431 | (0, 255, 0), 432 | 1, 433 | cv2.LINE_AA, 434 | ) 435 | 436 | key = cv2.waitKey(1) 437 | if key == 27: # ESC 438 | break 439 | 440 | cv2.imshow(WINDOWS_NAME, canvas) 441 | video_writer.write(canvas) 442 | 443 | cv2.destroyAllWindows() 444 | 445 | if video_writer: 446 | video_writer.release() 447 | 448 | if cap: 449 | cap.release() 450 | 451 | if __name__ == "__main__": 452 | parser = argparse.ArgumentParser() 453 | parser.add_argument( 454 | '--device', 455 | type=str, 456 | default='0', 457 | help='Path of the mp4 file or device number of the USB camera. Default: 0', 458 | ) 459 | parser.add_argument( 460 | '--height_width', 461 | type=str, 462 | default='480x640', 463 | help='{H}x{W}. Default: 480x640', 464 | ) 465 | parser.add_argument( 466 | '--mask_or_nomask', 467 | type=str, 468 | default='mask', 469 | choices=[ 470 | 'mask', 471 | 'nomask', 472 | ], 473 | help='\ 474 | Select either a model that provides high accuracy when wearing \ 475 | a mask or a model that provides high accuracy when not wearing a mask.', 476 | ) 477 | args = parser.parse_args() 478 | main(args) 479 | -------------------------------------------------------------------------------- /make_hardshrink_6drepnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | class Model(nn.Module): 6 | def __init__(self): 7 | super(Model, self).__init__() 8 | 9 | def forward(self, x): 10 | x = x[..., np.newaxis] 11 | yaw = x[:,0,:] 12 | roll = x[:,1,:] 13 | pitch = x[:,2,:] 14 | shrunk_yaw = torch.clip( 15 | yaw, 16 | min=torch.tensor(-90.0, dtype=torch.float32), 17 | max=torch.tensor(90.0, dtype=torch.float32), 18 | ) 19 | eps = 1e-5 20 | shrunk_roll = (roll * shrunk_yaw) / (shrunk_yaw + eps) 21 | shrunk_pitch = (pitch * shrunk_yaw) / (shrunk_yaw + eps) 22 | output = torch.cat([shrunk_yaw,shrunk_roll,shrunk_pitch], dim=1) 23 | return output 24 | 25 | if __name__ == "__main__": 26 | model = Model() 27 | 28 | import onnx 29 | from onnxsim import simplify 30 | MODEL = f'shrunk_6drepnet' 31 | onnx_file = f"{MODEL}.onnx" 32 | 33 | x = torch.randn(1, 3) 34 | 35 | torch.onnx.export( 36 | model, 37 | args=(x), 38 | f=onnx_file, 39 | opset_version=11, 40 | input_names = ['shrunk_input'], 41 | output_names=['6drepnet_shrunk_output'], 42 | ) 43 | model_onnx1 = onnx.load(onnx_file) 44 | model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1) 45 | onnx.save(model_onnx1, onnx_file) 46 | 47 | model_onnx2 = onnx.load(onnx_file) 48 | model_simp, check = simplify(model_onnx2) 49 | onnx.save(model_simp, onnx_file) 50 | 51 | import sys 52 | sys.exit(0) -------------------------------------------------------------------------------- /make_hardshrink_synergynet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | class Model(nn.Module): 6 | def __init__(self): 7 | super(Model, self).__init__() 8 | 9 | def forward(self, x): 10 | x = x[..., np.newaxis] 11 | yaw = x[:,0,:] 12 | roll = x[:,1,:] 13 | pitch = x[:,2,:] 14 | shrunk_yaw = 
torch.clip( 15 | yaw, 16 | min=torch.tensor(-90.0, dtype=torch.float32), 17 | max=torch.tensor(90.0, dtype=torch.float32), 18 | ) 19 | eps = 1e-5 20 | shrunk_roll = (roll * shrunk_yaw) / (shrunk_yaw + eps) 21 | shrunk_pitch = (pitch * shrunk_yaw) / (shrunk_yaw + eps) 22 | output = torch.cat([shrunk_yaw,shrunk_roll,shrunk_pitch], dim=1) 23 | return output 24 | 25 | if __name__ == "__main__": 26 | model = Model() 27 | 28 | import onnx 29 | from onnxsim import simplify 30 | MODEL = f'shrunk_synergynet' 31 | onnx_file = f"{MODEL}.onnx" 32 | 33 | x = torch.randn(1, 3) 34 | 35 | torch.onnx.export( 36 | model, 37 | args=(x), 38 | f=onnx_file, 39 | opset_version=11, 40 | input_names = ['shrunk_input'], 41 | output_names=['synergynet_shrunk_output'], 42 | ) 43 | model_onnx1 = onnx.load(onnx_file) 44 | model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1) 45 | onnx.save(model_onnx1, onnx_file) 46 | 47 | model_onnx2 = onnx.load(onnx_file) 48 | model_simp, check = simplify(model_onnx2) 49 | onnx.save(model_simp, onnx_file) 50 | 51 | import sys 52 | sys.exit(0) -------------------------------------------------------------------------------- /make_hardshrink_whenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | class Model(nn.Module): 6 | def __init__(self): 7 | super(Model, self).__init__() 8 | 9 | def forward(self, x): 10 | shrink = nn.Hardshrink(lambd=90) 11 | x = x[..., np.newaxis] 12 | yaw = x[:,0,:] 13 | roll = x[:,1,:] 14 | pitch = x[:,2,:] 15 | shrunk_yaw = shrink(yaw) 16 | eps = 1e-5 17 | shrunk_roll = (roll * shrunk_yaw) / (shrunk_yaw + eps) 18 | shrunk_pitch = (pitch * shrunk_yaw) / (shrunk_yaw + eps) 19 | output = torch.cat([shrunk_yaw,shrunk_roll,shrunk_pitch], dim=1) 20 | return output 21 | 22 | if __name__ == "__main__": 23 | model = Model() 24 | 25 | import onnx 26 | from onnxsim import simplify 27 | MODEL = f'shrunk_whenet' 28 | onnx_file = f"{MODEL}.onnx" 29 | 30 | x = torch.randn(1, 3) 31 | 32 | torch.onnx.export( 33 | model, 34 | args=(x), 35 | f=onnx_file, 36 | opset_version=11, 37 | input_names = ['shrunk_input'], 38 | output_names=['whenet_shrunk_output'], 39 | ) 40 | model_onnx1 = onnx.load(onnx_file) 41 | model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1) 42 | onnx.save(model_onnx1, onnx_file) 43 | 44 | model_onnx2 = onnx.load(onnx_file) 45 | model_simp, check = simplify(model_onnx2) 46 | onnx.save(model_simp, onnx_file) 47 | 48 | import sys 49 | sys.exit(0) --------------------------------------------------------------------------------
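A note on the gating arithmetic shared by the three make_hardshrink_*.py scripts above: `nn.Hardshrink(lambd=90)` in make_hardshrink_whenet.py zeroes any yaw with |yaw| <= 90, and the division `shrunk_yaw / (shrunk_yaw + eps)` then behaves as a soft mask (about 1.0 where the yaw survived, exactly 0.0 where it was zeroed), so roll and pitch are suppressed together with yaw. This is the mechanism behind the "disable_front_side_detection" models built in convert_script.txt; the clip-based scripts instead bound the frontal branches to plus or minus 90 degrees. A minimal numpy sketch of the WHENet-side gate (illustration only, not part of the repository):

```python
import numpy as np

EPS = 1e-5  # same epsilon the make_hardshrink_*.py scripts use

def hardshrink_gate(yrp: np.ndarray, lambd: float = 90.0) -> np.ndarray:
    """Mimic make_hardshrink_whenet.py: zero [yaw, roll, pitch] rows whose
    |yaw| <= lambd (front side), pass the rest through almost unchanged."""
    yaw, roll, pitch = yrp[:, 0], yrp[:, 1], yrp[:, 2]
    shrunk_yaw = np.where(np.abs(yaw) > lambd, yaw, 0.0)  # Hardshrink(lambd)
    gate = shrunk_yaw / (shrunk_yaw + EPS)                # ~1.0 kept, 0.0 zeroed
    return np.stack([shrunk_yaw, roll * gate, pitch * gate], axis=1)

poses = np.array([
    [ 30.0, 5.0, -10.0],   # frontal head -> fully suppressed
    [150.0, 2.0,   4.0],   # rear head    -> passes through
], dtype=np.float32)
print(hardshrink_gate(poses))  # ~[[0, 0, 0], [150, 2, 4]]
```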