├── .gitignore ├── CLA.md ├── LICENSE.md ├── README.md ├── checkpoint ├── A2J │ └── README.md ├── CenterNet │ └── README.md └── SSD │ └── README.md ├── model ├── A2J │ ├── a2j.py │ ├── a2j_utilities │ │ ├── a2j_branchs.py │ │ ├── a2j_utils.py │ │ └── post_processing.py │ ├── back_bone │ │ ├── mobilenet.py │ │ └── resnet.py │ └── model.py ├── CenterNet │ └── centernet.py └── run_model.py ├── pipeline ├── azure_kinect.py ├── constants.py ├── model_setup.py └── utils.py └── readme_files └── realtime_inference.gif /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore pytorch saved models 2 | *.pth 3 | *.trt 4 | 5 | # ignore python cache files 6 | *__pycache__* 7 | 8 | # ignore vscode config 9 | *.vscode 10 | 11 | -------------------------------------------------------------------------------- /CLA.md: -------------------------------------------------------------------------------- 1 | ## Individual Contributor License Agreement (CLA) 2 | 3 | **Thank you for submitting your contributions to this project.** 4 | 5 | By signing this CLA, you agree that the following terms apply to all of your past, present and future contributions 6 | to the project. 7 | 8 | ### License. 9 | 10 | You hereby represent that all present, past and future contributions are governed by the 11 | [MIT License](https://opensource.org/licenses/MIT) 12 | copyright statement. 13 | 14 | This entails that to the extent possible under law, you transfer all copyright and related or neighboring rights 15 | of the code or documents you contribute to the project itself or its maintainers. 16 | Furthermore you also represent that you have the authority to perform the above waiver 17 | with respect to the entirety of your contributions. 18 | 19 | ### Moral Rights. 20 | 21 | To the fullest extent permitted under applicable law, you hereby waive, and agree not to 22 | assert, all of your “moral rights” in or relating to your contributions for the benefit of the project. 23 | 24 | ### Third Party Content. 25 | 26 | If your Contribution includes or is based on any source code, object code, bug fixes, configuration changes, tools, 27 | specifications, documentation, data, materials, feedback, information or other works of authorship that were not 28 | authored by you (“Third Party Content”) or if you are aware of any third party intellectual property or proprietary 29 | rights associated with your Contribution (“Third Party Rights”), 30 | then you agree to include with the submission of your Contribution full details respecting such Third Party 31 | Content and Third Party Rights, including, without limitation, identification of which aspects of your 32 | Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the 33 | Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable 34 | third party license terms or restrictions respecting the Third Party Content and Third Party Rights. For greater 35 | certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights 36 | do not apply to any portion of a Project that is incorporated into your Contribution to that same Project. 37 | 38 | ### Representations.
39 | 40 | You represent that, other than the Third Party Content and Third Party Rights identified by 41 | you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled 42 | to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were 43 | created in the course of your employment with your past or present employer(s), you represent that such 44 | employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer 45 | (s) has waived all of their right, title or interest in or to your Contributions. 46 | 47 | ### Disclaimer. 48 | 49 | To the fullest extent permitted under applicable law, your Contributions are provided on an "as is" 50 | basis, without any warranties or conditions, express or implied, including, without limitation, any implied 51 | warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not 52 | required to provide support for your Contributions, except to the extent you desire to provide support. 53 | 54 | ### No Obligation. 55 | 56 | You acknowledge that the maintainers of this project are under no obligation to use or incorporate your contributions 57 | into the project. The decision to use or incorporate your contributions into the project will be made at the 58 | sole discretion of the maintainers or their authorized delegates. 59 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Boshen Zhang
2 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 3D_HandPose 2 | 3 | This repository implements a realtime 3D hand posture estimation pipeline running on the Jetson platform using an [**Azure Kinect** camera](https://azure.microsoft.com/en-us/services/kinect-dk/).
4 | Please refer to the following repositories before getting started here: 5 | - [centernet_kinect](https://github.com/NVIDIA-AI-IOT/centernet_kinect) 6 | - [Hand Posture Estimation](https://github.com/NVIDIA-AI-IOT/a2j_handpose_3d) 7 | 8 |

9 | landing graphic 10 |

11 | 12 | 13 | There are two stages to our pipeline, plus instructions for running inference: 14 | 15 | * ## [CenterNet Bounding Box](#centernet_bounding_box) 16 | * ## [A2J Posture Detection](#a2j_posture_detection) 17 | * ## [Run inference](#run_infrence) 18 | 19 | 20 | ## CenterNet Bounding Box 21 | 22 | The first stage localizes the hand using a fusion of infrared and depth images, as sketched below.
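To make the fusion concrete, here is a minimal sketch of how an infrared frame and a depth frame could be stacked into a single multi-channel detector input. The function name, normalization constants, and channel order are illustrative assumptions only; the actual preprocessing is defined in the centernet_kinect repository.

```python
import numpy as np
import torch

def fuse_ir_depth(ir_frame: np.ndarray, depth_frame: np.ndarray,
                  max_depth_mm: float = 1000.0) -> torch.Tensor:
    """Stack a single-channel IR frame and depth frame into one detector input.

    Both arrays are expected to share the depth-camera resolution; the
    normalization ranges below are placeholders, not project constants.
    """
    ir = ir_frame.astype(np.float32)
    ir = ir / (ir.max() + 1e-6)                                # scale IR to [0, 1]

    depth = depth_frame.astype(np.float32)
    depth = np.clip(depth, 0.0, max_depth_mm) / max_depth_mm   # scale depth to [0, 1]

    fused = np.stack([ir, depth], axis=0)                      # (2, H, W)
    return torch.from_numpy(fused).unsqueeze(0)                # (1, 2, H, W) batch for the detector
```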
23 | **NOTE:** More details can be found in the centernet_kinect repository. 24 | 25 | 26 | ## A2J Posture Detection 27 | 28 | The second stage performs 3D hand posture estimation on the region of interest selected by the previous step, as illustrated in the sketch below.
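The following sketch shows one way the hand-off between the two stages could look, using the `A2J` and `PostProcess` classes defined in this repository. The helper name `estimate_hand_pose`, the bilinear resize, and the assumption that `A2J_TARGET_SIZE` in `pipeline/constants.py` is stored as `(width, height)` are illustrative; the actual cropping and coordinate re-mapping are handled by the realtime pipeline under *pipeline/*.

```python
import torch
import torch.nn.functional as F

import pipeline.constants as const
from model.A2J.a2j import A2J
from model.A2J.a2j_utilities.post_processing import PostProcess


def estimate_hand_pose(depth_image: torch.Tensor, box, model: A2J, post_process: PostProcess):
    """Crop the detected hand region and regress its 3D joints with A2J.

    :param depth_image: (1, 1, H, W) depth tensor from the Azure Kinect
    :param box: integer (x_min, y_min, x_max, y_max) box from the CenterNet stage
    """
    x_min, y_min, x_max, y_max = box
    crop = depth_image[:, :, y_min:y_max, x_min:x_max]

    # Resize to the A2J input resolution; A2J_TARGET_SIZE is assumed to be (width, height).
    crop = F.interpolate(crop, size=(const.A2J_TARGET_SIZE[1], const.A2J_TARGET_SIZE[0]),
                         mode="bilinear", align_corners=False)

    with torch.no_grad():
        joint_cls, offset_reg, depth_reg = model(crop)
        # PostProcess aggregates the anchor-based outputs into per-joint (x, y, depth)
        # predictions in crop coordinates; mapping back to the full frame is omitted here.
        joints = post_process(joint_cls, offset_reg, depth_reg)

    return joints[0]
```

A typical caller would construct `A2J(num_joints=...)`, load the trained weights from *checkpoint/A2J*, and feed it the box produced by the CenterNet stage.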
29 | **NOTE:** To train a model, please refer to the Hand Posture Estimation repository. 30 | 31 | 32 | ## Run inference 33 | 34 | - First, configure the *pipeline/constants.py* file: 35 | - **CENTERNET_MODEL_PATH**: place the CenterNet model weights in *"/checkpoint/CenterNet"*
36 | with the naming convention provided in the original repository. 37 | - Configure the CenterNet portion of the file as described in the original [repository](https://github.com/NVIDIA-AI-IOT/centernet_kinect#get_pre_trained_weights).
38 | If you are using the weights directly from the original repository, you do not have to modify this section. 39 | - **A2J_MODEL_PATH**: place the A2J model weights in *"/checkpoint/A2J"*
40 | with the naming convention provided in the original repository. 41 | - Configure the A2J portion of the file to match how you set up the training pipeline for [Hand Posture Estimation](https://github.com/NVIDIA-AI-IOT/a2j_handpose_3d).
42 | - For faster inference, we use the TensorRT inference engine to optimize the models. This will take some time to compile the models and create a TRT engine (a minimal conversion sketch follows below).
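For illustration only, this is one way an fp16 TensorRT engine could be built for the A2J network with [torch2trt](https://github.com/NVIDIA-AI-IOT/torch2trt) on Jetson; the project's own conversion is performed inside the pipeline when `--trt True` is passed, so the module choice and the `(width, height)` reading of `A2J_TARGET_SIZE` below are assumptions, not the project's *model_setup.py*.

```python
# Hedged sketch, assuming torch2trt is installed and a trained A2J checkpoint is available.
import torch
from torch2trt import torch2trt

import pipeline.constants as const
from model.A2J.a2j import A2J

model = A2J(num_joints=18, backbone_name="resnet18").cuda().eval()
# load the trained weights from checkpoint/A2J here before converting (omitted)

# A2J_TARGET_SIZE is assumed to be stored as (width, height)
dummy = torch.randn(1, 1, const.A2J_TARGET_SIZE[1], const.A2J_TARGET_SIZE[0]).cuda()

# fp16_mode=True builds a half-precision engine; the first conversion is slow
model_trt = torch2trt(model, [dummy], fp16_mode=True)
torch.save(model_trt.state_dict(), "a2j_trt.pth")  # cache so later runs can reload the engine
```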
43 | - Run realtime inference on a jetson platform. 44 | ```bash 45 | cd pipeline 46 | python3 azure_kinect.py 47 | 48 | # Optional for faster inference 49 | python3 azure_kinect.py --trt True # for optimizing the models with TensorRT fp16 50 | ``` 51 | -------------------------------------------------------------------------------- /checkpoint/A2J/README.md: -------------------------------------------------------------------------------- 1 | # A2J checkpoint directory 2 | -------------------------------------------------------------------------------- /checkpoint/CenterNet/README.md: -------------------------------------------------------------------------------- 1 | # CenterNet checkpoint directory 2 | -------------------------------------------------------------------------------- /checkpoint/SSD/README.md: -------------------------------------------------------------------------------- 1 | # SSD checkpoint directory 2 | -------------------------------------------------------------------------------- /model/A2J/a2j.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 9 | import os 10 | import sys 11 | import torch 12 | import torch.nn as nn 13 | 14 | # PROJ ROOT DIR 15 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) # A2J 16 | MODEL_PATH = os.path.join(DIR_PATH, os.path.pardir) # Model 17 | ROOT_PATH = os.path.join(MODEL_PATH, os.path.pardir) # root 18 | 19 | sys.path.append(ROOT_PATH) 20 | 21 | # Import Project Library 22 | from model.A2J.back_bone.resnet import ResnetBackbone 23 | from model.A2J.back_bone.mobilenet import MobileNet 24 | from model.A2J.a2j_utilities.a2j_branchs import DepthRegression, OffsetRegression, JointClassification 25 | 26 | A2J_BACKBONE_CONFIG = { 27 | "resnet18": {"backbone": ResnetBackbone, "common_trunk": 256, "Regression_trunk": 512}, 28 | "resnet34": {"backbone": ResnetBackbone, "common_trunk": 256, "Regression_trunk": 512}, 29 | "resnet50": {"backbone": ResnetBackbone, "common_trunk": 1024, "Regression_trunk": 2048}, 30 | "resnet101": {"backbone": ResnetBackbone, "common_trunk": 1024, "Regression_trunk": 2048}, 31 | "resnet152": {"backbone": ResnetBackbone, "common_trunk": 1024, "Regression_trunk": 2048}, 32 | "mobilenet": {"backbone": MobileNet, "common_trunk": 512, "Regression_trunk": 1024}, 33 | } 34 | 35 | class BacknoneNetwork(nn.Module): 36 | """ 37 | Backbone Network Base Class" 38 | """ 39 | def __init__(self, backbone_name="resnet18", backbone_pretrained=True): 40 | """ 41 | Class constructor 42 | 43 | :param backbone_name: the name of the backbone network 44 | :param backbone_pretrained: load a pretrained backbone network 45 | """ 46 | super(BacknoneNetwork, self).__init__() 47 | self.model = A2J_BACKBONE_CONFIG[backbone_name]["backbone"](backbone_name="resnet18", backbone_pretrained=True) 48 | 49 | def forward(self, x): 50 | x1, x2 = self.model(x) 51 | return x1, x2 52 | 53 | class A2J(nn.Module): 54 | """ 55 | A2J model class 56 | """ 57 | def __init__(self, num_joints=18, backbone_name="resnet18", backbone_pretrained=True): 58 | """ 59 | Class constructor 60 | 61 | :param num_joints: number of joints to predict 62 | :param backbone_name: the name of the backbone network 63 | :param backbone_pretrained: load a pretrained backbone network 64 | """ 65 | super(A2J, self).__init__() 66 | Backbone_Model = A2J_BACKBONE_CONFIG[backbone_name]["backbone"] 67 | 68 | self.back_bone = Backbone_Model(name=backbone_name, pretrained=backbone_pretrained) 69 | 70 | self.offset_regression = OffsetRegression(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["Regression_trunk"], num_joints=num_joints) 71 | self.depth_regression = DepthRegression(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["Regression_trunk"], num_joints=num_joints) 72 | self.joint_classification = JointClassification(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["common_trunk"], num_joints=num_joints) 73 | 74 | # self.Backbone = Backbone_Model(name=backbone_name, pretrained=backbone_pretrained) 75 | 76 | # self.regressionModel = OffsetRegression(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["Regression_trunk"], num_joints=num_joints) 77 | # self.DepthRegressionModel = DepthRegression(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["Regression_trunk"], num_joints=num_joints) 78 | # self.classificationModel = JointClassification(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["common_trunk"], num_joints=num_joints) 79 | 80 | 81 | def forward(self, x): 82 | out3, out4 = self.back_bone(x) 83 | offset_regression = self.offset_regression(out4) 84 | depth_regression = self.depth_regression(out4) 85 | joint_classification = 
self.joint_classification(out3) 86 | 87 | # out3, out4 = self.Backbone(x) 88 | # offset_regression = self.regressionModel(out4) 89 | # depth_regression = self.DepthRegressionModel(out4) 90 | # joint_classification = self.classificationModel(out3) 91 | 92 | 93 | return joint_classification, offset_regression, depth_regression 94 | -------------------------------------------------------------------------------- /model/A2J/a2j_utilities/a2j_branchs.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import os 10 | import sys 11 | import torch.nn as nn 12 | 13 | # PROJ ROOT DIR 14 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) # a2j_utilities 15 | A2J_PATH = os.path.join(DIR_PATH, os.path.pardir) # A2J 16 | MODEL_PATH = os.path.join(A2J_PATH, os.path.pardir) # model 17 | ROOT_PATH = os.path.join(MODEL_PATH, os.path.pardir) # root 18 | sys.path.append(ROOT_PATH) 19 | 20 | # Import Project Library 21 | from model.A2J.back_bone.resnet import get_ResNet 22 | 23 | class DepthRegression(nn.Module): 24 | """ 25 | Depth regression module 26 | 27 | regress the depth of the joints from the anchor points 28 | """ 29 | def __init__(self, input_channels, output_channels=256, num_anchors=16, num_joints=18): 30 | """ 31 | Class initializer 32 | 33 | :param input_channels: number of input channels 34 | :param output_channels: number of output channels 35 | :param num_anchors: total number of anchor points 36 | :param num_joints: total number of joints to predict 37 | """ 38 | super(DepthRegression, self).__init__() 39 | self.num_joints = num_joints 40 | self.num_anchors = num_anchors 41 | 42 | self.conv1 = nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1) 43 | self.bn1 = nn.BatchNorm2d(output_channels) 44 | 45 | self.conv2 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 46 | self.bn2 = nn.BatchNorm2d(output_channels) 47 | 48 | self.conv3 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 49 | self.bn3 = nn.BatchNorm2d(output_channels) 50 | 51 | self.conv4 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 52 | self.bn4 = nn.BatchNorm2d(output_channels) 53 | 54 | self.output = nn.Conv2d(output_channels, num_anchors*num_joints, kernel_size=3, padding=1) 55 | 56 | # Activation Function 57 | self.relu = nn.LeakyReLU(inplace=True) 58 | 59 | 
self._initialize() 60 | 61 | def _initialize(self): 62 | for m in self.modules(): 63 | if isinstance(m, nn.Conv2d): 64 | nn.init.xavier_normal_(m.weight.data) 65 | elif isinstance(m, nn.BatchNorm2d): 66 | m.weight.data.fill_(1) 67 | m.bias.data.zero_() 68 | 69 | def forward(self, x): 70 | # (N, inChannels, 10, 9) 71 | out = self.conv1(x) # (N, 256, 10, 9) 72 | out = self.bn1(out) # (N, 256, 10, 9) 73 | out = self.relu(out) # (N, 256, 10, 9) 74 | 75 | out = self.conv2(out) # (N, 256, 10, 9) 76 | out = self.bn2(out) # (N, 256, 10, 9) 77 | out = self.relu(out) # (N, 256, 10, 9) 78 | 79 | out = self.conv3(out) # (N, 256, 10, 9) 80 | out = self.bn3(out) # (N, 256, 10, 9) 81 | out = self.relu(out) # (N, 256, 10, 9) 82 | 83 | out = self.conv4(out) # (N, 256, 10, 9) 84 | out = self.bn4(out) # (N, 256, 10, 9) 85 | out = self.relu(out) # (N, 256, 10, 9) 86 | 87 | out = self.output(out) # (N, num_joints*num_anchors, 10, 9) 88 | 89 | out = out.permute(0, 3, 2, 1) # (N, 9, 10, num_joints*num_anchors) 90 | batch_size, width, height, channels = out.shape 91 | out = out.view(batch_size, width, height, self.num_anchors, self.num_joints) # (N, 9, 10, num_anchors, num_joints) 92 | return out.contiguous().view(batch_size, -1, self.num_joints) # (N, 9*10*num_anchors, num_joint) 93 | 94 | class OffsetRegression(nn.Module): 95 | """ 96 | Offset Regression class 97 | 98 | estimate the joint offsets from the anchorpoints 99 | """ 100 | def __init__(self, input_channels, output_channels=256, num_anchors=16, num_joints=18): 101 | """ 102 | Class initializer 103 | 104 | :param input_channels: number of input channels 105 | :param output_channels: number of output channels 106 | :param num_anchors: total number of anchor points 107 | :param num_joints: total number of joints to predict 108 | """ 109 | super(OffsetRegression, self).__init__() 110 | 111 | self.num_anchors = num_anchors 112 | self.num_joints = num_joints 113 | 114 | self.conv1 = nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1) 115 | self.bn1 = nn.BatchNorm2d(output_channels) 116 | 117 | self.conv2 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 118 | self.bn2 = nn.BatchNorm2d(output_channels) 119 | 120 | self.conv3 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 121 | self.bn3 = nn.BatchNorm2d(output_channels) 122 | 123 | self.conv4 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 124 | self.bn4 = nn.BatchNorm2d(output_channels) 125 | 126 | self.output = nn.Conv2d(output_channels, num_anchors*num_joints*2, kernel_size=3, padding=1) 127 | 128 | # Activation Function 129 | self.relu = nn.LeakyReLU(inplace=True) 130 | 131 | self._initialize() 132 | 133 | def _initialize(self): 134 | for m in self.modules(): 135 | if isinstance(m, nn.Conv2d): 136 | nn.init.xavier_normal_(m.weight.data) 137 | elif isinstance(m, nn.BatchNorm2d): 138 | m.weight.data.fill_(1) 139 | m.bias.data.zero_() 140 | 141 | def forward(self, x): 142 | out = self.conv1(x) # (N, 256, 10, 9) 143 | out = self.bn1(out) # (N, 256, 10, 9) 144 | out = self.relu(out) # (N, 256, 10, 9) 145 | 146 | out = self.conv2(out) # (N, 256, 10, 9) 147 | out = self.bn2(out) # (N, 256, 10, 9) 148 | out = self.relu(out) # (N, 256, 10, 9) 149 | 150 | out = self.conv3(out) # (N, 256, 10, 9) 151 | out = self.bn3(out) # (N, 256, 10, 9) 152 | out = self.relu(out) # (N, 256, 10, 9) 153 | 154 | out = self.conv4(out) # (N, 256, 10, 9) 155 | out = self.bn4(out) # (N, 256, 10, 9) 156 | out = self.relu(out) # (N, 256, 10, 9) 157 | 158 
| out = self.output(out) # (N, num_joints*num_anchors*2, 10, 9) 159 | 160 | out = out.permute(0, 3, 2, 1) # (N, 9, 10, num_joints*num_anchors*2) 161 | batch_size, width, height, channels = out.shape 162 | out = out.view(batch_size, width, height, self.num_anchors, self.num_joints, 2) # (N, 9, 10, num_anchors, num_joints, 2) 163 | return out.contiguous().view(batch_size, -1, self.num_joints, 2) # (N, 9*10*num_anchors, num_joints, 2) 164 | 165 | class JointClassification(nn.Module): 166 | """ 167 | Joint classification class 168 | """ 169 | def __init__(self, input_channels, output_channels=256, num_anchors=16, num_joints=18): 170 | """ 171 | Class initializer 172 | 173 | :param input_channels: number of input channels 174 | :param output_channels: number of output channels 175 | :param num_anchors: total number of anchor points 176 | :param num_joints: total number of joints to predict 177 | """ 178 | super(JointClassification, self).__init__() 179 | 180 | self.num_anchors = num_anchors 181 | self.num_joints = num_joints 182 | 183 | self.conv1 = nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1) 184 | self.bn1 = nn.BatchNorm2d(output_channels) 185 | 186 | self.conv2 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 187 | self.bn2 = nn.BatchNorm2d(output_channels) 188 | 189 | self.conv3 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 190 | self.bn3 = nn.BatchNorm2d(output_channels) 191 | 192 | self.conv4 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 193 | self.bn4 = nn.BatchNorm2d(output_channels) 194 | 195 | self.output = nn.Conv2d(output_channels, num_anchors*num_joints, kernel_size=3, padding=1) 196 | 197 | # Activation Function 198 | self.relu = nn.LeakyReLU(inplace=True) 199 | 200 | self._initialize() 201 | 202 | def _initialize(self): 203 | for m in self.modules(): 204 | if isinstance(m, nn.Conv2d): 205 | nn.init.xavier_normal_(m.weight.data) 206 | elif isinstance(m, nn.BatchNorm2d): 207 | m.weight.data.fill_(1) 208 | m.bias.data.zero_() 209 | 210 | def forward(self, x): 211 | out = self.conv1(x) 212 | out = self.bn1(out) 213 | out = self.relu(out) 214 | 215 | out = self.conv2(out) 216 | out = self.bn2(out) 217 | out = self.relu(out) 218 | 219 | out = self.conv3(out) 220 | out = self.bn3(out) 221 | out = self.relu(out) 222 | 223 | out = self.conv4(out) 224 | out = self.bn4(out) 225 | out = self.relu(out) 226 | 227 | out = self.output(out) 228 | 229 | out = out.permute(0, 3, 2, 1) 230 | batch_size, width, height, channels = out.shape 231 | out = out.view(batch_size, width, height, self.num_anchors, self.num_joints) 232 | return out.contiguous().view(batch_size, -1, self.num_joints) 233 | -------------------------------------------------------------------------------- /model/A2J/a2j_utilities/a2j_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import os 10 | import sys 11 | import torch 12 | import numpy as np 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | # PROJ ROOT DIR 17 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) # a2j_utilities 18 | A2J_PATH = os.path.join(DIR_PATH, os.path.pardir) # A2J 19 | MODEL_PATH = os.path.join(A2J_PATH, os.path.pardir) # model 20 | ROOT_PATH = os.path.join(MODEL_PATH, os.path.pardir) # root 21 | sys.path.append(ROOT_PATH) 22 | 23 | # Import Project Libraries 24 | import pipeline.constants as const 25 | 26 | 27 | 28 | def generate_anchors(p_h=None, p_w=None): 29 | """ 30 | Generate anchor shape 31 | 32 | :param p_h: anchor hieght layout 33 | :param p_w: anchor width layout 34 | """ 35 | if p_h is None: 36 | p_h = np.array([2, 6, 10, 14]) 37 | 38 | if p_w is None: 39 | p_w = np.array([2, 6, 10, 14]) 40 | 41 | num_anchors = len(p_h) * len(p_w) 42 | 43 | # Initialize the anchor points 44 | k = 0 45 | anchors = np.zeros((num_anchors, 2)) 46 | for i in range(len(p_w)): 47 | for j in range(len(p_h)): 48 | anchors[k,1] = p_w[j] 49 | anchors[k,0] = p_h[i] 50 | k += 1 51 | return anchors 52 | 53 | def shift(shape, stride, anchor): 54 | """ 55 | Create the locations of all the anchonrs in the in put image 56 | 57 | :param shape: common trunk (H, W) 58 | :param stride: the downsampling factor from input to common trunk 59 | :param anchor: anchor 60 | """ 61 | shift_h = np.arange(0, shape[0]) * stride # (shape[0]) 10 62 | shift_w = np.arange(0, shape[1]) * stride # (shape[1]) 9 63 | 64 | shift_h, shift_w = np.meshgrid(shift_h, shift_w) # (shape[1], shape[0]) (9, 10), (shape[1], shape[0]) (9, 10) 65 | shifts = np.vstack( (shift_h.ravel(), shift_w.ravel()) ).transpose() # (shape[0]*shape[1], 2) (90, 2) 66 | 67 | A = anchor.shape[0] # 16 68 | K = shifts.shape[0] # (shape[0]*shape[1]) (90) 69 | 70 | all_anchors = (anchor.reshape(1,A,2) + shifts.reshape((1, K, 2)).transpose((1, 0, 2))) # (shape[0]*shape[1], A, 2) 71 | all_anchors = all_anchors.reshape((K*A, 2)) # (shape[0]*shape[1]*A, 2) 72 | return all_anchors 73 | -------------------------------------------------------------------------------- /model/A2J/a2j_utilities/post_processing.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import os 10 | import sys 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | # PROJ ROOT DIR 16 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) # a2j_utilities 17 | A2J_PATH = os.path.join(DIR_PATH, os.path.pardir) # A2J 18 | MODEL_PATH = os.path.join(A2J_PATH, os.path.pardir) # model 19 | ROOT_PATH = os.path.join(MODEL_PATH, os.path.pardir) # root 20 | sys.path.append(ROOT_PATH) 21 | 22 | # Import Project Library 23 | import pipeline.constants as const 24 | from model.A2J.a2j_utilities.a2j_utils import generate_anchors, shift 25 | 26 | class PostProcess(nn.Module): 27 | """ 28 | PosrProcessing class 29 | """ 30 | def __init__(self, p_h=None, p_w=None, shape=[const.A2J_TARGET_SIZE[1]//16, const.A2J_TARGET_SIZE[0]//16],\ 31 | stride=const.A2J_STRIDE): 32 | """ 33 | Class constructior 34 | 35 | :param p_w: 36 | """ 37 | 38 | super(PostProcess, self).__init__() 39 | anchors = generate_anchors(p_h=p_h, p_w=p_w) 40 | self.all_anchors = torch.from_numpy(shift(shape, stride, anchors)).float() 41 | 42 | def forward(self, joint_classifications, offset_regressions, depth_regressions): 43 | """ 44 | forward pass through the module 45 | 46 | :param joint_classifications: type torch.tensor, joint classification output of the model 47 | :param offset_regressions: type torch.tensor, offset regression output of the model 48 | :param depth_regressions: type torch.tensor, depth rgression output of the model 49 | """ 50 | DEVICE = joint_classifications.device 51 | 52 | batch_size = joint_classifications.shape[0] 53 | anchor = self.all_anchors.to(DEVICE) # (shape[0]*shape[1]*anchor_stride, 2) (1440, 2) 54 | predictions = list() 55 | 56 | for i in range(batch_size): 57 | joint_classification = joint_classifications[i] # (shape[0]*shape[1]*anchor_stride, num_joints) (1440, 18) 58 | offset_regression = offset_regressions[i] # (shape[0]*shape[1]*anchor_stride, num_joints, 2) (1440, 18, 2) 59 | depth_regression = depth_regressions[i] # (shape[0]*shape[1]*anchor_stride, num_joits) (1440, 18) 60 | 61 | # xy_regression: is the location of each anchor point + the offset 62 | # offset_regression: is giving us the offset 63 | xy_regression = torch.unsqueeze(anchor, 1).to(DEVICE) + offset_regression # (shape[0]*shape[1]*anchor_stride, 2) (1440, 18, 2) 64 | 65 | # reg_weight: is gining us the classification (importance) of each anchor point 66 | reg_weight = F.softmax(joint_classification, dim=0) # (shape[0]*shape[1]*anchor_stride, num_joints) 
(1440, 18) 67 | 68 | # reg_weight_xy: reg_weight expanded to two weight tensors, one to multiply with each of the x and y coordinates 69 | reg_weight_xy = reg_weight.unsqueeze(2).expand(reg_weight.shape[0], reg_weight.shape[1], 2).to(DEVICE) # (shape[0]*shape[1]*anchor_stride, num_joints, 2) (1440, 18, 2) 70 | 71 | prediction_xy = (reg_weight_xy * xy_regression).sum(0) 72 | prediction_depth = (reg_weight * depth_regression).sum(0) 73 | 74 | prediction_depth = prediction_depth.unsqueeze(1).to(DEVICE) 75 | 76 | prediction = torch.cat((prediction_xy, prediction_depth), 1) # (num_joints, 3): x, y, depth 77 | predictions.append(prediction) 78 | 79 | return predictions 80 | -------------------------------------------------------------------------------- /model/A2J/back_bone/mobilenet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | ''' 9 | import torch 10 | import torchvision 11 | import torch.nn.functional as F 12 | 13 | from torch import nn 14 | from math import sqrt 15 | from itertools import product as product 16 | 17 | # Set the global device variable to cuda is GPU is avalible 18 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 19 | 20 | class MobileNet(nn.Module): 21 | """ 22 | MobileNet Bass class to produce lower lever features 23 | """ 24 | def __init__(self, **kwargs): 25 | super(MobileNet, self).__init__() 26 | 27 | # Activation function 28 | self.relu = nn.LeakyReLU(0.01) 29 | 30 | # Standard MobileNet Convolution layers 31 | self.conv1_1 = nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1) 32 | self.bn1_1 = nn.BatchNorm2d(32) 33 | self.conv1_2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, groups=32) 34 | self.bn1_2 = nn.BatchNorm2d(32) 35 | self.conv1_3 = nn.Conv2d(32, 64, kernel_size=1, stride=1, padding=0) 36 | self.bn1_3 = nn.BatchNorm2d(64) 37 | self.conv1_4 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, groups=64) 38 | self.bn1_4 = nn.BatchNorm2d(64) 39 | 40 | self.conv2_1 = nn.Conv2d(64, 128, kernel_size=1, stride=1, padding=0) 41 | self.bn2_1 = nn.BatchNorm2d(128) 42 | self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, groups=128) 43 | self.bn2_2 = nn.BatchNorm2d(128) 44 | self.conv2_3 = nn.Conv2d(128, 128, kernel_size=1, stride=1, padding=0) 45 | self.bn2_3 = nn.BatchNorm2d(128) 46 | self.conv2_4 = nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1, groups=128) 47 | self.bn2_4 = nn.BatchNorm2d(128) 48 | 49 | self.conv3_1 = nn.Conv2d(128, 256, kernel_size=1, stride=1, padding=0) 50 | self.bn3_1 = nn.BatchNorm2d(256) 51 | self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, groups=256) 52 | self.bn3_2 = nn.BatchNorm2d(256) 53 | self.conv3_3 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0) 54 | self.bn3_3 = nn.BatchNorm2d(512) 55 | self.conv3_4 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, groups=512) 56 | self.bn3_4 = nn.BatchNorm2d(512) 57 | self.conv3_5 = nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0) 58 | self.bn3_5 = nn.BatchNorm2d(512) 59 | self.conv3_6 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, groups=512) 60 | self.bn3_6 = nn.BatchNorm2d(512) 61 | self.conv3_7 = nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0) 62 | self.bn3_7 = nn.BatchNorm2d(512) # <--- 63 | self.conv3_8 = nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1, groups=512) 64 | self.bn3_8 = nn.BatchNorm2d(512) 65 | 66 | self.conv4_1 = nn.Conv2d(512, 1024, kernel_size=1, stride=1, padding=0) 67 | self.bn4_1 = nn.BatchNorm2d(1024) 68 | self.conv4_2 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) 69 | self.bn4_2 = nn.BatchNorm2d(1024) 70 | self.conv4_3 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 71 | self.bn4_3 = nn.BatchNorm2d(1024) 72 | self.conv4_4 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) 73 | self.bn4_4 = nn.BatchNorm2d(1024) 74 | self.conv4_5 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 75 | self.bn4_5 = nn.BatchNorm2d(1024) 76 | self.conv4_6 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) 77 | self.bn4_6 = nn.BatchNorm2d(1024) 78 | self.conv4_7 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 79 | self.bn4_7 = nn.BatchNorm2d(1024) 80 | self.conv4_8 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) 81 | self.bn4_8 = nn.BatchNorm2d(1024) 
82 | self.conv4_9 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 83 | self.bn4_9 = nn.BatchNorm2d(1024) 84 | self.conv4_10 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) 85 | self.bn4_10 = nn.BatchNorm2d(1024) 86 | self.conv4_11 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 87 | self.bn4_11 = nn.BatchNorm2d(1024) # <--- 88 | 89 | self._init_conv2d() 90 | 91 | def _init_conv2d(self): 92 | """ 93 | Initialize convolution parameters. 94 | """ 95 | for c in self.children(): 96 | if isinstance(c, nn.Conv2d): 97 | nn.init.xavier_uniform_(c.weight) 98 | nn.init.constant_(c.bias, 0.) 99 | 100 | 101 | def forward(self, x): 102 | out = self.relu(self.conv1_1(x)) # (N, 32, 150, 150) 103 | out = self.bn1_1(out) # (N, 32, 150, 150) 104 | out = self.relu(self.conv1_2(out)) # (N, 32, 150, 150) 105 | out = self.bn1_2(out) # (N, 32, 150, 150) 106 | out = self.relu(self.conv1_3(out)) # (N, 64, 150, 150) 107 | out = self.bn1_3(out) # (N, 64, 150, 150) 108 | 109 | out = self.relu(self.conv1_4(out)) # (N, 64, 75, 75) 110 | out = self.bn1_4(out) # (N, 64, 75, 75) 111 | out = self.relu(self.conv2_1(out)) # (N, 128, 75, 75) 112 | out = self.bn2_1(out) # (N, 128, 75, 75) 113 | out = self.relu(self.conv2_2(out)) # (N, 128, 75, 75) 114 | out = self.bn2_2(out) # (N, 128, 75, 75) 115 | out = self.relu(self.conv2_3(out)) # (N, 128, 75, 75) 116 | out = self.bn2_3(out) # (N, 128, 75, 75) 117 | 118 | out = self.relu(self.conv2_4(out)) # (N, 128, 38, 38) 119 | out = self.bn2_4(out) # (N, 128, 38, 38) 120 | out = self.relu(self.conv3_1(out)) # (N, 256, 38, 38) 121 | out = self.bn3_1(out) # (N, 256, 38, 38) 122 | out = self.relu(self.conv3_2(out)) # (N, 256, 38, 38) 123 | out = self.bn3_2(out) # (N, 256, 38, 38) 124 | out = self.relu(self.conv3_3(out)) # (N, 256, 38, 38) 125 | out = self.bn3_3(out) # (N, 512, 38, 38) 126 | out = self.relu(self.conv3_4(out)) # (N, 512, 38, 38) 127 | out = self.bn3_4(out) # (N, 512, 38, 38) 128 | out = self.relu(self.conv3_5(out)) # (N, 512, 38, 38) 129 | out = self.bn3_5(out) # (N, 512, 38, 38) 130 | out = self.relu(self.conv3_6(out)) # (N, 512, 38, 38) 131 | out = self.bn3_6(out) # (N, 512, 38, 38) 132 | out = self.relu(self.conv3_7(out)) # (N, 512, 38, 38) 133 | out = self.bn3_7(out) # (N, 512, 38, 38) 134 | out = self.relu(self.conv3_8(out)) # (N, 512, 19, 19) 135 | out = self.bn3_8(out) # (N, 256, 19, 19) 136 | conv3_8 = out 137 | 138 | out = self.relu(self.conv4_1(out)) # (N, 1024, 19, 19) 139 | out = self.bn4_1(out) # (N, 1024, 19, 19) 140 | out = self.relu(self.conv4_2(out)) # (N, 1024, 19, 19) 141 | out = self.bn4_2(out) # (N, 1024, 19, 19) 142 | out = self.relu(self.conv4_3(out)) # (N, 1024, 19, 19) 143 | out = self.bn4_3(out) # (N, 1024, 19, 19) 144 | out = self.relu(self.conv4_4(out)) # (N, 1024, 19, 19) 145 | out = self.bn4_4(out) # (N, 1024, 19, 19) 146 | out = self.relu(self.conv4_5(out)) # (N, 1024, 19, 19) 147 | out = self.bn4_5(out) # (N, 1024, 19, 19) 148 | out = self.relu(self.conv4_6(out)) # (N, 1024, 19, 19) 149 | out = self.bn4_6(out) # (N, 1024, 19, 19) 150 | out = self.relu(self.conv4_7(out)) # (N, 1024, 19, 19) 151 | out = self.bn4_7(out) # (N, 1024, 19, 19) 152 | out = self.relu(self.conv4_8(out)) # (N, 1024, 19, 19) 153 | out = self.bn4_8(out) # (N, 1024, 19, 19) 154 | out = self.relu(self.conv4_9(out)) # (N, 1024, 19, 19) 155 | out = self.bn4_9(out) # (N, 1024, 19, 19) 156 | out = self.relu(self.conv4_10(out)) # (N, 1024, 19, 19) 157 | out = self.bn4_10(out) # (N, 1024, 19, 19) 158 | out = 
self.relu(self.conv4_11(out)) # (N, 1024, 19, 19) 159 | out = self.bn4_11(out) # (N, 1024, 19, 19) <----- 160 | conv12_4 = out 161 | 162 | return conv3_8, conv12_4 163 | -------------------------------------------------------------------------------- /model/A2J/back_bone/resnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import torch.nn as nn 10 | import torch.utils.model_zoo as model_zoo 11 | 12 | PRETRAINED_MODELS = { 13 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 14 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 15 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 16 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 17 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 18 | } 19 | 20 | def conv3x3(in_planes, out_planes, stride=1, dilation=1): 21 | """3x3 convolution with padding""" 22 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, dilation=dilation, 23 | padding=dilation, bias=False) 24 | 25 | 26 | def conv1x1(in_planes, out_planes, stride=1): 27 | """1x1 convolution""" 28 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 29 | 30 | 31 | 32 | class BasicBlock(nn.Module): 33 | """ 34 | Resnet Basic Residual Block 35 | """ 36 | expansion = 1 37 | def __init__(self, input_channels, output_channels, stride=1, dilation=1, downsample=None): 38 | """ 39 | Class constructor 40 | 41 | :param input_channels: number of input channels to the residual block 42 | :param output channels: number of putput channels of the residual block 43 | :param stride: stride of the first convolution in the residual block 44 | :param dilation: dilation of the second convolution in the residual block 45 | :param downsample: torch.nn function for down sampling the input x for concatenation in the residual layer 46 | """ 47 | super(BasicBlock, self).__init__() 48 | 49 | self.conv1 = conv3x3(input_channels, output_channels, stride=stride) 50 | self.bn1 = nn.BatchNorm2d(output_channels) 51 | 52 | self.conv2 = conv3x3(output_channels, output_channels, dilation=dilation) 53 | self.bn2 = nn.BatchNorm2d(output_channels) 54 | 55 | self.downsample = downsample 56 | self.stride = stride 57 | 58 | # Actiation function 59 | self.relu = nn.LeakyReLU(inplace=True) 
60 | 61 | 62 | def forward(self, x): 63 | identity = x 64 | 65 | out = self.conv1(x) 66 | out = self.bn1(out) 67 | out = self.relu(out) 68 | 69 | out = self.conv2(out) 70 | out = self.bn2(out) 71 | 72 | if self.downsample is not None: 73 | identity = self.downsample(x) 74 | 75 | out += identity 76 | out = self.relu(out) 77 | 78 | return out 79 | 80 | class Bottleneck(nn.Module): 81 | """ 82 | Resnet Bottleneck network 83 | """ 84 | expansion = 4 85 | def __init__(self, input_channels, output_channels, stride=1, dilation=1, downsample=None): 86 | """ 87 | Class constructor 88 | 89 | :param input_channels: number of input channels to the residual block 90 | :param output channels: number of putput channels of the residual block 91 | :param stride: stride of the second convolution in the residual block 92 | :param dilation: dilation of the second convolution in the residual block 93 | :param downsample: torch.nn function for down sampling the input x for concatenation in the residual layer 94 | """ 95 | super(Bottleneck, self).__init__() 96 | 97 | self.conv1 = conv1x1(input_channels, output_channels) 98 | self.bn1 = nn.BatchNorm2d(output_channels) 99 | 100 | self.conv2 = conv3x3(output_channels, output_channels, stride=stride, dilation=dilation) 101 | self.bn2 = nn.BatchNorm2d(output_channels) 102 | 103 | self.conv3 = conv1x1(output_channels, output_channels*self.expansion) 104 | self.bn3 = nn.BatchNorm2d(output_channels*self.expansion) 105 | 106 | self.downsample = downsample 107 | self.stride = stride 108 | 109 | # Activation function 110 | self.relu = nn.LeakyReLU(inplace=True) 111 | 112 | 113 | 114 | def forward(self, x): 115 | identity = x 116 | 117 | out = self.conv1(x) 118 | out = self.bn1(out) 119 | out = self.relu(out) 120 | 121 | out = self.conv2(out) 122 | out = self.bn2(out) 123 | out = self.relu(out) 124 | 125 | out = self.conv3(out) 126 | out = self.bn3(out) 127 | 128 | if self.downsample is not None: 129 | identity = self.downsample(x) 130 | 131 | out += identity 132 | out = self.relu(out) 133 | 134 | return out 135 | 136 | 137 | class ResNet(nn.Module): 138 | """ 139 | ResNet Definition 140 | 141 | could create resnet (18, 34, 50, 101, 152) by setting the parameters 142 | """ 143 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): 144 | """ 145 | Class constructor 146 | 147 | :param block: type toch.nn, A residual block class instance (i.e. 
BasicBlock or Bottleneck) 148 | :param layers: type list, A list holding the number of residual blocks in each ResNet layer 149 | :param num_classes: if using a pretrained network make sure the number of classes are the same 150 | :param zero_init_residual: Zero Initialiaze the last batchnorm in each residual layer for higher accuracy 151 | """ 152 | super(ResNet, self).__init__() 153 | 154 | self.input_channels = 64 155 | 156 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 157 | self.bn1 = nn.BatchNorm2d(64) 158 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 159 | 160 | self.layer1 = self._make_resnet_layer(block, 64, layers[0]) 161 | self.layer2 = self._make_resnet_layer(block, 128, layers[1], stride=2) 162 | self.layer3 = self._make_resnet_layer(block, 256, layers[2], stride=2) 163 | self.layer4 = self._make_resnet_layer(block, 512, layers[3], stride=1, dilation=2) 164 | self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) 165 | self.fc = nn.Linear(512*block.expansion, num_classes) 166 | 167 | # Activation function 168 | self.relu = nn.LeakyReLU(inplace=True) 169 | 170 | self._initialize() 171 | if zero_init_residual: 172 | self._zero_initialize() 173 | 174 | def _make_resnet_layer(self, block, output_channels, blocks, stride=1, dilation=1): 175 | """ 176 | Method to create residual block layer in resnet 177 | 178 | :param block: type torch.nn, a residual block block class instance (i.e. BasicBlock or Bottleneck) 179 | :param output_channels: type int, number of output channels of the residual block layer 180 | :param blocks: type int, number of residual blocks in this layer 181 | :param stride: type int 182 | :param dilation: type int 183 | """ 184 | downsample = None 185 | 186 | if (stride != 1) or (self.input_channels != output_channels*block.expansion): 187 | downsample = nn.Sequential( 188 | conv1x1(self.input_channels, output_channels*block.expansion, stride=stride), 189 | nn.BatchNorm2d(output_channels*block.expansion), 190 | ) 191 | 192 | layers = list() 193 | layers.append(block(self.input_channels, output_channels, stride=stride, downsample=downsample)) 194 | 195 | self.input_channels = output_channels * block.expansion 196 | 197 | for _ in range(1, blocks): 198 | layers.append(block(self.input_channels, output_channels, dilation=dilation)) 199 | 200 | return nn.Sequential(*layers) 201 | 202 | def _initialize(self): 203 | for m in self.modules(): 204 | if isinstance(m, nn.Conv2d): 205 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu') 206 | elif isinstance(m, nn.BatchNorm2d): 207 | nn.init.constant_(m.weight, 1) 208 | nn.init.constant_(m.bias, 0) 209 | 210 | def _zero_initialize(self): 211 | for m in self.modules(): 212 | for m in self.modules(): 213 | if isinstance(m, Bottleneck): 214 | nn.init.constant_(m.bn3.weight, 0) 215 | elif isinstance(m, BasicBlock): 216 | nn.init.constant_(m.bn2.weight, 0) 217 | 218 | def forward(self, x): 219 | x = self.conv1(x) 220 | x = self.bn1(x) 221 | x = self.relu(x) 222 | x = self.maxpool(x) 223 | 224 | x = self.layer1(x) 225 | x = self.layer2(x) 226 | x = self.layer3(x) 227 | x = self.layer4(x) 228 | 229 | x = self.avg_pool(x) 230 | x = x.view(x.size(0), -1) 231 | x = self.fc(x) 232 | 233 | return x 234 | 235 | 236 | def get_ResNet(resnet_model="resnet18", pretrained=False): 237 | 238 | resnet_setups = { 239 | "resnet18": {"block": BasicBlock, "layers": [2, 2, 2, 2]}, 240 | "resnet34": {"block": BasicBlock, "layers": [3, 4, 6, 3]}, 241 | "resnet50": {"block": Bottleneck, 
"layers": [3, 4, 6, 3]}, 242 | "resnet101": {"block": Bottleneck, "layers": [3, 4, 23, 3]}, 243 | "resnet152": {"block": Bottleneck, "layers": [3, 8, 36, 3]}, 244 | } 245 | model = ResNet(resnet_setups[resnet_model]["block"], resnet_setups[resnet_model]["layers"]) 246 | if pretrained: 247 | model.load_state_dict(model_zoo.load_url(PRETRAINED_MODELS[resnet_model])) 248 | 249 | return model 250 | 251 | class ResnetBackbone(nn.Module): 252 | """ 253 | The Resnet Backbone module 254 | """ 255 | def __init__(self, name="resnet18", pretrained=True): 256 | """ 257 | Class constructor 258 | 259 | :param name: name of the resnet model to load 260 | :param pretrained: weather or not to load the weight of a pretrained model on ImageNet 261 | """ 262 | super(ResnetBackbone, self).__init__() 263 | self.model = get_ResNet(resnet_model=name, pretrained=pretrained) 264 | 265 | def forward(self, x): 266 | n, c, h, w = x.size() 267 | 268 | x = x[:,0:1,:,:] # depth 269 | x = x.expand(n, 3, h, w) 270 | 271 | out = self.model.conv1(x) 272 | out = self.model.bn1(out) 273 | out = self.model.relu(out) 274 | out = self.model.maxpool(out) 275 | 276 | out1 = self.model.layer1(out) 277 | out2 = self.model.layer2(out1) 278 | out3 = self.model.layer3(out2) 279 | out4 = self.model.layer4(out3) 280 | 281 | return out3, out4 282 | -------------------------------------------------------------------------------- /model/A2J/model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 9 | import torch.nn as nn 10 | from torch.nn import init 11 | import torch.utils.model_zoo as model_zoo 12 | 13 | 14 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 15 | 'resnet152'] 16 | 17 | 18 | model_urls = { 19 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 20 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 21 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 22 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 23 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 24 | } 25 | 26 | 27 | def conv3x3(in_planes, out_planes, stride=1, dilation=1): 28 | """3x3 convolution with padding""" 29 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, dilation=dilation, 30 | padding=dilation, bias=False) 31 | 32 | 33 | def conv1x1(in_planes, out_planes, stride=1): 34 | """1x1 convolution""" 35 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 36 | 37 | 38 | class BasicBlock(nn.Module): 39 | expansion = 1 40 | 41 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 42 | super(BasicBlock, self).__init__() 43 | self.conv1 = conv3x3(inplanes, planes, stride) 44 | self.bn1 = nn.BatchNorm2d(planes) 45 | self.relu = nn.ReLU(inplace=True) 46 | self.conv2 = conv3x3(planes, planes, dilation=dilation) 47 | self.bn2 = nn.BatchNorm2d(planes) 48 | self.downsample = downsample 49 | self.stride = stride 50 | 51 | def forward(self, x): 52 | identity = x 53 | 54 | out = self.conv1(x) 55 | out = self.bn1(out) 56 | out = self.relu(out) 57 | 58 | out = self.conv2(out) 59 | out = self.bn2(out) 60 | 61 | if self.downsample is not None: 62 | identity = self.downsample(x) 63 | 64 | out += identity 65 | out = self.relu(out) 66 | 67 | return out 68 | 69 | 70 | class Bottleneck(nn.Module): 71 | expansion = 4 72 | 73 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 74 | super(Bottleneck, self).__init__() 75 | self.conv1 = conv1x1(inplanes, planes) 76 | self.bn1 = nn.BatchNorm2d(planes) 77 | self.conv2 = conv3x3(planes, planes, stride, dilation=dilation) 78 | self.bn2 = nn.BatchNorm2d(planes) 79 | self.conv3 = conv1x1(planes, planes * self.expansion) 80 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 81 | self.relu = nn.ReLU(inplace=True) 82 | self.downsample = downsample 83 | self.stride = stride 84 | 85 | def forward(self, x): 86 | identity = x 87 | 88 | out = self.conv1(x) 89 | out = self.bn1(out) 90 | out = self.relu(out) 91 | 92 | out = self.conv2(out) 93 | out = self.bn2(out) 94 | out = self.relu(out) 95 | 96 | out = self.conv3(out) 97 | out = self.bn3(out) 98 | 99 | if self.downsample is not None: 100 | identity = self.downsample(x) 101 | 102 | out += identity 103 | out = self.relu(out) 104 | 105 | return out 106 | 107 | 108 | class ResNet(nn.Module): 109 | 110 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): 111 | super(ResNet, self).__init__() 112 | self.inplanes = 64 113 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 114 | bias=False) 115 | self.bn1 = nn.BatchNorm2d(64) 116 | self.relu = nn.ReLU(inplace=True) 117 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 118 | self.layer1 = self._make_layer(block, 64, layers[0]) 119 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 120 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 121 | self.layer4 = 
self._make_layer(block, 512, layers[3], stride=1,dilation=2) 122 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 123 | self.fc = nn.Linear(512 * block.expansion, num_classes) 124 | 125 | for m in self.modules(): 126 | if isinstance(m, nn.Conv2d): 127 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 128 | elif isinstance(m, nn.BatchNorm2d): 129 | nn.init.constant_(m.weight, 1) 130 | nn.init.constant_(m.bias, 0) 131 | 132 | # Zero-initialize the last BN in each residual branch, 133 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 134 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 135 | if zero_init_residual: 136 | for m in self.modules(): 137 | if isinstance(m, Bottleneck): 138 | nn.init.constant_(m.bn3.weight, 0) 139 | elif isinstance(m, BasicBlock): 140 | nn.init.constant_(m.bn2.weight, 0) 141 | 142 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 143 | downsample = None 144 | if stride != 1 or self.inplanes != planes * block.expansion: 145 | downsample = nn.Sequential( 146 | conv1x1(self.inplanes, planes * block.expansion, stride), 147 | nn.BatchNorm2d(planes * block.expansion), 148 | ) 149 | 150 | layers = [] 151 | layers.append(block(self.inplanes, planes, stride, downsample)) 152 | self.inplanes = planes * block.expansion 153 | for _ in range(1, blocks): 154 | layers.append(block(self.inplanes, planes, dilation=dilation)) 155 | 156 | return nn.Sequential(*layers) 157 | 158 | def forward(self, x): 159 | x = self.conv1(x) 160 | x = self.bn1(x) 161 | x = self.relu(x) 162 | x = self.maxpool(x) 163 | 164 | x = self.layer1(x) 165 | x = self.layer2(x) 166 | x = self.layer3(x) 167 | x = self.layer4(x) 168 | 169 | x = self.avgpool(x) 170 | x = x.view(x.size(0), -1) 171 | x = self.fc(x) 172 | 173 | return x 174 | 175 | 176 | def resnet18(pretrained=False, **kwargs): 177 | """Constructs a ResNet-18 model. 178 | Args: 179 | pretrained (bool): If True, returns a model pre-trained on ImageNet 180 | """ 181 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 182 | if pretrained: 183 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 184 | return model 185 | 186 | 187 | def resnet34(pretrained=False, **kwargs): 188 | """Constructs a ResNet-34 model. 189 | Args: 190 | pretrained (bool): If True, returns a model pre-trained on ImageNet 191 | """ 192 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 193 | if pretrained: 194 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 195 | return model 196 | 197 | 198 | def resnet50(pretrained=False, **kwargs): 199 | """Constructs a ResNet-50 model. 200 | Args: 201 | pretrained (bool): If True, returns a model pre-trained on ImageNet 202 | """ 203 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 204 | if pretrained: 205 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 206 | return model 207 | 208 | 209 | def resnet101(pretrained=False, **kwargs): 210 | """Constructs a ResNet-101 model. 211 | Args: 212 | pretrained (bool): If True, returns a model pre-trained on ImageNet 213 | """ 214 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 215 | if pretrained: 216 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 217 | return model 218 | 219 | 220 | def resnet152(pretrained=False, **kwargs): 221 | """Constructs a ResNet-152 model. 
222 | Args: 223 | pretrained (bool): If True, returns a model pre-trained on ImageNet 224 | """ 225 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 226 | if pretrained: 227 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 228 | return model 229 | 230 | class DepthRegressionModel(nn.Module): 231 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, feature_size=256): 232 | super(DepthRegressionModel, self).__init__() 233 | self.num_classes = num_classes 234 | self.num_anchors = num_anchors 235 | 236 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 237 | self.bn1 = nn.BatchNorm2d(feature_size) 238 | self.act1 = nn.ReLU() 239 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 240 | self.bn2 = nn.BatchNorm2d(feature_size) 241 | self.act2 = nn.ReLU() 242 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 243 | self.bn3 = nn.BatchNorm2d(feature_size) 244 | self.act3 = nn.ReLU() 245 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 246 | self.bn4 = nn.BatchNorm2d(feature_size) 247 | self.act4 = nn.ReLU() 248 | self.output = nn.Conv2d(feature_size, num_anchors*num_classes, kernel_size=3, padding=1) 249 | for m in self.modules(): 250 | if isinstance(m, nn.Conv2d): 251 | nn.init.xavier_normal_(m.weight.data) 252 | elif isinstance(m, nn.BatchNorm2d): 253 | m.weight.data.fill_(1) 254 | m.bias.data.zero_() 255 | 256 | def forward(self, x): 257 | out = self.conv1(x) 258 | out = self.bn1(out) 259 | out = self.act1(out) 260 | out = self.conv2(out) 261 | out = self.bn2(out) 262 | out = self.act2(out) 263 | out = self.conv3(out) 264 | out = self.bn3(out) 265 | out = self.act3(out) 266 | out = self.conv4(out) 267 | out = self.bn4(out) 268 | out = self.act4(out) 269 | out = self.output(out) 270 | 271 | # out is B x C x W x H, with C = 3*num_anchors 272 | out1 = out.permute(0, 3, 2, 1) 273 | batch_size, width, height, channels = out1.shape 274 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 275 | return out2.contiguous().view(out2.shape[0], -1, self.num_classes) 276 | 277 | class RegressionModel(nn.Module): 278 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, feature_size=256): 279 | super(RegressionModel, self).__init__() 280 | self.num_anchors = num_anchors 281 | self.num_classes = num_classes 282 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 283 | self.bn1 = nn.BatchNorm2d(feature_size) 284 | self.act1 = nn.ReLU() 285 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 286 | self.bn2 = nn.BatchNorm2d(feature_size) 287 | self.act2 = nn.ReLU() 288 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 289 | self.bn3 = nn.BatchNorm2d(feature_size) 290 | self.act3 = nn.ReLU() 291 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 292 | self.bn4 = nn.BatchNorm2d(feature_size) 293 | self.act4 = nn.ReLU() 294 | self.output = nn.Conv2d(feature_size, num_anchors*num_classes*2, kernel_size=3, padding=1) 295 | for m in self.modules(): 296 | if isinstance(m, nn.Conv2d): 297 | nn.init.xavier_normal_(m.weight.data) 298 | elif isinstance(m, nn.BatchNorm2d): 299 | m.weight.data.fill_(1) 300 | m.bias.data.zero_() 301 | 302 | def forward(self, x): 303 | out = self.conv1(x) 304 | out = self.bn1(out) 305 | out = self.act1(out) 306 | out = self.conv2(out) 307 | out = self.bn2(out) 308 | out = 
self.act2(out) 309 | out = self.conv3(out) 310 | out = self.bn3(out) 311 | out = self.act3(out) 312 | out = self.conv4(out) 313 | out = self.bn4(out) 314 | out = self.act4(out) 315 | out = self.output(out) 316 | 317 | # out is B x C x W x H, with C = 3*num_anchors 318 | out1 = out.permute(0, 3, 2, 1) 319 | batch_size, width, height, channels = out1.shape 320 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes, 2) 321 | return out2.contiguous().view(out2.shape[0], -1, self.num_classes, 2) 322 | 323 | class ClassificationModel(nn.Module): 324 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, prior=0.01, feature_size=256): 325 | super(ClassificationModel, self).__init__() 326 | self.num_classes = num_classes 327 | self.num_anchors = num_anchors 328 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 329 | self.bn1 = nn.BatchNorm2d(feature_size) 330 | self.act1 = nn.ReLU() 331 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 332 | self.bn2 = nn.BatchNorm2d(feature_size) 333 | self.act2 = nn.ReLU() 334 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 335 | self.bn3 = nn.BatchNorm2d(feature_size) 336 | self.act3 = nn.ReLU() 337 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 338 | self.bn4 = nn.BatchNorm2d(feature_size) 339 | self.act4 = nn.ReLU() 340 | self.output = nn.Conv2d(feature_size, num_anchors*num_classes, kernel_size=3, padding=1) 341 | for m in self.modules(): 342 | if isinstance(m, nn.Conv2d): 343 | nn.init.xavier_normal_(m.weight.data) 344 | elif isinstance(m, nn.BatchNorm2d): 345 | m.weight.data.fill_(1) 346 | m.bias.data.zero_() 347 | 348 | def forward(self, x): 349 | out = self.conv1(x) 350 | out = self.bn1(out) 351 | out = self.act1(out) 352 | out = self.conv2(out) 353 | out = self.bn2(out) 354 | out = self.act2(out) 355 | out = self.conv3(out) 356 | out = self.bn3(out) 357 | out = self.act3(out) 358 | out = self.conv4(out) 359 | out = self.bn4(out) 360 | out = self.act4(out) 361 | out = self.output(out) 362 | 363 | # out is B x C x W x H, with C = n_classes + n_anchors 364 | out1 = out.permute(0, 3, 2, 1) 365 | batch_size, width, height, channels = out1.shape 366 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 367 | return out2.contiguous().view(x.shape[0], -1, self.num_classes) 368 | 369 | 370 | class ResNetBackBone(nn.Module): 371 | def __init__(self): 372 | super(ResNetBackBone, self).__init__() 373 | 374 | modelPreTrain50 = resnet50(pretrained=True) 375 | self.model = modelPreTrain50 376 | 377 | def forward(self, x): 378 | n, c, h, w = x.size() # x: [B, 1, H ,W] 379 | 380 | x = x[:,0:1,:,:] # depth 381 | x = x.expand(n,3,h,w) 382 | 383 | x = self.model.conv1(x) 384 | x = self.model.bn1(x) 385 | x = self.model.relu(x) 386 | x = self.model.maxpool(x) 387 | x1 = self.model.layer1(x) 388 | x2 = self.model.layer2(x1) 389 | x3 = self.model.layer3(x2) 390 | x4 = self.model.layer4(x3) 391 | 392 | return x3,x4 393 | 394 | class A2J_model(nn.Module): 395 | def __init__(self, num_classes, is_3D=True): 396 | super(A2J_model, self).__init__() 397 | self.is_3D = is_3D 398 | self.Backbone = ResNetBackBone() # 1 channel depth only, resnet50 399 | self.regressionModel = RegressionModel(2048, num_classes=num_classes) 400 | self.classificationModel = ClassificationModel(1024, num_classes=num_classes) 401 | if is_3D: 402 | self.DepthRegressionModel = DepthRegressionModel(2048, 
num_classes=num_classes) 403 | 404 | def forward(self, x): 405 | x3,x4 = self.Backbone(x) 406 | classification = self.classificationModel(x3) 407 | regression = self.regressionModel(x4) 408 | if self.is_3D: 409 | DepthRegressionModel = self.DepthRegressionModel(x4) 410 | return (classification, regression, DepthRegressionModel) 411 | return (classification, regression) 412 | -------------------------------------------------------------------------------- /model/CenterNet/centernet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import os 10 | import sys 11 | import torch 12 | import torchvision 13 | import torch.nn as nn 14 | 15 | # Adding Project Path 16 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) # CenterNet 17 | MODEL_PATH = os.path.join(DIR_PATH, os.path.pardir) # Model 18 | ROOT_PATH = os.path.join(MODEL_PATH, os.path.pardir) # root 19 | 20 | sys.path.append(ROOT_PATH) 21 | 22 | # Importing Project Libraries 23 | import pipeline.constants as const 24 | 25 | class CRBUp(nn.Module): 26 | """ 27 | Convolution Residual Block Upsampling Class 28 | """ 29 | def __init__(self, in_channels: int, out_channels: int): 30 | super(CRBUp, self).__init__() 31 | self.layers = nn.Sequential( 32 | nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1), 33 | nn.BatchNorm2d(out_channels), 34 | nn.LeakyReLU() 35 | ) 36 | 37 | def forward(self, x): 38 | return self.layers(x) 39 | 40 | 41 | class Resnet18FeatureExtractor(nn.Module): 42 | 43 | def __init__(self, num_classes=const.CENTERNET_NUM_CLASSES, pretrained=True): 44 | super(Resnet18FeatureExtractor, self).__init__() 45 | self.num_classes = num_classes 46 | self.out_channels = 4 + num_classes 47 | self.model = torchvision.models.resnet18(pretrained=pretrained) 48 | 49 | self.up_sample1 = CRBUp(512, 256) 50 | self.up_sample2 = CRBUp(512, 128) 51 | self.up_sample3 = CRBUp(256, 64) 52 | self.up_sample4 = CRBUp(128, self.out_channels) 53 | 54 | self.sigmoid = nn.Sigmoid() 55 | 56 | 57 | def forward(self, x): 58 | x = self.model.conv1(x) 59 | x = self.model.bn1(x) 60 | x = self.model.relu(x) 61 | x = self.model.maxpool(x) 62 | 63 | x1 = self.model.layer1(x) 64 | x2 = self.model.layer2(x1) 65 | x3 = self.model.layer3(x2) 66 | x4 = self.model.layer4(x3) 67 | 68 | # Upsampling 69 | out = self.up_sample1(x4) 70 | out = self.up_sample2(torch.cat([x3, 
out], 1)) 71 | out = self.up_sample3(torch.cat([x2, out], 1)) 72 | out = self.up_sample4(torch.cat([x1, out], 1)) 73 | 74 | out = torch.cat([self.sigmoid(out[:,0:self.num_classes]), out[:, self.num_classes:]], dim=1) 75 | 76 | return out 77 | -------------------------------------------------------------------------------- /model/run_model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import os 10 | import sys 11 | import torch 12 | import numpy as np 13 | 14 | # PROJ ROOT DIR 15 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) 16 | ROOT_PATH = os.path.join(DIR_PATH, os.path.pardir) 17 | sys.path.append(ROOT_PATH) 18 | 19 | # Importing Project Library 20 | from pipeline.model_setup import ModelSetup 21 | from pipeline.utils import find_prediction_mask, get_bboxes, find_jaccard_overlap 22 | 23 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | def run_ssd(model_setup:ModelSetup, image: torch.tensor, trt_optim=False): 26 | """ 27 | Perform inference on the image and return the boundiong boxes along with the images. 
28 | 
29 | :param model_setup: ModelSetup class instance (holds all the information about a model) 
30 | :param image: a set of images (N, 1, 300, 300) 
31 | :return: pred_boxes, pred_labels, pred_scores 
32 | """ 
33 | model_setup.bb_model.eval() 
34 | 
35 | model_setup.bb_model.to(DEVICE) 
36 | image = image.to(DEVICE) 
37 | with torch.no_grad(): 
38 | pred_locs, pred_scores = model_setup.bb_model(image) 
39 | pred_boxes, pred_labels, pred_scores = model_setup.priors.detect_objects(pred_locs, pred_scores) 
40 | 
41 | return pred_boxes[0].to("cpu"), pred_labels, pred_scores 
42 | 
43 | def run_centernet(model_setup: ModelSetup, image: torch.tensor, trt_optim=False): 
44 | """ 
45 | Run CenterNet inference on the image and return the predicted bounding boxes 
46 | 
47 | :param model_setup: ModelSetup, model setup state 
48 | :param image: a set of images (N, 3, 320, 320) 
49 | """ 
50 | model_setup.bb_model.to(DEVICE) 
51 | model_setup.bb_model.eval() 
52 | 
53 | image = image.to(DEVICE) 
54 | with torch.no_grad(): 
55 | if trt_optim: # the TRT-optimized module and the eager model are called the same way 
56 | preds = model_setup.bb_model(image) 
57 | else: 
58 | preds = model_setup.bb_model(image) 
59 | 
60 | prediction = preds 
61 | 
62 | pred_heatmap = prediction[0][0:model_setup.centernet_num_classes].max(0)[0].float() 
63 | pred_mask = find_prediction_mask(pred_heatmap)[0][0] 
64 | pred_yx_locations = torch.nonzero(pred_mask) 
65 | 
66 | pred_height = prediction[0][-4][pred_mask] 
67 | pred_width = prediction[0][-3][pred_mask] 
68 | 
69 | pred_offset_y = prediction[0][-2][pred_mask] 
70 | pred_offset_x = prediction[0][-1][pred_mask] 
71 | 
72 | pred_bboxes = get_bboxes(pred_yx_locations, pred_height, pred_width, pred_offset_x, pred_offset_y) 
73 | 
74 | if pred_bboxes: 
75 | pred_bboxes = torch.FloatTensor(pred_bboxes) 
76 | # Do Non-Max suppression on the nearby boxes 
77 | tmp_boxes = pred_bboxes.clone() 
78 | tmp_boxes[:,2:4] += tmp_boxes[:,0:2] 
79 | 
80 | # Tensor of zeros for all valid boxes 
81 | suppress = torch.zeros((tmp_boxes.size(0)), dtype=torch.uint8).to(DEVICE) 
82 | # Overlap score [0-1] 
83 | overlap = find_jaccard_overlap(tmp_boxes, tmp_boxes) 
84 | for box in range(tmp_boxes.size(0)): 
85 | if suppress[box] == 1: 
86 | continue 
87 | suppress = torch.max(suppress, torch.as_tensor(overlap[box] > 0.3, dtype=torch.uint8).to(DEVICE)) 
88 | suppress[box] = 0 
89 | 
90 | # Get the list of the valid boxes 
91 | pred_bboxes_list = [] 
92 | for i, elem in enumerate(suppress): 
93 | if elem.item() == 0: 
94 | pred_bboxes_list.append(pred_bboxes[i].tolist()) 
95 | pred_bboxes = torch.FloatTensor(pred_bboxes_list) 
96 | else: 
97 | pred_bboxes = None 
98 | 
99 | return pred_bboxes, None, None 
100 | 
101 | def run_a2j(model_setup:ModelSetup, image): 
102 | """ 
103 | Perform inference on the cropped hand depth image and return the predicted joint locations. 
104 | 
105 | :param model_setup: ModelSetup class instance (holds all the information about a model) 
106 | :param image: a set of images (N, 1, 176, 176) 
107 | :return: pred_points, the predicted (x, y, z) joint locations 
108 | """ 
109 | model_setup.a2j_model.eval() 
110 | model_setup.a2j_model.to(DEVICE) 
111 | model_setup.post_process.to(DEVICE) 
112 | image = image.to(DEVICE) 
113 | 
114 | with torch.no_grad(): 
115 | joint_classification, offset_regression, depth_regression = model_setup.a2j_model(image.type(torch.float32)) 
116 | pred_points = model_setup.post_process(joint_classification, offset_regression, depth_regression) 
117 | 
118 | return pred_points 
119 | 
-------------------------------------------------------------------------------- 
/pipeline/azure_kinect.py: 
-------------------------------------------------------------------------------- 
1 | ''' 
2 | Copyright (c) 2019 Boshen Zhang 
3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 
6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 9 | from pyk4a import PyK4A 10 | 11 | import os 12 | import sys 13 | import time 14 | import argparse 15 | import matplotlib.pyplot as plt 16 | import matplotlib.patches as patches 17 | 18 | # PROJ ROOT DIR 19 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) 20 | ROOT_PATH = os.path.join(DIR_PATH, os.path.pardir) 21 | sys.path.append(ROOT_PATH) 22 | 23 | # PROJ LIBRARY 24 | import pipeline.constants as const 25 | from pipeline.utils import * 26 | from pipeline.model_setup import ModelSetup 27 | from model.run_model import run_centernet, run_ssd, run_a2j 28 | 29 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 30 | 31 | def parse_arguments(): 32 | """ 33 | Argument parser function for main.py 34 | """ 35 | parser = argparse.ArgumentParser() 36 | 37 | parser.add_argument('-t', '--trt', 38 | type=bool, 39 | default=False, 40 | help="Set to True for trt optimization") 41 | 42 | args = parser.parse_args() 43 | return args 44 | 45 | def run_camera_inferance(k4a, model_setup: ModelSetup, iterations=100, show_heatmap=False, trt_optim=False): 46 | """ 47 | Run the model for N number of frames 48 | 49 | :param model_setup: ModelSetup 50 | :param iterations: the total number of frames to run the model 51 | :param show_heatmap: set to visualize prediction heat map and mask 52 | """ 53 | fig = plt.figure(figsize=(6, 8)) 54 | fig.suptitle(f"{const.NUM_JOINTS} Joints", fontsize=16) 55 | ax_1 = fig.add_subplot(2,1,1) 56 | ax_2 = fig.add_subplot(2, 1, 2, projection='3d') 57 | 58 | bb_summary = Summary() 59 | a2j_summary = Summary() 60 | 61 | for i in range(1000): 62 | capture = k4a.get_capture() 63 | ir_img = capture.ir 64 | depth_img = capture.depth 65 | 66 | w, h = ir_img.shape[1], ir_img.shape[0] # Image (width, height) 67 | transformed_image = centernet_img_transform(ir_image=ir_img, depth_image=depth_img) # Image transfered to (1, 1, 300, 300) float tensor 68 | 69 | start_time = time.time() 70 | pred_boxes, _, _ = run_centernet(model_setup, transformed_image) # Perform Inference 71 | end_time = time.time() 72 | bb_summary.update(end_time-start_time) 73 | 74 | pred_joints_collections = [] 75 | median_depths = [] 76 | 77 | if pred_boxes != None: 78 | # Normalizing the pred boxes to original dimentions 79 | original_dims = torch.FloatTensor([w, h, w, h]).unsqueeze(0) 80 | pred_boxes[:,2:4] += pred_boxes[:,0:2] 81 | pred_boxes /= 320 82 | 83 | pred_boxes *= original_dims 84 | 85 | bboxs = [] # list of (x0, y0, x1, y1) 86 | for i in range(pred_boxes.size(0)): 87 | box_locs = pred_boxes[i].tolist() 88 | x, y = box_locs[0], box_locs[1] 89 | width, height = abs(box_locs[0] - box_locs[2]), abs(box_locs[1] - box_locs[3]) 90 | rect = patches.Rectangle((x,y),width,height,linewidth=1,edgecolor='g',facecolor='none') 91 | ax_1.add_patch(rect) 92 | 93 | bboxs.append([ 94 | int(box_locs[0]), 95 | int(box_locs[1]), 96 | int(box_locs[2]), 97 | int(box_locs[3]) 98 | ]) 99 | 100 | for bbox in bboxs: 101 | t_depth_image, median_depth = a2j_depth_image_transform(depth_img, bbox) 102 | # import pdb; pdb.set_trace() 103 | start_time = time.time() 104 | pred_points = run_a2j(model_setup, t_depth_image) 105 | end_time = time.time() 106 | a2j_summary.update(end_time-start_time) 107 | 108 | pred_joints_collections.append(pred_points[0]) 109 | median_depths.append(median_depth) 110 | 111 | normalized_joints = back_to_normal(pred_joints_collections, bboxs, median_depths) 112 | scats = vizualize_frams(ax_2, normalized_joints) 113 | 114 | ir_img[ir_img > 3000] = ir_img.mean() 115 | 
ax_1.imshow(ir_img, interpolation='nearest', cmap ='gray') 116 | 117 | plt.draw() 118 | plt.pause(0.001) 119 | 120 | ax_1.clear() 121 | if pred_boxes != None: 122 | [scat.remove() for scat in scats] 123 | ax_2.clear() 124 | 125 | print(f"BB Infrence time: {bb_summary.avg:1.4f} "\ 126 | f"A2J Infrence time: {a2j_summary.avg:1.4f} "\ 127 | f"Total Infrence time: {a2j_summary.avg + bb_summary.avg:1.4f}") 128 | 129 | print(f"BB Infrence time FPS: {1/bb_summary.avg:1.0f} "\ 130 | f"A2J Infrence time FPS: {1/a2j_summary.avg:1.0f} "\ 131 | f"Total Infrence time FPS: {1/(a2j_summary.avg + bb_summary.avg):1.0f}") 132 | 133 | 134 | def main(): 135 | # Load camera with default config 136 | k4a = PyK4A() 137 | k4a.start() 138 | 139 | args = parse_arguments() 140 | bbox_path, a2j_path = get_model() 141 | 142 | model_setup = ModelSetup(BBOX_MODEL_PATH=bbox_path, A2J_model_path=a2j_path, trt_optim=args.trt) 143 | 144 | run_camera_inferance(k4a, model_setup) 145 | 146 | main() 147 | -------------------------------------------------------------------------------- /pipeline/constants.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 9 | import os 10 | 11 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) 12 | ROOT_PATH = os.path.join(DIR_PATH, os.path.pardir) 13 | 14 | CENTERNET_MODEL_PATH = os.path.join(ROOT_PATH, "checkpoint/CenterNet") 15 | A2J_MODEL_PATH = os.path.join(ROOT_PATH, "checkpoint/A2J") 16 | ############################################# 17 | ############# CenterNet SETUP ############### 18 | ############################################# 19 | CENTERNET_MODEL_NAME = "ResnetCenterNet" 20 | 21 | # Setup the data to be used for training (Depth images/ fusion of IR and Depth images) 22 | CENTERNET_DATA_LOADER_SWITCHER = { 23 | "depth": False, 24 | "fused": True, 25 | } 26 | CENTERNET_DATA_LOADER = [[elem[0] for elem in CENTERNET_DATA_LOADER_SWITCHER.items() if elem[1]][0]] [0] 27 | 28 | # Setup the heatmap loss MSE/Logistic loss 29 | CENTERNET_LOSS_SWITHCER = { 30 | "MSE": False, 31 | "Logistic": True, 32 | } 33 | CENTERNET_LOSS = [[elem[0] for elem in CENTERNET_LOSS_SWITHCER.items() if elem[1]][0]] [0] 34 | 35 | CENTERNET_IMG_SHAPE = (320, 320) 36 | 37 | CENTERNET_NUM_CLASSES = 1 38 | CENTERNET_STRIDE = 2 39 | 40 | THRESHOLD_ACC = 0.3 41 | 42 | INPUT_IMG_SIZE = (320, 320) 43 | 44 | ############################################# 45 | ################# A2J SETUP ################# 46 | ############################################# 47 | DATASET = "NYU" # "Personal", "NYU" 48 | 49 | DATA_SEGMENT = "1" # ALL, 1 50 | # List of availiblke backbones set the one you wantto use to true and all else to false 51 | A2J_BACKBONE_NAME = { 52 | "resnet18": False, 53 | "resnet34": False, 54 | "resnet50": True, 55 | "resnet101": False, 56 | "resnet152": False, 57 | "mobilenet": False, 58 | } 59 | 60 | A2J_TARGET_SIZE = (176, 176) 61 | DEPTH_THRESHOLD = 180 62 | A2J_STRIDE = 16 63 | NUM_JOINTS = 16 # 14, 16, 36, 21 64 | -------------------------------------------------------------------------------- /pipeline/model_setup.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 
9 | import os 
10 | import torch 
11 | 
12 | # PROJ LIBRARY 
13 | import pipeline.constants as const 
14 | 
15 | from model.CenterNet.centernet import Resnet18FeatureExtractor 
16 | 
17 | from model.A2J.a2j import A2J 
18 | from model.A2J.model import A2J_model 
19 | from model.A2J.a2j_utilities.post_processing import PostProcess 
20 | 
21 | 
22 | class ModelSetup(object): 
23 | """ 
24 | Class to set up both the bounding box (CenterNet) model and the A2J model 
25 | """ 
26 | def __init__(self, BBOX_MODEL_PATH:str, A2J_model_path=const.A2J_MODEL_PATH, trt_optim=False): 
27 | """ 
28 | 
29 | :param BBOX_MODEL_PATH: string, full path to the bounding box (CenterNet) model checkpoint 
30 | :param A2J_model_path: string, full path to A2J Model checkpoint 
31 | """ 
32 | self.bb_model_path = BBOX_MODEL_PATH 
33 | self.a2j_path = A2J_model_path 
34 | 
35 | print("Loading CenterNet ...") 
36 | centernet_check_point = torch.load(self.bb_model_path, map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu")) 
37 | self.centernet_model_name = centernet_check_point["model_name"] 
38 | self.centernet_num_classes = centernet_check_point['num_classes'] 
39 | self.bb_model = Resnet18FeatureExtractor(num_classes=self.centernet_num_classes) 
40 | self.bb_model.load_state_dict(centernet_check_point["model"]) 
41 | print("CenterNet Loading Finished!\n\n") 
42 | 
43 | 
44 | 
45 | if trt_optim: 
46 | import tensorrt as trt 
47 | from torch2trt import torch2trt, TRTModule 
48 | trt_model_path = self.bb_model_path.split(".")[0] + ".trt" 
49 | if not os.path.exists(trt_model_path): 
50 | print("Creating TRT Bounding Box Model...") 
51 | 
52 | x = torch.ones((1, 3, const.INPUT_IMG_SIZE[0], const.INPUT_IMG_SIZE[1])).cuda() 
53 | 
54 | self.bb_model = torch2trt(self.bb_model.eval().cuda(), [x], fp16_mode=True) 
55 | torch.save(self.bb_model.state_dict(), trt_model_path) 
56 | print(f"TRT Bounding Box Model saved at:\n" 
57 |       f"{trt_model_path}\n") 
58 | 
59 | 
60 | print("Loading TRT Bounding Box Model...") 
61 | del self.bb_model 
62 | 
63 | self.bb_model = TRTModule() 
64 | self.bb_model.load_state_dict(torch.load(trt_model_path)) 
65 | print("TRT Bounding Box Model loaded!\n") 
66 | 
67 | 
68 | # Load A2J model 
69 | print("Loading A2J ...") 
70 | backbone_name = [elem[0] for idx, elem in enumerate(const.A2J_BACKBONE_NAME.items()) if elem[1]][0] # first backbone flagged True in constants.py 
71 | a2j_check_point = torch.load(self.a2j_path, map_location=torch.device("cpu")) 
72 | 
73 | self.num_class = a2j_check_point["num_classes"] 
74 | # self.a2j_model = A2J_model(num_classes=self.num_class) 
75 | self.a2j_model = A2J(num_joints=self.num_class, backbone_name=backbone_name, backbone_pretrained=True) 
76 | self.a2j_model.load_state_dict(a2j_check_point["model"]) 
77 | self.post_process = PostProcess(shape=(const.A2J_TARGET_SIZE[1]//16, const.A2J_TARGET_SIZE[0]//16),\ 
78 | stride=const.A2J_STRIDE) 
79 | 
80 | if trt_optim: 
81 | from torch2trt import torch2trt, TRTModule 
82 | trt_a2j_model_path = self.a2j_path.split(".")[0] + ".trt" 
83 | if not os.path.exists(trt_a2j_model_path): 
84 | print("Creating TRT A2J Model...") 
85 | x = torch.empty((1, 1, const.A2J_TARGET_SIZE[0], const.A2J_TARGET_SIZE[1])).cuda().float() 
86 | 
87 | self.a2j_model = torch2trt(self.a2j_model.eval().cuda(), [x], fp16_mode=True) 
88 | torch.save(self.a2j_model.state_dict(), trt_a2j_model_path) 
89 | print(f"TRT A2J Model saved at:\n" 
90 |       f"{trt_a2j_model_path}\n") 
91 | 
92 | 
93 | print("Loading TRT A2J Model...") 
94 | del self.a2j_model 
95 | 
96 | self.a2j_model = TRTModule() 
97 | self.a2j_model.load_state_dict(torch.load(trt_a2j_model_path)) 
98 | print("TRT A2J Model loaded!\n") 
99 | 
100 | 
101 | print("A2J Loading Finished!\n\n") 
102 | 
103 | 
104 | 
-------------------------------------------------------------------------------- 
/pipeline/utils.py: 
-------------------------------------------------------------------------------- 
1 | ''' 
2 | Copyright (c) 2019 Boshen Zhang 
3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 
6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 
9 | import os 
10 | import sys 
11 | import cv2 
12 | import torch 
13 | import numpy as np 
14 | import torchvision.transforms.functional as FT 
15 | 
16 | from glob import glob 
17 | from PIL import Image, ImageOps 
18 | 
19 | # PROJ ROOT DIR 
20 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) 
21 | ROOT_PATH = os.path.join(DIR_PATH, os.path.pardir) 
22 | sys.path.append(ROOT_PATH) 
23 | 
24 | # PROJ LIBRARY 
25 | import pipeline.constants as const 
26 | 
27 | 
28 | # Set the global device variable to cuda if a GPU is available 
29 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
30 | 
31 | 
32 | # DATASET INFO 
33 | MEAN = -0.66877532 
34 | STD = 28.32958208 
35 | 
36 | 
37 | def xy_to_cxcy(xy): 
38 | """ 
39 | Convert bounding boxes from boundary coordinates (x_min, y_min, x_max, y_max) to center-size coordinates (c_x, c_y, w, h) 
40 | 
41 | :param xy: bounding box coordinates, a tensor of size (n_boxes, 4) 
42 | :return: bounding boxes in center-size coordinates, a tensor of size (n_boxes, 4) 
43 | """ 
44 | return torch.cat([ (xy[:, 2:] + xy[:, :2])/2, # c_x, c_y 
45 | xy[:, 2:] - xy[:, :2]], 1) # w, h 
46 | 
47 | def cxcy_to_xy(cxcy): 
48 | """ 
49 | Convert bounding boxes from center-size coordinates (c_x, c_y, w, h) to boundary coordinates (x_min, y_min, x_max, y_max) 
50 | 
51 | :param cxcy: bounding boxes in center-size coordinates (n_boxes, 4) 
52 | :return: bounding boxes in boundary coordinates (n_boxes, 4) 
53 | """ 
54 | 
55 | return torch.cat([cxcy[:, :2] - (cxcy[:, 2:] / 2), # x_min, y_min 
56 | cxcy[:, :2] + (cxcy[:, 2:] / 2)], 1) # x_max, y_max 
57 | 
58 | def cxcy_to_gcxgcy(cxcy, priors_cxcy): 
59 | """ 
60 | Encode bounding boxes (that are in center-size form) w.r.t. the corresponding prior boxes. 
61 | 
62 | For the center coordinates, find the offset with respect to the prior box, and scale by the size of the prior box 
63 | For the size coordinates, scale by the size of the prior box, and convert to the log-space. 
64 | 
65 | In the model, we are predicting bounding box coordinates in this encoded form. 
66 | 67 | :param cxcy: bounding boxes in center sized coordinates, (n_priors, 4) 68 | :param priors_xcxy: prior boxes with respect which the encoding must be preformed, (n_priors, 4) 69 | :return: encoded boundin boxes, (n_priors, 4) 70 | """ 71 | cxcy = cxcy.to(DEVICE) 72 | priors_cxcy = priors_cxcy.to(DEVICE) 73 | return torch.cat( 74 | [(cxcy[:, :2] - priors_cxcy[:, :2]) / (priors_cxcy[:, 2:] / 10), # g_c_x, g_c_y 75 | torch.log(cxcy[:, 2:] / priors_cxcy[:, 2:]) * 5], 1) # g_w, g_h 76 | 77 | def gcxgcy_to_cxcy(gcxgcy, priors_cxcy): 78 | """ 79 | Decode bounding box coordinates predicted by thr model, sice they are encoded in the form mentioned above. 80 | 81 | They are decoded into center size coordinates. 82 | 83 | This is invers of the above functions 84 | 85 | :param gcxgcy: encoded bounding box (i.e. output of model) (n_priors, 4) 86 | :param priors_cxcy: prior boxes with respect to which the encoding is defined (n_priors, 4) 87 | :return: decoded bounding boxes in center size form (n_priors, 4) 88 | """ 89 | gcxgcy = gcxgcy.to(DEVICE) 90 | priors_cxcy = priors_cxcy.to(DEVICE) 91 | return torch.cat( 92 | [gcxgcy[:, :2] * priors_cxcy[:, 2:] / 10 + priors_cxcy[:, :2], # c_x, c_y 93 | torch.exp(gcxgcy[:, 2:] / 5) * priors_cxcy[:, 2:]], 1) # w, h 94 | 95 | def find_intersection(set_1, set_2): 96 | """ 97 | Find the intersection of every box combination betweeen 2 sets of boxes that are in boundary coordinates. 98 | 99 | :param set_1: set_1 (n1, 4) 100 | :param set_2: set 2 (n2, 4) 101 | :return: intersection of each of the boxes in set 1 with respect to each of the set 2 (n1, n2) 102 | """ 103 | 104 | lower_bounds = torch.max(set_1[:, :2].unsqueeze(1).to(DEVICE), set_2[:, :2].unsqueeze(0).to(DEVICE)) # (n1, n2, 2) 105 | upper_bounds = torch.min(set_1[:, 2:].unsqueeze(1).to(DEVICE), set_2[:, 2:].unsqueeze(0).to(DEVICE)) # (n1, n2, 2) 106 | intersection_dims = torch.clamp(upper_bounds - lower_bounds, min=0) # (n1, n2, 2) 107 | return intersection_dims[:, :, 0] * intersection_dims[:, :, 1] # (n1, n2) 108 | 109 | def find_jaccard_overlap(set_1, set_2): 110 | """ 111 | Find IoU of every box combination in between the 2 sets (boxes in boundary coordinates) 112 | 113 | :param set_1: set 1 (n1, 4) 114 | :param set2: set 2 (n2, 4) 115 | :return: Jaccard overlap of each of the boxes in the set 1 with respect to set 2 (n1, n2) 116 | """ 117 | 118 | intersection = find_intersection(set_1, set_2) 119 | 120 | area_set_1 = (set_1[:, 2] - set_1[:, 0]) * (set_1[:, 3] - set_1[:, 1]) # (n1) 121 | area_set_2 = (set_2[:, 2] - set_2[:, 0]) * (set_2[:, 3] - set_2[:, 1]) # (n1) 122 | 123 | union = area_set_1.unsqueeze(1).to(DEVICE) + area_set_2.unsqueeze(0).to(DEVICE) - intersection # (n1, n2) 124 | 125 | return intersection / union 126 | 127 | def decay_lr_rate(optim, scale): 128 | """ 129 | Scale the lr rate by a factor. 130 | 131 | :param optim: optimizer (SGD) 132 | :param scale: factor to scale the lr rate with. 133 | """ 134 | for param_group in optim.param_groups: 135 | param_group['lr'] = param_group['lr'] * scale 136 | 137 | class Summary(object): 138 | def __init__(self): 139 | self.item = 0 140 | self.sum = 0 141 | self.len = 0 142 | self.avg = 0.000001 143 | 144 | def update(self, value): 145 | self.item = value 146 | self.sum += value 147 | self.len += 1 148 | self.avg = self.sum / self.len 149 | 150 | 151 | def get_model(): 152 | """ 153 | The model weights are saved in CHECKPOINT_DIR specified in constants.py 154 | this functions loos into that directory and returns the path to the model. 
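With the default values in pipeline/constants.py the expected checkpoint names resolve to something like 
checkpoint/CenterNet/Logistic_ResnetCenterNet_fused.pth and checkpoint/A2J/NYU_1_resnet50_16_a2j.pth 
(illustrative examples only; the actual file names depend on the constants used during training). 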
155 | 
156 | Please set the correct paths in pipeline/constants.py if not using the defaults: 
157 | CENTERNET_MODEL_PATH 
158 | CENTERNET_MODEL_NAME 
159 | CENTERNET_LOSS / CENTERNET_DATA_LOADER 
160 | 
161 | A2J_BACKBONE_NAME 
162 | A2J_MODEL_PATH 
163 | 
164 | :return: str, str: path to the CenterNet model, path to the A2J model 
165 | """ 
166 | centernet_model_path = const.CENTERNET_MODEL_PATH 
167 | bb_models = glob(f"{centernet_model_path}/{const.CENTERNET_LOSS}_{const.CENTERNET_MODEL_NAME}_{const.CENTERNET_DATA_LOADER}.pth") 
168 | if not bb_models: 
169 | print(f"\n" 
170 |       f"There are no CenterNet model checkpoints at:\n" 
171 |       f"\n" 
172 |       f"{centernet_model_path}\n" 
173 |       f"\n" 
174 |       f"Please train a model or change the directory in constants.py\n") 
175 | 
176 | exit(-1) 
177 | 
178 | backbone_name = [elem[0] for idx, elem in enumerate(const.A2J_BACKBONE_NAME.items()) if elem[1]][0] 
179 | a2j_model_path = const.A2J_MODEL_PATH 
180 | a2j_model_path = f"{a2j_model_path}/{const.DATASET}_{const.DATA_SEGMENT}_{backbone_name}_{const.NUM_JOINTS}_a2j.pth" 
181 | a2j_models = glob(a2j_model_path) 
182 | 
183 | if not a2j_models: 
184 | print(f"\n" 
185 |       f"There is no A2J model with the {backbone_name} backbone in the checkpoints at:\n" 
186 |       f"\n" 
187 |       f"{a2j_model_path}\n" 
188 |       f"\n" 
189 |       f"Please train a model or change the directory in constants.py\n") 
190 | 
191 | exit(-1) 
192 | 
193 | return bb_models[0], a2j_models[0] 
194 | 
195 | # Image Transforms 
196 | def normalize(image: np.array, img_shape=tuple): 
197 | """ 
198 | Resize the image to img_shape and normalize it to zero mean and unit standard deviation 
199 | 
200 | :param image: numpy array 
201 | :return: normalized image cast to a torch tensor 
202 | """ 
203 | image = cv2.resize(image, img_shape, interpolation=cv2.INTER_NEAREST) 
204 | mean = np.mean(image) 
205 | std = image.std() 
206 | if std==0: 
207 | std = 1 
208 | new_image = (image - mean) / std 
209 | 
210 | # Cast to pytorch and expand dimensions for the model forward pass 
211 | new_image = torch.from_numpy(new_image).type(torch.float32) 
212 | 
213 | new_image = new_image.unsqueeze(0) 
214 | 
215 | return new_image 
216 | 
217 | def centernet_img_transform(depth_image: np.array, ir_image: np.array, img_shape=const.INPUT_IMG_SIZE, input_type=const.CENTERNET_DATA_LOADER): 
218 | """ 
219 | Transform the raw depth/IR images into the CenterNet input tensor 
220 | 
221 | :param depth_image: np.array (uint16), depth image 
222 | :param ir_image: np.array (uint16), IR image 
223 | :param img_shape: tuple (h, w), image size 
224 | :param input_type: str, which input type ("fused" or "depth") the model uses 
225 | :return: (1, 3, h, w) float tensor 
226 | """ 
227 | def depth_input(depth_image: np.array, **kwargs): 
228 | c, h, w = depth_image.size() 
229 | depth_image = depth_image[0:1,:,:] # depth 
230 | new_image = depth_image.expand(1, 3, h, w) 
231 | return new_image 
232 | 
233 | def fused_input(depth_image: np.array, ir_image:np.array): 
234 | c, h, w = depth_image.size() 
235 | new_image_c1 = depth_image.type(torch.float32) 
236 | new_image_c2 = ir_image.type(torch.float32) 
237 | new_image_c3 = (new_image_c1 + new_image_c2) / 2 
238 | new_image = torch.cat((new_image_c1, new_image_c2, new_image_c3), 0) 
239 | new_image = new_image.expand(1, 3, h, w) 
240 | return new_image 
241 | 
242 | input_switcher = { 
243 | "depth": depth_input, 
244 | "fused": fused_input, 
245 | } 
246 | depth_image = normalize(depth_image, img_shape=img_shape) 
247 | ir_image = normalize(ir_image, img_shape=img_shape) 
248 | 
249 | return input_switcher[input_type](depth_image=depth_image, ir_image=ir_image) 
250 | 
251 | 
252 | ######################### 
253 | ##### Model Helpers ##### 
254 | ######################### 
255 | 
256 | def find_prediction_mask(pred_heatmap: torch.tensor, window_size=11, threshold=const.THRESHOLD_ACC): 
257 | """ 
258 | Find the peak mask of a given heatmap. Keep in mind that the heatmap might not have values as large as 
259 | 1, so we need to find the local maxima of the heatmap. 
260 | 
261 | :param pred_heatmap: torch.tensor, predicted heatmap by the model 
262 | :param window_size: int, size of the maxPooling window 
263 | :return: torch.tensor (mask of the heatmap) 
264 | """ 
265 | pred_local_max = torch.max_pool2d(pred_heatmap[None, None, ...], kernel_size=window_size, stride=1, padding=window_size//2) 
266 | return (pred_local_max == pred_heatmap) * (pred_heatmap > threshold) 
267 | 
268 | def get_bboxes(yx_locations: torch.tensor, height: torch.tensor, width: torch.tensor,\ 
269 | offset_x: torch.tensor, offset_y: torch.tensor, stride=const.CENTERNET_STRIDE, img_shape=const.CENTERNET_IMG_SHAPE): 
270 | """ 
271 | Create a list of bounding boxes [[x_min, y_min, w, h], ...] 
272 | 
273 | :param yx_locations: torch.tensor, X and Y locations in the heatmap; they have to be multiplied by the stride to go back to the original dims 
274 | :param height: torch.tensor, The height of the bbox 
275 | :param width: torch.tensor, The width of the bbox 
276 | :param offset_x: torch.tensor, The X offset value 
277 | :param offset_y: torch.tensor, The Y offset value 
278 | """ 
279 | yx_locations *= stride 
280 | bboxes = [] 
281 | for i, yx_location in enumerate(yx_locations): 
282 | y_center = yx_location[0].item() + offset_y[i].item() 
283 | x_center = yx_location[1].item() + offset_x[i].item() 
284 | h = height[i].item() 
285 | w = width[i].item() 
286 | 
287 | x_min = max(0, x_center - w/2) 
288 | y_min = max(0, y_center - h/2) 
289 | 
290 | bboxes.append([x_min, y_min, w, h]) 
291 | 
292 | return bboxes 
293 | 
294 | def get_median_depth(img, xy_locs:list): 
295 | """ 
296 | Get the median depth of the hand 
297 | 
298 | :param img: numpy array, depth image 
299 | :param xy_locs: list, [x_min, y_min, x_max, y_max] locations of the bounding box 
300 | :return: float, median depth 
301 | """ 
302 | return np.median(img) 
303 | 
304 | def a2j_depth_image_transform(img, xy_locs: list, target_size=const.A2J_TARGET_SIZE, depth_thresh=const.DEPTH_THRESHOLD): 
305 | """ 
306 | Transform the depth image to the appropriate format for running through the model 
307 | 
308 | :param img: numpy array, depth image 
309 | :param xy_locs: list, [x_min, y_min, x_max, y_max] locations of the bounding box 
310 | :param target_size: tuple, input target size of the A2J network 
311 | :param depth_thresh: int, depth threshold used to suppress unwanted (background) pixels 
312 | :return: processed depth image to feed into the A2J model, and the median depth of the hand crop 
313 | """ 
314 | 
315 | img_output = np.ones((target_size[1], target_size[0], 1), dtype="float32") 
316 | 
317 | new_Xmin = xy_locs[0] 
318 | new_Ymin = xy_locs[1] 
319 | new_Xmax = xy_locs[2] 
320 | new_Ymax = xy_locs[3] 
321 | 
322 | img_crop = img[new_Ymin:new_Ymax, new_Xmin:new_Xmax] 
323 | median_depth = get_median_depth(img_crop, xy_locs) 
324 | 
325 | center_x = (new_Xmax+new_Xmin)/2 
326 | center_y = (new_Ymax+new_Ymin)/2 
327 | new_Xmin = int(max(center_x-110, 0)) 
328 | new_Ymin = int(max(center_y-110, 0)) 
329 | new_Xmax = int(min(center_x+110, img.shape[1]-1)) 
330 | new_Ymax = int(min(center_y+110, img.shape[0]-1)) 
331 | img_crop = img[new_Ymin:new_Ymax, new_Xmin:new_Xmax] 
332 | 
333 | img_resize = cv2.resize(img_crop, target_size, interpolation=cv2.INTER_NEAREST) 
334 | img_resize = np.asarray(img_resize, dtype="float32") # cast to float32 before thresholding 
335 | img_resize[np.where(img_resize >= median_depth + depth_thresh)] = median_depth 
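# The clamp above and the one below flatten depth values farther than depth_thresh from the hand's median depth to the median, suppressing the background before normalization. 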
336 | img_resize[np.where(img_resize <= median_depth - depth_thresh)] = median_depth 337 | img_resize = (img_resize - median_depth) 338 | img_resize = (img_resize - MEAN)/STD 339 | 340 | img_output[:,:,0] = img_resize 341 | 342 | 343 | img_output = np.asarray(img_output) 344 | img_NCHW_out = img_output.transpose(2, 0, 1) 345 | img_NCHW_out = np.asarray(img_NCHW_out) 346 | 347 | img_out = torch.from_numpy(img_NCHW_out) 348 | img_out = img_out.unsqueeze(0) 349 | 350 | # n, c, h, w = img_out.size() 351 | # img_out = img_out.expand(n, 3, h, w) 352 | 353 | return img_out, median_depth 354 | 355 | def back_to_normal(pred_joints, xy_locs:list, median_depths:float, target_size=const.A2J_TARGET_SIZE): 356 | """ 357 | Transform the predicted joint to the original space 358 | 359 | :param pred_joints: list of np.array, list of predicted joints 360 | :param xy_locs: list, [x_min, y_min, x_max, y_max] locations of the bounding box 361 | :param median_depth: float, the value of median depth 362 | """ 363 | 364 | normalized_joints = [] 365 | for i in range(len(pred_joints)): 366 | pred_joint = pred_joints[i].cpu() 367 | pred_joint = pred_joint.detach().numpy() 368 | 369 | xy_bb = xy_locs[i] 370 | median_depth = median_depths[i] 371 | 372 | p_j = np.ones((const.NUM_JOINTS, 3)) 373 | x_len = abs(xy_bb[0] - xy_bb[2]) 374 | y_len = abs(xy_bb[1] - xy_bb[3]) 375 | 376 | p_j[:,0] = ((pred_joint[:,1] * x_len) / target_size[0]) + xy_bb[0] 377 | p_j[:,1] = ((pred_joint[:,0] * y_len) / target_size[1]) + xy_bb[1] 378 | p_j[:,2] = pred_joint[:,2] + median_depth 379 | 380 | normalized_joints.append(p_j) 381 | 382 | return normalized_joints 383 | 384 | 385 | def get_xyz_lims(pred_joints_collections): 386 | max_range = [0, 0, 0] 387 | min_range = [float("inf"), float("inf"), float("inf")] 388 | 389 | for pred_joints in pred_joints_collections: 390 | min_x = pred_joints[:,0].min() 391 | if min_x < min_range[0]: 392 | min_range[0] = min_x 393 | min_y = pred_joints[:,1].min() 394 | if min_y < min_range[1]: 395 | min_range[1] = min_y 396 | min_z = pred_joints[:,2].min() 397 | if min_z < min_range[2]: 398 | min_range[2] = min_z 399 | 400 | max_x = pred_joints[:,0].max() 401 | if max_x > max_range[0]: 402 | max_range[0] = max_x 403 | max_y = pred_joints[:,1].max() 404 | if max_y > max_range[1]: 405 | max_range[1] = max_y 406 | max_z = pred_joints[:,2].max() 407 | if max_z > max_range[2]: 408 | max_range[2] = max_z 409 | 410 | return max_range, min_range 411 | 412 | def vizualize_frams(ax_2, pred_joints_collections): 413 | pred_joints_collections = np.array(pred_joints_collections) 414 | 415 | max_range, min_range = get_xyz_lims(pred_joints_collections) 416 | 417 | mid_x = (max_range[0] + min_range[0])/2 418 | mid_y = (max_range[1] + min_range[1])/2 419 | mid_z = (max_range[2] + min_range[2])/2 420 | 421 | # Second subplot 422 | ax_2.grid(True) 423 | ax_2.set_xticklabels([]) 424 | ax_2.set_yticklabels([]) 425 | ax_2.set_zticklabels([]) 426 | 427 | ax_2.set_xlim(mid_x - max_range[0]/2, mid_x + max_range[0]/2) 428 | ax_2.set_ylim(mid_y - max_range[1]/2, mid_y + max_range[1]/2) 429 | ax_2.set_zlim(mid_z - max_range[2]/2, mid_z + max_range[2]/2) 430 | 431 | scats = [] 432 | for pred_joints in pred_joints_collections: 433 | ax_2.scatter(pred_joints[:,0], pred_joints[:,1], pred_joints[:,2], c='r', marker='^', s=10) 434 | 435 | # MY SCRIPT 436 | if const.NUM_JOINTS == 36: 437 | ax_2.plot(pred_joints[0:6,0], pred_joints[0:6,1], pred_joints[0:6,2], color='b') 438 | ax_2.plot(pred_joints[6:12,0], pred_joints[6:12,1], 
pred_joints[6:12,2], color='b') 439 | ax_2.plot(pred_joints[12:18,0], pred_joints[12:18,1], pred_joints[12:18,2], color='b') 440 | ax_2.plot(pred_joints[18:24,0], pred_joints[18:24,1], pred_joints[18:24,2], color='b') 441 | ax_2.plot(pred_joints[24:30,0], pred_joints[24:30,1], pred_joints[24:30,2], color='b') 442 | 443 | 444 | # MY SCRIPT 16 JOINTS 445 | if const.NUM_JOINTS == 16: 446 | ax_2.plot(pred_joints[0:3,0], pred_joints[0:3,1], pred_joints[0:3,2], color='b') 447 | ax_2.plot(pred_joints[3:6,0], pred_joints[3:6,1], pred_joints[3:6,2], color='b') 448 | ax_2.plot(pred_joints[6:9,0], pred_joints[6:9,1], pred_joints[6:9,2], color='b') 449 | ax_2.plot(pred_joints[9:12,0], pred_joints[9:12,1], pred_joints[9:12,2], color='b') 450 | ax_2.plot(pred_joints[12:15,0], pred_joints[12:15,1], pred_joints[12:15,2], color='b') 451 | ax_2.plot([pred_joints[2,0], pred_joints[15,0]], [pred_joints[2,1], pred_joints[15,1]], [pred_joints[2,2], pred_joints[15,2]], color='b') 452 | ax_2.plot([pred_joints[5,0], pred_joints[15,0]], [pred_joints[5,1], pred_joints[15,1]], [pred_joints[5,2], pred_joints[15,2]], color='b') 453 | ax_2.plot([pred_joints[8,0], pred_joints[15,0]], [pred_joints[8,1], pred_joints[15,1]], [pred_joints[8,2], pred_joints[15,2]], color='b') 454 | ax_2.plot([pred_joints[11,0], pred_joints[15,0]], [pred_joints[11,1], pred_joints[15,1]], [pred_joints[11,2], pred_joints[15,2]], color='b') 455 | ax_2.plot([pred_joints[14,0], pred_joints[15,0]], [pred_joints[14,1], pred_joints[15,1]], [pred_joints[14,2], pred_joints[15,2]], color='b') 456 | 457 | 458 | 459 | ax_2.view_init(-70, -70) 460 | 461 | return scats 462 | -------------------------------------------------------------------------------- /readme_files/realtime_inference.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/realtime_handpose_3d/3f5ae9ccbf07defc39de7ce9e8b2213dda3be375/readme_files/realtime_inference.gif --------------------------------------------------------------------------------